You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by da...@apache.org on 2013/03/28 12:36:50 UTC

svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Author: danielsh
Date: Thu Mar 28 11:36:50 2013
New Revision: 1462054

URL: http://svn.apache.org/r1462054
Log:
On the verify-at-commit branch, add a backend-independent implementation:
a db/fs.conf file.

* subversion/include/svn_fs.h
  (SVN_FS_CONFIG_SECTION_MISC, (SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT):
    New macros.

* subversion/libsvn_fs/fs-loader.h
  (svn_fs_t.verify_at_commit): New struct member.

* subversion/libsvn_fs/fs-loader.c
  (svn_config.h): Include.
  (CONFIG_FILENAME): New macro.
  (write_config): New helper, based on the eponymous helper in FSFS (rather
    than on write_fs_type()).
  (svn_fs_create): Call write_config().
  (fs_new): Initialise new struct member.
  (svn_fs_open): Read config.
  (svn_fs_commit_txn): Use the config.

Modified:
    subversion/branches/verify-at-commit/subversion/include/svn_fs.h
    subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.c
    subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.h

Modified: subversion/branches/verify-at-commit/subversion/include/svn_fs.h
URL: http://svn.apache.org/viewvc/subversion/branches/verify-at-commit/subversion/include/svn_fs.h?rev=1462054&r1=1462053&r2=1462054&view=diff
==============================================================================
--- subversion/branches/verify-at-commit/subversion/include/svn_fs.h (original)
+++ subversion/branches/verify-at-commit/subversion/include/svn_fs.h Thu Mar 28 11:36:50 2013
@@ -130,6 +130,13 @@ typedef struct svn_fs_t svn_fs_t;
  * @since New in 1.8.
  */
 #define SVN_FS_CONFIG_PRE_1_8_COMPATIBLE        "pre-1.8-compatible"
+
+
+/** @since New in 1.8. */
+#define SVN_FS_CONFIG_SECTION_MISC "miscellany"
+/** @since New in 1.8. */
+#define SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT "verify-at-commit"
+
 /** @} */
 
 

Modified: subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.c
URL: http://svn.apache.org/viewvc/subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.c?rev=1462054&r1=1462053&r2=1462054&view=diff
==============================================================================
--- subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.c (original)
+++ subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.c Thu Mar 28 11:36:50 2013
@@ -32,6 +32,7 @@
 
 #include "svn_hash.h"
 #include "svn_ctype.h"
+#include "svn_config.h"
 #include "svn_types.h"
 #include "svn_dso.h"
 #include "svn_version.h"
@@ -57,6 +58,7 @@
 #endif
 
 #define FS_TYPE_FILENAME "fs-type"
+#define CONFIG_FILENAME "fs.conf"
 
 /* A pool common to all FS objects.  See the documentation on the
    open/create functions in fs-loader.h and for svn_fs_initialize(). */
@@ -338,6 +340,26 @@ write_fs_type(const char *path, const ch
   return svn_error_trace(svn_io_file_close(file, pool));
 }
 
+static svn_error_t *
+write_config(const char *path, apr_pool_t *pool)
+{
+  static const char * const fs_conf_contents =
+#define NL APR_EOL_STR
+"### This file controls backend-independent filesystem configuration."       NL
+""                                                                           NL
+"[" SVN_FS_CONFIG_SECTION_MISC "]"                                           NL
+"### When set, Subversion will run the equivalent of 'svnadmin verify -r'"   NL
+"### on each transaction (commit-in-progress) just before it becomes"        NL
+"### a revision.  This may slow down commits, since the cost of"             NL
+"### verification is proportional to the size of the commit (e.g., number"   NL
+"### of files 'svn log -q -v -r N' shows)."                                  NL
+"# " SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT " = false"                        NL
+;
+#undef NL
+  SVN_ERR(svn_io_file_create(svn_dirent_join(path, CONFIG_FILENAME, pool),
+                             fs_conf_contents, pool));
+  return SVN_NO_ERROR;
+}
 
 /* --- Functions for operating on filesystems by pathname --- */
 
@@ -414,6 +436,11 @@ fs_new(apr_hash_t *fs_config, apr_pool_t
   fs->vtable = NULL;
   fs->fsap_data = NULL;
   fs->uuid = NULL;
+#ifdef SVN_DEBUG
+  fs->verify_at_commit = TRUE;
+#else
+  fs->verify_at_commit = FALSE;
+#endif
   return fs;
 }
 
@@ -445,6 +472,7 @@ svn_fs_create(svn_fs_t **fs_p, const cha
   /* Create the FS directory and write out the fsap-name file. */
   SVN_ERR(svn_io_dir_make_sgid(path, APR_OS_DEFAULT, pool));
   SVN_ERR(write_fs_type(path, fs_type, pool));
+  SVN_ERR(write_config(path, pool));
 
   /* Perform the actual creation. */
   *fs_p = fs_new(fs_config, pool);
@@ -459,9 +487,20 @@ svn_fs_open(svn_fs_t **fs_p, const char 
             apr_pool_t *pool)
 {
   fs_library_vtable_t *vtable;
+  svn_config_t *config;
 
   SVN_ERR(fs_library_vtable(&vtable, path, pool));
   *fs_p = fs_new(fs_config, pool);
+
+  SVN_ERR(svn_config_read2(&config,
+                           svn_dirent_join(path, CONFIG_FILENAME, pool),
+                           FALSE /* must_exist */, TRUE /* case-sensitive */,
+                           pool));
+  SVN_ERR(svn_config_get_bool(config, &(*fs_p)->verify_at_commit,
+                              SVN_FS_CONFIG_SECTION_MISC,
+                              SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT,
+                              (*fs_p)->verify_at_commit));
+
   SVN_MUTEX__WITH_LOCK(common_pool_lock,
                        vtable->open_fs(*fs_p, path, pool, common_pool));
   return SVN_NO_ERROR;
@@ -750,23 +789,19 @@ svn_fs_commit_txn(const char **conflict_
                   svn_fs_txn_t *txn, apr_pool_t *pool)
 {
   svn_error_t *err;
-#if defined(PACK_AFTER_EVERY_COMMIT) || defined(SVN_DEBUG)
-  svn_fs_root_t *txn_root;
-#endif
+  svn_fs_t *fs = txn->fs;
 
   *new_rev = SVN_INVALID_REVNUM;
   if (conflict_p)
     *conflict_p = NULL;
 
-#if defined(PACK_AFTER_EVERY_COMMIT) || defined(SVN_DEBUG)
-  SVN_ERR(svn_fs_txn_root(&txn_root, txn, pool));
-#endif
-
-#ifdef SVN_DEBUG
-  /* ### TODO: add db/fs.conf with a knob to enable this in release builds */
-  /* ### TODO: should this run just before incrementing 'current'? */
-  SVN_ERR(svn_fs_verify_root(txn_root, pool));
-#endif
+  if (fs->verify_at_commit)
+    {
+      /* ### TODO: should this run just before incrementing 'current'? */
+      svn_fs_root_t *txn_root;
+      SVN_ERR(svn_fs_txn_root(&txn_root, txn, pool));
+      SVN_ERR(svn_fs_verify_root(txn_root, pool));
+    }
 
   err = txn->vtable->commit(conflict_p, new_rev, txn, pool);
 
@@ -786,7 +821,6 @@ svn_fs_commit_txn(const char **conflict_
 
 #ifdef PACK_AFTER_EVERY_COMMIT
   {
-    svn_fs_t *fs = svn_fs_root_fs(txn_root);
     const char *fs_path = svn_fs_path(fs, pool);
     err = svn_fs_pack(fs_path, NULL, NULL, NULL, NULL, pool);
     if (err && err->apr_err == SVN_ERR_UNSUPPORTED_FEATURE)

Modified: subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.h
URL: http://svn.apache.org/viewvc/subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.h?rev=1462054&r1=1462053&r2=1462054&view=diff
==============================================================================
--- subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.h (original)
+++ subversion/branches/verify-at-commit/subversion/libsvn_fs/fs-loader.h Thu Mar 28 11:36:50 2013
@@ -407,6 +407,9 @@ struct svn_fs_t
 
   /* UUID, stored by open(), create(), and set_uuid(). */
   const char *uuid;
+
+  /* Parsed contents of fs.conf */
+  svn_boolean_t verify_at_commit;
 };
 
 



Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Daniel Shahaf <da...@elego.de>.
Philip Martin wrote on Thu, Mar 28, 2013 at 12:09:30 +0000:
> Stefan Sperling <st...@elego.de> writes:
> 
> > If we decide that verifying transactions before commit is a good
> > thing, I'd much rather see a new 'svnlook verify' command that can
> > be run from the pre-commit hook, instead of this implementation.
> 
> A separate process could be a performance hit since it would not share
> the FSFS cache used by the commit process.  On the other hand a separate
> process might be an advantage if there is a caching bug.

That's good.  There is already a comment in svn_fs_fs__verify_root()
asking how to disable/bypass caching; "use a child process" is a good
start...

Do we have any per-machine caches?  Or only per-process and memcached?

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Philip Martin <ph...@wandisco.com>.
Stefan Sperling <st...@elego.de> writes:

> If we decide that verifying transactions before commit is a good
> thing, I'd much rather see a new 'svnlook verify' command that can
> be run from the pre-commit hook, instead of this implementation.

A separate process could be a performance hit since it would not share
the FSFS cache used by the commit process.  On the other hand a separate
process might be an advantage if there is a caching bug.

-- 
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
Stefan Sperling wrote on Thu, Mar 28, 2013 at 12:49:46 +0100:
> On Thu, Mar 28, 2013 at 11:36:50AM -0000, danielsh@apache.org wrote:
> > Author: danielsh
> > Date: Thu Mar 28 11:36:50 2013
> > New Revision: 1462054
> > 
> > URL: http://svn.apache.org/r1462054
> > Log:
> > On the verify-at-commit branch, add a backend-independent implementation:
> > a db/fs.conf file.
> 
> As I said on IRC, I think it makes more sense to implement this
> on a per-filesystem basis. The generic fs wrapper can only verify
> the transaction, but there is no guarantee that this really represents
> the data written during commit. E.g. in FSFS we could check the rev
> and revprop files before updating 'current', which leaves less room
> for error between verification and commit. For BDB -- dunno.
> 

In BDB a transaction is promoted to a revision by amending the
'revisions' table.  If BDB supports nested transactions (like sqlite
does), it should be possible to: start a transaction, modify
'revisions', call svn_fs_base__verify_root() (which doesn't exist yet),
abort the transaction.

Or alternatively we could introduce an error code for "This backend
doesn't support svn_fs_verify_root() on transactions" and trap it in
svn_fs_commit_txn().

> If we decide that verifying transactions before commit is a good
> thing, I'd much rather see a new 'svnlook verify' command that can
> be run from the pre-commit hook, instead of this implementation.
> 

This makes sense.  How would such a subcommand relate to 'svnadmin verify'?

> Does that make sense?

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Stefan Sperling <st...@elego.de>.
On Thu, Mar 28, 2013 at 11:36:50AM -0000, danielsh@apache.org wrote:
> Author: danielsh
> Date: Thu Mar 28 11:36:50 2013
> New Revision: 1462054
> 
> URL: http://svn.apache.org/r1462054
> Log:
> On the verify-at-commit branch, add a backend-independent implementation:
> a db/fs.conf file.

As I said on IRC, I think it makes more sense to implement this
on a per-filesystem basis. The generic fs wrapper can only verify
the transaction, but there is no guarantee that this really represents
the data written during commit. E.g. in FSFS we could check the rev
and revprop files before updating 'current', which leaves less room
for error between verification and commit. For BDB -- dunno.

If we decide that verifying transactions before commit is a good
thing, I'd much rather see a new 'svnlook verify' command that can
be run from the pre-commit hook, instead of this implementation.

Does that make sense?

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Stefan Sperling <st...@elego.de>.
On Thu, Mar 28, 2013 at 03:02:29PM +0200, 'Daniel Shahaf' wrote:
> Bert Huijben wrote on Thu, Mar 28, 2013 at 13:39:36 +0100:
> > In many companies everything must be scanned and in most companies that use
> > virusscanners on Windows the average user is not even allowed to configure
> > skipping specific directories.
> > 
> 
> Who cares about the average user?  This is server code.

Bert is right. Many companies really do this on servers.
I've seen it. I think it's crazy and stupid, but people are doing this.

Anyway, we don't need to argue about an fs.conf file if we decide
to go for the proposed alternative approaches (FSFS-specific
implementation and/or svnlook subcommand).

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by 'Daniel Shahaf' <da...@elego.de>.
Bert Huijben wrote on Thu, Mar 28, 2013 at 13:39:36 +0100:
> 
> 
> > -----Original Message-----
> > From: Daniel Shahaf [mailto:danielsh@elego.de]
> > Sent: donderdag 28 maart 2013 13:22
> > To: Bert Huijben
> > Cc: dev@subversion.apache.org
> > Subject: Re: svn commit: r1462054 - in /subversion/branches/verify-at-
> > commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-
> > loader.h
> > 
> > Bert Huijben wrote on Thu, Mar 28, 2013 at 12:52:51 +0100:
> > >
> > >
> > > > -----Original Message-----
> > > > From: danielsh@apache.org [mailto:danielsh@apache.org]
> > > > Sent: donderdag 28 maart 2013 12:37
> > > > To: commits@subversion.apache.org
> > > > Subject: svn commit: r1462054 - in /subversion/branches/verify-at-
> > > > commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c
> libsvn_fs/fs-
> > > > loader.h
> > > >
> > > > Author: danielsh
> > > > Date: Thu Mar 28 11:36:50 2013
> > > > New Revision: 1462054
> > > >
> > > > URL: http://svn.apache.org/r1462054
> > > > Log:
> > > > On the verify-at-commit branch, add a backend-independent
> > > > implementation:
> > > > a db/fs.conf file.
> > > >
> > > > * subversion/include/svn_fs.h
> > > >   (SVN_FS_CONFIG_SECTION_MISC,
> > > > (SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT):
> > > >     New macros.
> > > >
> > > > * subversion/libsvn_fs/fs-loader.h
> > > >   (svn_fs_t.verify_at_commit): New struct member.
> > > >
> > > > * subversion/libsvn_fs/fs-loader.c
> > > >   (svn_config.h): Include.
> > > >   (CONFIG_FILENAME): New macro.
> > > >   (write_config): New helper, based on the eponymous helper in FSFS
> > (rather
> > > >     than on write_fs_type()).
> > > >   (svn_fs_create): Call write_config().
> > > >   (fs_new): Initialise new struct member.
> > > >   (svn_fs_open): Read config.
> > > >   (svn_fs_commit_txn): Use the config.
> > >
> > > Summarizing what was said on #svn-dev
> > >
> > > I think we should make this a fsfs only feature that uses the existing
> > fsfs.conf file.
> > >
> > > The option doesn't appear to make much sense for the now mostly
> > > deprecated for new development BDB filesystem and the new work of
> > > stefan2 might give new ideas. Besides if a filesystem needs a
> > > verification step to be stable that should be part of the design.
> > > Subversion shouldn't start assuming that filesystems are likely to
> > > break down. (What would that tell us about the stability of previous
> > > versions of Subversion?)
> > >
> > 
> > Bugs happen, this is one way to catch them.  Compare r1086222, which is
> > basically a special-case of this work.
> > 
> > > The reading of one file for each access to the repository is a more
> > > than measurable slowdown when profiling operations. (Reading fsfs.conf
> > > over and over again is one of the most expensive things apache worker
> > > processes do when I profiled them. I think stefan2 optimized some of
> > > this away)
> > >
> > 
> > OK, we can move this particular config knob to be provided at runtime by
> > whoever creates the svn_fs_t.
> > 
> > > Opening a file is expensive on Windows and probably on any other
> > > system that always uses locking for file accesss and/or on-demand
> > > virus scanners and also on network drives.
> > >
> > 
> > Don't virus-scan repository config files.  (Why would you want to do
> > that?  Do you fear svn would try to execute the config option's value?)
> 
> These scanners scan everything; any file access.
> 
> Which might just be for looking that the file is uninteresting, but to get
> to that point they already opened the file and read the header. (Equivalent
> to what libmagic does)
> 
> In many companies everything must be scanned and in most companies that use
> virusscanners on Windows the average user is not even allowed to configure
> skipping specific directories.
> 

Who cares about the average user?  This is server code.

> And users are certainly not going to configure a specific new in Subversion
> 1.8 file to be ignored. That doesn't scale. (And .conf is not a standard
> windows extension that would be pre-configured)
> 
> The skip handling may introduce bugs, so in many virusscanners the skip
> handling is done in the scanner process and not in the kernel module that
> hands the file to the scanner. So you already have the penalty of at least
> two interprocess communication cycles.
> 

Right, because for some unexplainable reason you chose to run a virus
scanner on a server box.  That's about as questionable to me as running
'verify' during the commit process seems to be to you. :-)

> > >
> <snip>
> 
> 	Bert
> 

RE: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Daniel Shahaf [mailto:danielsh@elego.de]
> Sent: donderdag 28 maart 2013 13:22
> To: Bert Huijben
> Cc: dev@subversion.apache.org
> Subject: Re: svn commit: r1462054 - in /subversion/branches/verify-at-
> commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-
> loader.h
> 
> Bert Huijben wrote on Thu, Mar 28, 2013 at 12:52:51 +0100:
> >
> >
> > > -----Original Message-----
> > > From: danielsh@apache.org [mailto:danielsh@apache.org]
> > > Sent: donderdag 28 maart 2013 12:37
> > > To: commits@subversion.apache.org
> > > Subject: svn commit: r1462054 - in /subversion/branches/verify-at-
> > > commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c
libsvn_fs/fs-
> > > loader.h
> > >
> > > Author: danielsh
> > > Date: Thu Mar 28 11:36:50 2013
> > > New Revision: 1462054
> > >
> > > URL: http://svn.apache.org/r1462054
> > > Log:
> > > On the verify-at-commit branch, add a backend-independent
> > > implementation:
> > > a db/fs.conf file.
> > >
> > > * subversion/include/svn_fs.h
> > >   (SVN_FS_CONFIG_SECTION_MISC,
> > > (SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT):
> > >     New macros.
> > >
> > > * subversion/libsvn_fs/fs-loader.h
> > >   (svn_fs_t.verify_at_commit): New struct member.
> > >
> > > * subversion/libsvn_fs/fs-loader.c
> > >   (svn_config.h): Include.
> > >   (CONFIG_FILENAME): New macro.
> > >   (write_config): New helper, based on the eponymous helper in FSFS
> (rather
> > >     than on write_fs_type()).
> > >   (svn_fs_create): Call write_config().
> > >   (fs_new): Initialise new struct member.
> > >   (svn_fs_open): Read config.
> > >   (svn_fs_commit_txn): Use the config.
> >
> > Summarizing what was said on #svn-dev
> >
> > I think we should make this a fsfs only feature that uses the existing
> fsfs.conf file.
> >
> > The option doesn't appear to make much sense for the now mostly
> > deprecated for new development BDB filesystem and the new work of
> > stefan2 might give new ideas. Besides if a filesystem needs a
> > verification step to be stable that should be part of the design.
> > Subversion shouldn't start assuming that filesystems are likely to
> > break down. (What would that tell us about the stability of previous
> > versions of Subversion?)
> >
> 
> Bugs happen, this is one way to catch them.  Compare r1086222, which is
> basically a special-case of this work.
> 
> > The reading of one file for each access to the repository is a more
> > than measurable slowdown when profiling operations. (Reading fsfs.conf
> > over and over again is one of the most expensive things apache worker
> > processes do when I profiled them. I think stefan2 optimized some of
> > this away)
> >
> 
> OK, we can move this particular config knob to be provided at runtime by
> whoever creates the svn_fs_t.
> 
> > Opening a file is expensive on Windows and probably on any other
> > system that always uses locking for file accesss and/or on-demand
> > virus scanners and also on network drives.
> >
> 
> Don't virus-scan repository config files.  (Why would you want to do
> that?  Do you fear svn would try to execute the config option's value?)

These scanners scan everything; any file access.

Which might just be for looking that the file is uninteresting, but to get
to that point they already opened the file and read the header. (Equivalent
to what libmagic does)

In many companies everything must be scanned and in most companies that use
virusscanners on Windows the average user is not even allowed to configure
skipping specific directories.

And users are certainly not going to configure a specific new in Subversion
1.8 file to be ignored. That doesn't scale. (And .conf is not a standard
windows extension that would be pre-configured)

The skip handling may introduce bugs, so in many virusscanners the skip
handling is done in the scanner process and not in the kernel module that
hands the file to the scanner. So you already have the penalty of at least
two interprocess communication cycles.

> >
<snip>

	Bert


Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Ben Reser <be...@reser.org>.
On Thu, Mar 28, 2013 at 7:37 AM, Daniel Shahaf <da...@elego.de> wrote:
> I also prefer the backend-specific approach since the other one can be
> implemented with hook scripts just the same.
>
> The "penalty" would have been optional and off-by-default, though.

Personally I really like the svnlook idea.  Though I'm not sure that
we can be sure that the transaction is in the right state to verify it
just before committing in the pre-commit hook for all the
implementations.

In the meantime I think a fsfs only option would be fine.

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Daniel Shahaf <da...@elego.de>.
C. Michael Pilato wrote on Thu, Mar 28, 2013 at 10:12:38 -0400:
> On 03/28/2013 08:22 AM, Daniel Shahaf wrote:
> > See above, and yes if we end up opting for the backend-specific
> > implementation then the new fs.conf file will go away.
> 
> If this is to be done at all, it should be backend-specific.  No sense in
> preemptively penalizing every backends' (current and future) performance
> "just in case".  After all, one of our existing backends has a long history
> of not having these sorts of data-munging bugs ... and is already
> sufficiently slow thankyouverymuch.  :-)

I also prefer the backend-specific approach since the other one can be
implemented with hook scripts just the same.

The "penalty" would have been optional and off-by-default, though.

Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by "C. Michael Pilato" <cm...@collab.net>.
On 03/28/2013 08:22 AM, Daniel Shahaf wrote:
> See above, and yes if we end up opting for the backend-specific
> implementation then the new fs.conf file will go away.

If this is to be done at all, it should be backend-specific.  No sense in
preemptively penalizing every backends' (current and future) performance
"just in case".  After all, one of our existing backends has a long history
of not having these sorts of data-munging bugs ... and is already
sufficiently slow thankyouverymuch.  :-)

-- 
C. Michael Pilato <cm...@collab.net>
CollabNet   <>   www.collab.net   <>   Enterprise Cloud Development


Re: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Daniel Shahaf <da...@elego.de>.
Bert Huijben wrote on Thu, Mar 28, 2013 at 12:52:51 +0100:
> 
> 
> > -----Original Message-----
> > From: danielsh@apache.org [mailto:danielsh@apache.org]
> > Sent: donderdag 28 maart 2013 12:37
> > To: commits@subversion.apache.org
> > Subject: svn commit: r1462054 - in /subversion/branches/verify-at-
> > commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-
> > loader.h
> > 
> > Author: danielsh
> > Date: Thu Mar 28 11:36:50 2013
> > New Revision: 1462054
> > 
> > URL: http://svn.apache.org/r1462054
> > Log:
> > On the verify-at-commit branch, add a backend-independent
> > implementation:
> > a db/fs.conf file.
> > 
> > * subversion/include/svn_fs.h
> >   (SVN_FS_CONFIG_SECTION_MISC,
> > (SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT):
> >     New macros.
> > 
> > * subversion/libsvn_fs/fs-loader.h
> >   (svn_fs_t.verify_at_commit): New struct member.
> > 
> > * subversion/libsvn_fs/fs-loader.c
> >   (svn_config.h): Include.
> >   (CONFIG_FILENAME): New macro.
> >   (write_config): New helper, based on the eponymous helper in FSFS (rather
> >     than on write_fs_type()).
> >   (svn_fs_create): Call write_config().
> >   (fs_new): Initialise new struct member.
> >   (svn_fs_open): Read config.
> >   (svn_fs_commit_txn): Use the config.
> 
> Summarizing what was said on #svn-dev
> 
> I think we should make this a fsfs only feature that uses the existing fsfs.conf file.
> 
> The option doesn't appear to make much sense for the now mostly
> deprecated for new development BDB filesystem and the new work of
> stefan2 might give new ideas. Besides if a filesystem needs a
> verification step to be stable that should be part of the design.
> Subversion shouldn't start assuming that filesystems are likely to
> break down. (What would that tell us about the stability of previous
> versions of Subversion?)
> 

Bugs happen, this is one way to catch them.  Compare r1086222, which is
basically a special-case of this work.

> The reading of one file for each access to the repository is a more
> than measurable slowdown when profiling operations. (Reading fsfs.conf
> over and over again is one of the most expensive things apache worker
> processes do when I profiled them. I think stefan2 optimized some of
> this away)
> 

OK, we can move this particular config knob to be provided at runtime by
whoever creates the svn_fs_t.

> Opening a file is expensive on Windows and probably on any other
> system that always uses locking for file accesss and/or on-demand
> virus scanners and also on network drives.
> 

Don't virus-scan repository config files.  (Why would you want to do
that?  Do you fear svn would try to execute the config option's value?)

> 
> I don't think introducing yet another config file makes much sense. If
> users want to turn on verifications at every commit they probably want
> it for all their repositories (in which case the config option belongs
> in the apache or svnserve config) or they are looking at specific fsfs
> issues, in which case the option can be in fsfs.conf which is read
> anyway.
> 

See above, and yes if we end up opting for the backend-specific
implementation then the new fs.conf file will go away.

> 
> I wouldn't want to introduce yet another layer of configuration for
> this verification helper. 
> 
> But then again I can see reasons that some users want the explicit
> verification. fsfs.conf and/or a post commit hook are good enough
> solutions without any performance/design implications.
> 

post-commit isn't good enough, since it's post-.

'svnlook verify -t' in pre-commit is as good as the libsvn_fs implementation.
That is nearly as good as the "just before bumping current" approach,
althuogh that one would catch bugs in the rev file that are absent from
the proto-rev file, too.

> 	Bert
> 

Re: Opening the repository hooks environment file

Posted by Ben Reser <be...@reser.org>.
On Thu, Mar 28, 2013 at 1:02 PM, Ivan Zhakov <iv...@visualsvn.com> wrote:
> Sorry, it seems I missed something important: what is the hooks
> environment file? But I agree that we need explicit mod_dav_svn option
> to enable this functionality if this new in svn 1.8.

http://svn.apache.org/r1380608

Re: Opening the repository hooks environment file

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Thu, Mar 28, 2013 at 10:31 PM, Philip Martin
<ph...@wandisco.com> wrote:
> "Bert Huijben" <be...@qqmail.nl> writes:
>
>> The reading of one file for each access to the repository is a more
>> than measurable slowdown when profiling operations. (Reading fsfs.conf
>> over and over again is one of the most expensive things apache worker
>> processes do when I profiled them. I think stefan2 optimized some of
>> this away)
>
> We have already picked up one new file on every access in 1.8: the hooks
> environment file.  This appears to be opened and parsed for every time
> mod_dav_svn opens the repository, both read and write operations.
>
> Perhaps we should require an explict config setting to enable the hooks
> file so that we can avoid opening it when it is empty?  Or perhaps we
> could make the opening/parsing lazy and delay it until running a hook
> thus avoiding it for read operations?
>
Sorry, it seems I missed something important: what is the hooks
environment file? But I agree that we need explicit mod_dav_svn option
to enable this functionality if this new in svn 1.8.

-- 
Ivan Zhakov

Re: Opening the repository hooks environment file

Posted by Daniel Shahaf <da...@elego.de>.
Stefan Sperling wrote on Wed, Apr 03, 2013 at 13:12:12 +0200:
> On Wed, Apr 03, 2013 at 02:58:56PM +0400, Ivan Zhakov wrote:
> > On Wed, Apr 3, 2013 at 2:56 PM, Stefan Sperling <st...@elego.de> wrote:
> > > On Wed, Apr 03, 2013 at 02:03:32PM +0400, Ivan Zhakov wrote:
> > >> I'm still not happy with svn_repos_hooks_setenv() API, but this is
> > >> separate issue.
> > >
> > > Why aren't you happy with it?
> > >
> > > It's a new API that has to be called in order to activate the hooks-env
> > > feature. Server-side tools written against 1.7 and earlier repos APIs
> > > should not see a behaviour change when using 1.8 libs at runtime.
> > I think result_pool parameter should be replaced with pool member in
> > svn_repos_t structure.
> 
> Or, as Philip suggested, we could replace the svn_repos_hooks_setenv()
> function with a new 'hooks_env_path' parameter to svn_repos_open().
> 
> > Also it's not possible to deactivate hooks-env feature using
> > svn_repos_hooks_setenv() because NULL path considered as 'use default
> > path'.
> 
> Why would you want to deactivate this feature via the API?
> Just don't put a hooks-env configuration file into the repository,
> and don't specify a hooks-env path in the server configuration.
> Isn't that good enough?

Or put "/usr/bin/env - "  in your hook script.

Re: Opening the repository hooks environment file

Posted by Philip Martin <ph...@wandisco.com>.
Stefan Sperling <st...@elego.de> writes:

> On Wed, Apr 03, 2013 at 02:58:56PM +0400, Ivan Zhakov wrote:
>> I think result_pool parameter should be replaced with pool member in
>> svn_repos_t structure.
>
> Or, as Philip suggested, we could replace the svn_repos_hooks_setenv()
> function with a new 'hooks_env_path' parameter to svn_repos_open().

With a pool in the structure we could cache the lazily created hash in
the structure as well.  Then when running multiple hooks, pre- and post-
commit or pre- and post- unlock say, we would only parse the environment
file once.

-- 
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download

Re: Opening the repository hooks environment file

Posted by Stefan Sperling <st...@elego.de>.
On Wed, Apr 03, 2013 at 02:58:56PM +0400, Ivan Zhakov wrote:
> On Wed, Apr 3, 2013 at 2:56 PM, Stefan Sperling <st...@elego.de> wrote:
> > On Wed, Apr 03, 2013 at 02:03:32PM +0400, Ivan Zhakov wrote:
> >> I'm still not happy with svn_repos_hooks_setenv() API, but this is
> >> separate issue.
> >
> > Why aren't you happy with it?
> >
> > It's a new API that has to be called in order to activate the hooks-env
> > feature. Server-side tools written against 1.7 and earlier repos APIs
> > should not see a behaviour change when using 1.8 libs at runtime.
> I think result_pool parameter should be replaced with pool member in
> svn_repos_t structure.

Or, as Philip suggested, we could replace the svn_repos_hooks_setenv()
function with a new 'hooks_env_path' parameter to svn_repos_open().

> Also it's not possible to deactivate hooks-env feature using
> svn_repos_hooks_setenv() because NULL path considered as 'use default
> path'.

Why would you want to deactivate this feature via the API?
Just don't put a hooks-env configuration file into the repository,
and don't specify a hooks-env path in the server configuration.
Isn't that good enough?

Note that the default path exists for ra_local, since there is no good
way of passing a config file path to the repos layer when ra_local
is used. Using an environment variable to store the hooks-env path was
discussed, but I don't like this approach since it doesn't scale well
to additional configuration files we might introduce in future releases.

Re: Opening the repository hooks environment file

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Wed, Apr 3, 2013 at 2:56 PM, Stefan Sperling <st...@elego.de> wrote:
> On Wed, Apr 03, 2013 at 02:03:32PM +0400, Ivan Zhakov wrote:
>> I'm still not happy with svn_repos_hooks_setenv() API, but this is
>> separate issue.
>
> Why aren't you happy with it?
>
> It's a new API that has to be called in order to activate the hooks-env
> feature. Server-side tools written against 1.7 and earlier repos APIs
> should not see a behaviour change when using 1.8 libs at runtime.
I think result_pool parameter should be replaced with pool member in
svn_repos_t structure.

Also it's not possible to deactivate hooks-env feature using
svn_repos_hooks_setenv() because NULL path considered as 'use default
path'.

-- 
Ivan Zhakov

Re: Opening the repository hooks environment file

Posted by Stefan Sperling <st...@elego.de>.
On Wed, Apr 03, 2013 at 02:03:32PM +0400, Ivan Zhakov wrote:
> I'm still not happy with svn_repos_hooks_setenv() API, but this is
> separate issue.

Why aren't you happy with it?

It's a new API that has to be called in order to activate the hooks-env
feature. Server-side tools written against 1.7 and earlier repos APIs
should not see a behaviour change when using 1.8 libs at runtime.

Re: Opening the repository hooks environment file

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Wed, Apr 3, 2013 at 1:48 PM, Ivan Zhakov <iv...@visualsvn.com> wrote:
> On Wed, Apr 3, 2013 at 4:38 AM, Ben Reser <be...@reser.org> wrote:
>> I've gone ahead and moved the default hooks-env to hooks-env.tmpl.
>> We'll still try to open the file but we won't bother to read it in or
>> parse a completely commented out file unless the user puts a file in
>> place.  The .tmpl is a common pattern with hooks already so it should
>> add any confusion.
>>
> I'm going to implement reading hooks environment file just before hook
> execution instead of repository open.
>
Done in r1463902.

I'm still not happy with svn_repos_hooks_setenv() API, but this is
separate issue.

-- 
Ivan Zhakov

Re: Opening the repository hooks environment file

Posted by Philip Martin <ph...@wandisco.com>.
Branko Čibej <br...@wandisco.com> writes:

> I thihk there has to be a way to say all of the following:
>
>  1. do not use a hooks-env file at all
>  2. use the default hooks-env file
>  3. use a specific, non-default hooks-env file

Do we need option 2, the default path?

The hooks-env path is specified in apache config file, the admin chooses
the path or no path.  The path is also specified in the svnserve config
file, the admin chooses the path or no path.  For ra_local there is a
hard-coded path and the admin chooses whether that file is present.
That is sufficient for the admin to control the environment.

Do we need option 1?

The svn_repos API doesn't have options to bypass hooks so why should it
provide a way to bypass the environment?  If the admin has configured
hooks they always run. If the admin has configured an environment it
should always be used.

-- 
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download

Re: Opening the repository hooks environment file

Posted by Branko Čibej <br...@wandisco.com>.
On 03.04.2013 13:01, Philip Martin wrote:
> Ivan Zhakov <iv...@visualsvn.com> writes:
>
>> On Wed, Apr 3, 2013 at 4:38 AM, Ben Reser <be...@reser.org> wrote:
>>> I've gone ahead and moved the default hooks-env to hooks-env.tmpl.
>>> We'll still try to open the file but we won't bother to read it in or
>>> parse a completely commented out file unless the user puts a file in
>>> place.  The .tmpl is a common pattern with hooks already so it should
>>> add any confusion.
>>>
>> I'm going to implement reading hooks environment file just before hook
>> execution instead of repository open.
> The current API is:
>
> svn_error_t *
> svn_repos_hooks_setenv(svn_repos_t *repos,
>                        const char *hooks_env_path,
>                        apr_pool_t *result_pool,
>                        apr_pool_t *scratch_pool);
>
> With your change svn_repos_hooks_setenv no longer needs scratch_pool.
> It does still require result_pool to be the same pool used to allocate
> svn_repos_t and that's a bit ugly.  Perhaps we should add a pool member
> to svn_repos_t and store the pool when allocating the struct?  Perhaps
> we should remove svn_repos_hooks_setenv and pass the path to a new
> svn_repos_open3?

I thihk there has to be a way to say all of the following:

 1. do not use a hooks-env file at all
 2. use the default hooks-env file
 3. use a specific, non-default hooks-env file

Given that just calling svn_repos_open2 will (?) result in behaviour
(1),  then svn_repos_hooks_setenv should result in (2) if hooks_env_path
is NULL, and (3) otherwise.

I think it would be a bit too involved to represent the tristate nature
of the option in a svn_repos_open3; although we could introduce a
"default"-type constant value (e.g., an empty string) to represent
either (1) or (2) (with NULL representing the other of these two options).

But in any case, there should never be an option to use a separate
result pool; any storage associated with the env file path must have the
same lifetime as the repository handle.

-- Brane

-- 
Branko Čibej
Director of Subversion | WANdisco | www.wandisco.com


Re: Opening the repository hooks environment file

Posted by Philip Martin <ph...@wandisco.com>.
Ivan Zhakov <iv...@visualsvn.com> writes:

> On Wed, Apr 3, 2013 at 4:38 AM, Ben Reser <be...@reser.org> wrote:
>> I've gone ahead and moved the default hooks-env to hooks-env.tmpl.
>> We'll still try to open the file but we won't bother to read it in or
>> parse a completely commented out file unless the user puts a file in
>> place.  The .tmpl is a common pattern with hooks already so it should
>> add any confusion.
>>
> I'm going to implement reading hooks environment file just before hook
> execution instead of repository open.

The current API is:

svn_error_t *
svn_repos_hooks_setenv(svn_repos_t *repos,
                       const char *hooks_env_path,
                       apr_pool_t *result_pool,
                       apr_pool_t *scratch_pool);

With your change svn_repos_hooks_setenv no longer needs scratch_pool.
It does still require result_pool to be the same pool used to allocate
svn_repos_t and that's a bit ugly.  Perhaps we should add a pool member
to svn_repos_t and store the pool when allocating the struct?  Perhaps
we should remove svn_repos_hooks_setenv and pass the path to a new
svn_repos_open3?

-- 
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download

Re: Opening the repository hooks environment file

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Wed, Apr 3, 2013 at 4:38 AM, Ben Reser <be...@reser.org> wrote:
> I've gone ahead and moved the default hooks-env to hooks-env.tmpl.
> We'll still try to open the file but we won't bother to read it in or
> parse a completely commented out file unless the user puts a file in
> place.  The .tmpl is a common pattern with hooks already so it should
> add any confusion.
>
I'm going to implement reading hooks environment file just before hook
execution instead of repository open.

-- 
Ivan Zhakov

Re: Opening the repository hooks environment file

Posted by Ben Reser <be...@reser.org>.
I've gone ahead and moved the default hooks-env to hooks-env.tmpl.
We'll still try to open the file but we won't bother to read it in or
parse a completely commented out file unless the user puts a file in
place.  The .tmpl is a common pattern with hooks already so it should
add any confusion.

On Thu, Mar 28, 2013 at 2:33 PM, Bert Huijben <be...@qqmail.nl> wrote:
>
>
>> -----Original Message-----
>> From: Branko Čibej [mailto:brane@wandisco.com]
>> Sent: donderdag 28 maart 2013 22:06
>> To: dev@subversion.apache.org
>> Subject: Re: Opening the repository hooks environment file
>>
>> On 28.03.2013 21:50, Bert Huijben wrote:
>> >
>> >> -----Original Message-----
>> >> From: MARTIN PHILIP [mailto:codematters@ntlworld.com] On Behalf Of
>> >> Philip Martin
>> >> Sent: donderdag 28 maart 2013 19:32
>> >> To: Bert Huijben
>> >> Cc: dev@subversion.apache.org
>> >> Subject: Opening the repository hooks environment file
>> >>
>> >> "Bert Huijben" <be...@qqmail.nl> writes:
>> >>
>> >>> The reading of one file for each access to the repository is a more
>> >>> than measurable slowdown when profiling operations. (Reading
>> fsfs.conf
>> >>> over and over again is one of the most expensive things apache worker
>> >>> processes do when I profiled them. I think stefan2 optimized some of
>> >>> this away)
>> >> We have already picked up one new file on every access in 1.8: the hooks
>> >> environment file.  This appears to be opened and parsed for every time
>> >> mod_dav_svn opens the repository, both read and write operations.
>> >>
>> >> Perhaps we should require an explict config setting to enable the hooks
>> >> file so that we can avoid opening it when it is empty?  Or perhaps we
>> >> could make the opening/parsing lazy and delay it until running a hook
>> >> thus avoiding it for read operations?
>> > Would be nice if we can read it on first use (after the hook exists check?)
>> > and then cache it.
>> >
>> > I'm not sure why this didn't turn op in the performance traces though.
>> Maybe
>> > because this file doesn't exist by default?
>>
>> Sure it does, "svnadmin create" will create a template. I think you're
>> overestimating the cost of reading such small files; it'll mostly stay
>> in RAM once it's been read, and parsing it is not all that expensive.
>
> I'm pretty sure you remember that with Subversion 1.6 running 'svn update' on ^/subversion/branches took one and a half minutes on Windows to obtain the directory locks, before it even started doing anything update related?
>
> (And this was without a virusscanner on a for that time fast desktop harddisk)
>
> Simple file operations may be cheap on one system (E.g. linux) but don't use that as a measurement for other systems. That one and a half minute operation took less than half a second on ext3/ext4.
> And this problem existed on many Subversion releases without anybody noticing... The common suggestion was "Windows is slow".
>
> Loading 'fsfs.conf' is a slow operation. Maybe because it is too big to fit in a buffer; maybe because our parsing is inefficient and even more inefficient on systems where we use "\r\n" as EOL, but it really is performance critical on Windows.
>
> I don't say that we should stop reading fsfs.conf, but why should we introduce a 'fs.conf' at the repos level for things that don't belong at that level and most likely 99% of our users will never use?
>
> fsfs contains things like sharding information which are essential for functionality. It is not a diagnostics configuration file.
>
>
>
>
> Reading a very tiny file hundreds of thousands times during a typical ra-serf test run as on our buildbot makes its impact huge. (Even when using a ramdrive). This will certainly have an impact on production Subversion servers with similar loads.
>
> But as noted earlier... our testsuite uses uncommon small files.
> But the changes applied to the files might be in the more common space.
>
>
>         Bert
>

Re: Opening the repository hooks environment file

Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
Bert Huijben wrote on Thu, Mar 28, 2013 at 22:33:09 +0100:
> I don't say that we should stop reading fsfs.conf, but why should we
> introduce a 'fs.conf' at the repos level for things that don't belong
> at that level and most likely 99% of our users will never use?
> 

fs.conf will be going away.  My current plan (given consensus on this
thread) is to go for the FSFS-specific implementation, controlled by an
svn_fs_t.fs_config knob.

And implement 'svnadmin verify -t', I have a half-written patch for this
lying around.

> fsfs contains things like sharding information which are essential for functionality. It is not a diagnostics configuration file.

That's in the format file, not in fsfs.conf

RE: Opening the repository hooks environment file

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Branko Čibej [mailto:brane@wandisco.com]
> Sent: donderdag 28 maart 2013 22:06
> To: dev@subversion.apache.org
> Subject: Re: Opening the repository hooks environment file
> 
> On 28.03.2013 21:50, Bert Huijben wrote:
> >
> >> -----Original Message-----
> >> From: MARTIN PHILIP [mailto:codematters@ntlworld.com] On Behalf Of
> >> Philip Martin
> >> Sent: donderdag 28 maart 2013 19:32
> >> To: Bert Huijben
> >> Cc: dev@subversion.apache.org
> >> Subject: Opening the repository hooks environment file
> >>
> >> "Bert Huijben" <be...@qqmail.nl> writes:
> >>
> >>> The reading of one file for each access to the repository is a more
> >>> than measurable slowdown when profiling operations. (Reading
> fsfs.conf
> >>> over and over again is one of the most expensive things apache worker
> >>> processes do when I profiled them. I think stefan2 optimized some of
> >>> this away)
> >> We have already picked up one new file on every access in 1.8: the hooks
> >> environment file.  This appears to be opened and parsed for every time
> >> mod_dav_svn opens the repository, both read and write operations.
> >>
> >> Perhaps we should require an explict config setting to enable the hooks
> >> file so that we can avoid opening it when it is empty?  Or perhaps we
> >> could make the opening/parsing lazy and delay it until running a hook
> >> thus avoiding it for read operations?
> > Would be nice if we can read it on first use (after the hook exists check?)
> > and then cache it.
> >
> > I'm not sure why this didn't turn op in the performance traces though.
> Maybe
> > because this file doesn't exist by default?
> 
> Sure it does, "svnadmin create" will create a template. I think you're
> overestimating the cost of reading such small files; it'll mostly stay
> in RAM once it's been read, and parsing it is not all that expensive.

I'm pretty sure you remember that with Subversion 1.6 running 'svn update' on ^/subversion/branches took one and a half minutes on Windows to obtain the directory locks, before it even started doing anything update related?

(And this was without a virusscanner on a for that time fast desktop harddisk)

Simple file operations may be cheap on one system (E.g. linux) but don't use that as a measurement for other systems. That one and a half minute operation took less than half a second on ext3/ext4.
And this problem existed on many Subversion releases without anybody noticing... The common suggestion was "Windows is slow".

Loading 'fsfs.conf' is a slow operation. Maybe because it is too big to fit in a buffer; maybe because our parsing is inefficient and even more inefficient on systems where we use "\r\n" as EOL, but it really is performance critical on Windows.

I don't say that we should stop reading fsfs.conf, but why should we introduce a 'fs.conf' at the repos level for things that don't belong at that level and most likely 99% of our users will never use?

fsfs contains things like sharding information which are essential for functionality. It is not a diagnostics configuration file.




Reading a very tiny file hundreds of thousands times during a typical ra-serf test run as on our buildbot makes its impact huge. (Even when using a ramdrive). This will certainly have an impact on production Subversion servers with similar loads.

But as noted earlier... our testsuite uses uncommon small files. 
But the changes applied to the files might be in the more common space.


	Bert 



Re: Opening the repository hooks environment file

Posted by Branko Čibej <br...@wandisco.com>.
On 28.03.2013 21:50, Bert Huijben wrote:
>
>> -----Original Message-----
>> From: MARTIN PHILIP [mailto:codematters@ntlworld.com] On Behalf Of
>> Philip Martin
>> Sent: donderdag 28 maart 2013 19:32
>> To: Bert Huijben
>> Cc: dev@subversion.apache.org
>> Subject: Opening the repository hooks environment file
>>
>> "Bert Huijben" <be...@qqmail.nl> writes:
>>
>>> The reading of one file for each access to the repository is a more
>>> than measurable slowdown when profiling operations. (Reading fsfs.conf
>>> over and over again is one of the most expensive things apache worker
>>> processes do when I profiled them. I think stefan2 optimized some of
>>> this away)
>> We have already picked up one new file on every access in 1.8: the hooks
>> environment file.  This appears to be opened and parsed for every time
>> mod_dav_svn opens the repository, both read and write operations.
>>
>> Perhaps we should require an explict config setting to enable the hooks
>> file so that we can avoid opening it when it is empty?  Or perhaps we
>> could make the opening/parsing lazy and delay it until running a hook
>> thus avoiding it for read operations?
> Would be nice if we can read it on first use (after the hook exists check?)
> and then cache it.
>
> I'm not sure why this didn't turn op in the performance traces though. Maybe
> because this file doesn't exist by default?

Sure it does, "svnadmin create" will create a template. I think you're
overestimating the cost of reading such small files; it'll mostly stay
in RAM once it's been read, and parsing it is not all that expensive.

-- Brane

-- 
Branko Čibej
Director of Subversion | WANdisco | www.wandisco.com


RE: Opening the repository hooks environment file

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: MARTIN PHILIP [mailto:codematters@ntlworld.com] On Behalf Of
> Philip Martin
> Sent: donderdag 28 maart 2013 19:32
> To: Bert Huijben
> Cc: dev@subversion.apache.org
> Subject: Opening the repository hooks environment file
> 
> "Bert Huijben" <be...@qqmail.nl> writes:
> 
> > The reading of one file for each access to the repository is a more
> > than measurable slowdown when profiling operations. (Reading fsfs.conf
> > over and over again is one of the most expensive things apache worker
> > processes do when I profiled them. I think stefan2 optimized some of
> > this away)
> 
> We have already picked up one new file on every access in 1.8: the hooks
> environment file.  This appears to be opened and parsed for every time
> mod_dav_svn opens the repository, both read and write operations.
> 
> Perhaps we should require an explict config setting to enable the hooks
> file so that we can avoid opening it when it is empty?  Or perhaps we
> could make the opening/parsing lazy and delay it until running a hook
> thus avoiding it for read operations?

Would be nice if we can read it on first use (after the hook exists check?)
and then cache it.

I'm not sure why this didn't turn op in the performance traces though. Maybe
because this file doesn't exist by default?

fsfs.conf does exist and is considerable larger than just a few lines
because we generate a few huge comments in it that have to be parsed again
and again.

	Bert

	


Opening the repository hooks environment file

Posted by Philip Martin <ph...@wandisco.com>.
"Bert Huijben" <be...@qqmail.nl> writes:

> The reading of one file for each access to the repository is a more
> than measurable slowdown when profiling operations. (Reading fsfs.conf
> over and over again is one of the most expensive things apache worker
> processes do when I profiled them. I think stefan2 optimized some of
> this away)

We have already picked up one new file on every access in 1.8: the hooks
environment file.  This appears to be opened and parsed for every time
mod_dav_svn opens the repository, both read and write operations.

Perhaps we should require an explict config setting to enable the hooks
file so that we can avoid opening it when it is empty?  Or perhaps we
could make the opening/parsing lazy and delay it until running a hook
thus avoiding it for read operations?

-- 
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download

RE: svn commit: r1462054 - in /subversion/branches/verify-at-commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-loader.h

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: danielsh@apache.org [mailto:danielsh@apache.org]
> Sent: donderdag 28 maart 2013 12:37
> To: commits@subversion.apache.org
> Subject: svn commit: r1462054 - in /subversion/branches/verify-at-
> commit/subversion: include/svn_fs.h libsvn_fs/fs-loader.c libsvn_fs/fs-
> loader.h
> 
> Author: danielsh
> Date: Thu Mar 28 11:36:50 2013
> New Revision: 1462054
> 
> URL: http://svn.apache.org/r1462054
> Log:
> On the verify-at-commit branch, add a backend-independent
> implementation:
> a db/fs.conf file.
> 
> * subversion/include/svn_fs.h
>   (SVN_FS_CONFIG_SECTION_MISC,
> (SVN_FS_CONFIG_OPTION_VERIFY_AT_COMMIT):
>     New macros.
> 
> * subversion/libsvn_fs/fs-loader.h
>   (svn_fs_t.verify_at_commit): New struct member.
> 
> * subversion/libsvn_fs/fs-loader.c
>   (svn_config.h): Include.
>   (CONFIG_FILENAME): New macro.
>   (write_config): New helper, based on the eponymous helper in FSFS (rather
>     than on write_fs_type()).
>   (svn_fs_create): Call write_config().
>   (fs_new): Initialise new struct member.
>   (svn_fs_open): Read config.
>   (svn_fs_commit_txn): Use the config.

Summarizing what was said on #svn-dev

I think we should make this a fsfs only feature that uses the existing fsfs.conf file.

The option doesn't appear to make much sense for the now mostly deprecated for new development BDB filesystem and the new work of stefan2 might give new ideas. Besides if a filesystem needs a verification step to be stable that should be part of the design. Subversion shouldn't start assuming that filesystems are likely to break down. (What would that tell us about the stability of previous versions of Subversion?)

The reading of one file for each access to the repository is a more than measurable slowdown when profiling operations. (Reading fsfs.conf over and over again is one of the most expensive things apache worker processes do when I profiled them. I think stefan2 optimized some of this away)

Opening a file is expensive on Windows and probably on any other system that always uses locking for file accesss and/or on-demand virus scanners and also on network drives.


I don't think introducing yet another config file makes much sense. If users want to turn on verifications at every commit they probably want it for all their repositories (in which case the config option belongs in the apache or svnserve config) or they are looking at specific fsfs issues, in which case the option can be in fsfs.conf which is read anyway.


I wouldn't want to introduce yet another layer of configuration for this verification helper. 

But then again I can see reasons that some users want the explicit verification. fsfs.conf and/or a post commit hook are good enough solutions without any performance/design implications.

	Bert