You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ju...@apache.org on 2011/02/22 16:38:35 UTC

svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Author: julianfoad
Date: Tue Feb 22 15:38:35 2011
New Revision: 1073366

URL: http://svn.apache.org/viewvc?rev=1073366&view=rev
Log:
Make the WC pristine store thread-safe by implementing the spec in section A
of 'notes/wc-ng/pristine-store'.  Mainly this involves making the 'install',
'remove' and 'read' operations run inside a SQLite transaction.

* notes/wc-ng/pristine-store
  (A-3(a), A-3(b)): Specify the need to acquire a reserved lock as an
    explicit step.

* subversion/libsvn_wc/wc_db_pristine.c
  (pristine_read_baton_t, pristine_read_txn): New.
  (pristine_install_baton_t, pristine_install_txn): New.
  (pristine_remove_baton_t, pristine_remove_if_unreferenced_txn): New.
  (svn_wc__db_pristine_read): Re-write to run in a txn, using
    pristine_read_txn(). Require the table row exists as well as the file.
  (svn_wc__db_pristine_install): Re-write to run in a txn, using
    pristine_install_txn(). Never overwrite an existing pristine text.
  (pristine_remove_if_unreferenced): Re-write to run in a txn, using
    pristine_remove_if_unreferenced_txn().
  (svn_wc__db_pristine_check): Ignore orphan files. Don't check whether the
    file exists on disk unless compiled in debug mode.

* subversion/libsvn_wc/wc-queries.sql
  (STMT_INSERT_PRISTINE): New statement, like STMT_INSERT_OR_IGNORE_PRISTINE
    but which will not tolerate an existing row.

Modified:
    subversion/trunk/notes/wc-ng/pristine-store
    subversion/trunk/subversion/libsvn_wc/wc-queries.sql
    subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c

Modified: subversion/trunk/notes/wc-ng/pristine-store
URL: http://svn.apache.org/viewvc/subversion/trunk/notes/wc-ng/pristine-store?rev=1073366&r1=1073365&r2=1073366&view=diff
==============================================================================
--- subversion/trunk/notes/wc-ng/pristine-store (original)
+++ subversion/trunk/notes/wc-ng/pristine-store Tue Feb 22 15:38:35 2011
@@ -56,6 +56,7 @@ The numbered steps should be carried out
 rationale.)
 
 (a) To add a pristine, do the following inside an SDB txn:
+    0. Acquire a 'RESERVED' lock.
     1. Add the table row, and set the refcount as desired.  If a row
        already exists, add the desired refcount to its refcount, and
        preferably verify the old row matches the new metadata.
@@ -65,7 +66,8 @@ rationale.)
        and optionally verify it matches the new metadata (e.g. length).
 
 (b) To remove a pristine, do the following inside an SDB txn:
-    1. First, check refcount == 0, and abort if not.
+    0. Acquire a 'RESERVED' lock.
+    1. Check refcount == 0, and abort if not.
     2. Delete the table row.
     3. Delete the file or move it away. (If not present, log a
        consistency error but, in a release build, return success.)

Modified: subversion/trunk/subversion/libsvn_wc/wc-queries.sql
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc-queries.sql?rev=1073366&r1=1073365&r2=1073366&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc-queries.sql (original)
+++ subversion/trunk/subversion/libsvn_wc/wc-queries.sql Tue Feb 22 15:38:35 2011
@@ -439,6 +439,10 @@ DELETE FROM work_queue WHERE id = ?1;
 INSERT OR IGNORE INTO pristine (checksum, md5_checksum, size, refcount)
 VALUES (?1, ?2, ?3, 0);
 
+-- STMT_INSERT_PRISTINE
+INSERT INTO pristine (checksum, md5_checksum, size, refcount)
+VALUES (?1, ?2, ?3, 0);
+
 -- STMT_SELECT_PRISTINE
 SELECT md5_checksum
 FROM pristine

Modified: subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c?rev=1073366&r1=1073365&r2=1073366&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c (original)
+++ subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c Tue Feb 22 15:38:35 2011
@@ -1,6 +1,8 @@
 /*
  * wc_db_pristine.c :  Pristine ("text base") management
  *
+ * See the spec in 'notes/wc-ng/pristine-store'.
+ *
  * ====================================================================
  *    Licensed to the Apache Software Foundation (ASF) under one
  *    or more contributor license agreements.  See the NOTICE file
@@ -163,6 +165,57 @@ svn_wc__db_pristine_get_future_path(cons
   return SVN_NO_ERROR;
 }
 
+/* Data for pristine_read_txn(). */
+typedef struct pristine_read_baton_t
+{
+  /* Where to put the result stream. */
+  svn_stream_t **contents;
+  /* The pristine text's SHA-1 checksum. */
+  const svn_checksum_t *sha1_checksum;
+  /* The path to the pristine file (within the pristine store). */
+  const char *pristine_abspath;
+  /* The pool from which to allocate the result stream. */
+  apr_pool_t *result_pool;
+} pristine_read_baton_t;
+
+/* Set (*BATON->contents) to a readable stream from which the pristine text
+ * identified by BATON->sha1_checksum can be read from the pristine store of
+ * SDB.  If that text is not in the pristine store, return an error.
+ *
+ * Allocate the stream in BATON->result_pool.
+ *
+ * This function expects to be executed inside a SQLite txn.
+ *
+ * Implements 'notes/wc-ng/pristine-store' section A-3(d).
+ * Implements svn_sqlite__transaction_callback_t. */
+static svn_error_t *
+pristine_read_txn(void *baton,
+                  svn_sqlite__db_t *sdb,
+                  apr_pool_t *scratch_pool)
+{
+  pristine_read_baton_t *b = baton;
+  svn_sqlite__stmt_t *stmt;
+  svn_boolean_t have_row;
+
+  /* Check that this pristine text is present in the store.  (The presence
+   * of the file is not sufficient.) */
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_SELECT_PRISTINE));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__step(&have_row, stmt));
+  SVN_ERR(svn_sqlite__reset(stmt));
+  if (! have_row)
+    {
+      return svn_error_createf(SVN_ERR_WC_PATH_NOT_FOUND, NULL,
+                               _("Pristine text '%s' not present"),
+                               svn_checksum_to_cstring_display(
+                                 b->sha1_checksum, scratch_pool));
+    }
+
+  SVN_ERR(svn_stream_open_readonly(b->contents, b->pristine_abspath,
+                                   b->result_pool, scratch_pool));
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_wc__db_pristine_read(svn_stream_t **contents,
                          svn_wc__db_t *db,
@@ -173,7 +226,7 @@ svn_wc__db_pristine_read(svn_stream_t **
 {
   svn_wc__db_pdh_t *pdh;
   const char *local_relpath;
-  const char *pristine_abspath;
+  pristine_read_baton_t b;
 
   SVN_ERR_ASSERT(contents != NULL);
   SVN_ERR_ASSERT(svn_dirent_is_absolute(wri_abspath));
@@ -191,15 +244,18 @@ svn_wc__db_pristine_read(svn_stream_t **
                               scratch_pool, scratch_pool));
   VERIFY_USABLE_WCROOT(pdh->wcroot);
 
-  /* ### should we look in the PRISTINE table for anything?  */
-
-  SVN_ERR(get_pristine_fname(&pristine_abspath, pdh->wcroot->abspath,
+  b.contents = contents;
+  b.sha1_checksum = sha1_checksum;
+  b.result_pool = result_pool;
+  SVN_ERR(get_pristine_fname(&b.pristine_abspath, pdh->wcroot->abspath,
                              sha1_checksum,
                              FALSE /* create_subdir */,
                              scratch_pool, scratch_pool));
-  return svn_error_return(svn_stream_open_readonly(
-                            contents, pristine_abspath,
-                            result_pool, scratch_pool));
+  SVN_ERR(svn_sqlite__with_transaction(pdh->wcroot->sdb,
+                                       pristine_read_txn, &b,
+                                       scratch_pool));
+
+  return SVN_NO_ERROR;
 }
 
 
@@ -230,6 +286,82 @@ svn_wc__db_pristine_get_tempdir(const ch
 }
 
 
+/* Data for pristine_install_txn(). */
+typedef struct pristine_install_baton_t
+{
+  /* The path to the source file that is to be moved into place. */
+  const char *tempfile_abspath;
+  /* The target path for the file (within the pristine store). */
+  const char *pristine_abspath;
+  /* The pristine text's SHA-1 checksum. */
+  const svn_checksum_t *sha1_checksum;
+  /* The pristine text's MD-5 checksum. */
+  const svn_checksum_t *md5_checksum;
+} pristine_install_baton_t;
+
+
+/* Install the pristine text described by BATON into the pristine store of
+ * SDB.  If it is already stored then just delete the new file
+ * BATON->tempfile_abspath.
+ *
+ * This function expects to be executed inside a SQLite txn that has already
+ * acquired a 'RESERVED' lock.
+ * 
+ * Implements 'notes/wc-ng/pristine-store' section A-3(a).
+ * Implements svn_sqlite__transaction_callback_t. */
+static svn_error_t *
+pristine_install_txn(void *baton,
+                     svn_sqlite__db_t *sdb,
+                     apr_pool_t *scratch_pool)
+{
+  pristine_install_baton_t *b = baton;
+  apr_finfo_t finfo;
+  svn_sqlite__stmt_t *stmt;
+  svn_boolean_t have_row;
+  svn_node_kind_t kind;
+
+  /* If this pristine text is already present in the store, just keep it:
+   * delete the new one and return. */
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_SELECT_PRISTINE));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__step(&have_row, stmt));
+  SVN_ERR(svn_sqlite__reset(stmt));
+  if (have_row)
+    {
+      /* Remove the temp file: it's already there */
+      /* ### TODO: Maybe verify the new file matches the existing one. */
+      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
+      return SVN_NO_ERROR;
+    }
+
+  /* Move the file to its target location, or discard it if already there. */
+  SVN_ERR(svn_io_check_path(b->pristine_abspath, &kind, scratch_pool));
+  if (kind == svn_node_file)
+    {
+      /* Remove the temp file: it's already there */
+      /* ### TODO: Maybe verify the new file matches the existing one. */
+      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
+    }
+  else
+    {
+      SVN_ERR(svn_io_file_rename(b->tempfile_abspath, b->pristine_abspath,
+                                 scratch_pool));
+    }
+
+  SVN_ERR(svn_io_stat(&finfo, b->pristine_abspath, APR_FINFO_SIZE,
+                      scratch_pool));
+
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
+                                    STMT_INSERT_PRISTINE));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, b->md5_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__bind_int64(stmt, 3, finfo.size));
+  SVN_ERR(svn_sqlite__insert(NULL, stmt));
+
+  return SVN_NO_ERROR;
+}
+
+
 svn_error_t *
 svn_wc__db_pristine_install(svn_wc__db_t *db,
                             const char *tempfile_abspath,
@@ -240,10 +372,7 @@ svn_wc__db_pristine_install(svn_wc__db_t
   svn_wc__db_pdh_t *pdh;
   const char *local_relpath;
   const char *wri_abspath;
-  const char *pristine_abspath;
-  apr_finfo_t finfo;
-  svn_sqlite__stmt_t *stmt;
-  svn_node_kind_t kind;
+  struct pristine_install_baton_t b;
 
   SVN_ERR_ASSERT(svn_dirent_is_absolute(tempfile_abspath));
   SVN_ERR_ASSERT(sha1_checksum != NULL);
@@ -264,35 +393,20 @@ svn_wc__db_pristine_install(svn_wc__db_t
                               scratch_pool, scratch_pool));
   VERIFY_USABLE_WCROOT(pdh->wcroot);
 
-  SVN_ERR(get_pristine_fname(&pristine_abspath, pdh->wcroot->abspath,
+  b.tempfile_abspath = tempfile_abspath;
+  b.sha1_checksum = sha1_checksum;
+  b.md5_checksum = md5_checksum;
+
+  SVN_ERR(get_pristine_fname(&b.pristine_abspath, pdh->wcroot->abspath,
                              sha1_checksum,
                              TRUE /* create_subdir */,
                              scratch_pool, scratch_pool));
 
-
-  SVN_ERR(svn_io_check_path(pristine_abspath, &kind, scratch_pool));
-
-  if (kind == svn_node_file)
-    {
-      /* Remove the tempfile, it's already there */
-      return svn_error_return(
-                  svn_io_remove_file2(tempfile_abspath,
-                                      FALSE, scratch_pool));
-    }
-
-  /* Put the file into its target location.  */
-    SVN_ERR(svn_io_file_rename(tempfile_abspath, pristine_abspath,
-                               scratch_pool));
-
-  SVN_ERR(svn_io_stat(&finfo, pristine_abspath, APR_FINFO_SIZE,
-                      scratch_pool));
-
-  SVN_ERR(svn_sqlite__get_statement(&stmt, pdh->wcroot->sdb,
-                                    STMT_INSERT_OR_IGNORE_PRISTINE));
-  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, sha1_checksum, scratch_pool));
-  SVN_ERR(svn_sqlite__bind_checksum(stmt, 2, md5_checksum, scratch_pool));
-  SVN_ERR(svn_sqlite__bind_int64(stmt, 3, finfo.size));
-  SVN_ERR(svn_sqlite__insert(NULL, stmt));
+  /* Ensure the SQL txn has at least a 'RESERVED' lock before we start looking
+   * at the disk, to ensure no concurrent pristine install/delete txn. */
+  SVN_ERR(svn_sqlite__with_immediate_transaction(pdh->wcroot->sdb,
+                                                 pristine_install_txn, &b,
+                                                 scratch_pool));
 
   return SVN_NO_ERROR;
 }
@@ -378,37 +492,74 @@ svn_wc__db_pristine_get_sha1(const svn_c
 }
 
 
-/* If the pristine text referenced by SHA1_CHECKSUM has a reference count
- * of zero, delete it from the pristine store of WCROOT.  Delete both the
- * database row and the file on disk. */
+/* Data for pristine_remove_if_unreferenced_txn(). */
+typedef struct pristine_remove_baton_t
+{
+  /* The pristine text's SHA-1 checksum. */
+  const svn_checksum_t *sha1_checksum;
+  /* The path to the pristine file (within the pristine store). */
+  const char *pristine_abspath;
+} pristine_remove_baton_t;
+
+/* If the pristine text referenced by BATON in SDB has a reference count of
+ * zero, delete it (both the database row and the disk file).
+ *
+ * This function expects to be executed inside a SQLite txn that has already
+ * acquired a 'RESERVED' lock.
+ *
+ * Implements svn_sqlite__transaction_callback_t. */
 static svn_error_t *
-pristine_remove_if_unreferenced(svn_wc__db_wcroot_t *wcroot,
-                                const svn_checksum_t *sha1_checksum,
-                                apr_pool_t *scratch_pool)
+pristine_remove_if_unreferenced_txn(void *baton,
+                                    svn_sqlite__db_t *sdb,
+                                    apr_pool_t *scratch_pool)
 {
+  pristine_remove_baton_t *b = baton;
   svn_sqlite__stmt_t *stmt;
-  const char *pristine_abspath;
   int affected_rows;
 
   /* Remove the DB row, if refcount is 0. */
-  SVN_ERR(svn_sqlite__get_statement(&stmt, wcroot->sdb,
+  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb,
                                     STMT_DELETE_PRISTINE_IF_UNREFERENCED));
-  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, sha1_checksum, scratch_pool));
+  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
   SVN_ERR(svn_sqlite__update(&affected_rows, stmt));
 
+  /* If we removed the DB row, then remove the file. */
   if (affected_rows > 0)
     {
-      /* Remove the file. */
-      SVN_ERR(get_pristine_fname(&pristine_abspath, wcroot->abspath,
-                                 sha1_checksum, TRUE /* create_subdir */,
-                                 scratch_pool, scratch_pool));
-      SVN_ERR(svn_io_remove_file2(pristine_abspath, TRUE /* ignore_enoent */,
+      /* ### TODO: If file not present, log a consistency error but return
+       * success. */
+      SVN_ERR(svn_io_remove_file2(b->pristine_abspath, FALSE /* ignore_enoent */,
                                   scratch_pool));
     }
 
   return SVN_NO_ERROR;
 }
 
+/* If the pristine text referenced by SHA1_CHECKSUM in WCROOT has a
+ * reference count of zero, delete it (both the database row and the disk
+ * file).
+ *
+ * Implements 'notes/wc-ng/pristine-store' section A-3(b). */
+static svn_error_t *
+pristine_remove_if_unreferenced(svn_wc__db_wcroot_t *wcroot,
+                                const svn_checksum_t *sha1_checksum,
+                                apr_pool_t *scratch_pool)
+{
+  pristine_remove_baton_t b;
+
+  b.sha1_checksum = sha1_checksum;
+  SVN_ERR(get_pristine_fname(&b.pristine_abspath, wcroot->abspath,
+                             sha1_checksum, FALSE /* create_subdir */,
+                             scratch_pool, scratch_pool));
+
+  /* Ensure the SQL txn has at least a 'RESERVED' lock before we start looking
+   * at the disk, to ensure no concurrent pristine install/delete txn. */
+  SVN_ERR(svn_sqlite__with_immediate_transaction(
+    wcroot->sdb, pristine_remove_if_unreferenced_txn, &b, scratch_pool));
+
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_wc__db_pristine_remove(svn_wc__db_t *db,
                            const char *wri_abspath,
@@ -518,7 +669,6 @@ svn_wc__db_pristine_check(svn_boolean_t 
   const char *pristine_abspath;
   svn_sqlite__stmt_t *stmt;
   svn_boolean_t have_row;
-  svn_node_kind_t kind_on_disk;
 
   SVN_ERR_ASSERT(present != NULL);
   SVN_ERR_ASSERT(svn_dirent_is_absolute(wri_abspath));
@@ -543,19 +693,24 @@ svn_wc__db_pristine_check(svn_boolean_t 
   SVN_ERR(svn_sqlite__step(&have_row, stmt));
   SVN_ERR(svn_sqlite__reset(stmt));
 
-  /* Check that the pristine text file exists. */
-  SVN_ERR(get_pristine_fname(&pristine_abspath, pdh->wcroot->abspath,
-                             sha1_checksum,
-                             FALSE /* create_subdir */,
-                             scratch_pool, scratch_pool));
-  SVN_ERR(svn_io_check_path(pristine_abspath, &kind_on_disk, scratch_pool));
+#ifdef SVN_DEBUG
+  /* Check that the pristine text file exists iff the DB says it does. */
+  if (have_row)
+    {
+      svn_node_kind_t kind_on_disk;
+      SVN_ERR(get_pristine_fname(&pristine_abspath, pdh->wcroot->abspath,
+                                 sha1_checksum, FALSE /* create_subdir */,
+                                 scratch_pool, scratch_pool));
+      SVN_ERR(svn_io_check_path(pristine_abspath, &kind_on_disk, scratch_pool));
 
-  if (kind_on_disk != (have_row ? svn_node_file : svn_node_none))
-    return svn_error_createf(SVN_ERR_WC_DB_ERROR, svn_sqlite__reset(stmt),
-                             _("The pristine text with checksum '%s' was "
-                               "found in the DB or on disk but not both"),
-                             svn_checksum_to_cstring_display(sha1_checksum,
-                                                             scratch_pool));
+      if (kind_on_disk != svn_node_file)
+        return svn_error_createf(SVN_ERR_WC_DB_ERROR, svn_sqlite__reset(stmt),
+                                 _("The pristine text with checksum '%s' was "
+                                   "found in the DB but not on disk"),
+                                 svn_checksum_to_cstring_display(sha1_checksum,
+                                                                 scratch_pool));
+    }
+#endif
 
   *present = have_row;
   return SVN_NO_ERROR;



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 26.02.2011 02:23, Branko Čibej wrote:
> ... but my last battle with Windows filesystem internals was
> more than 10 years ago.
(I lost)

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Tue, Mar 15, 2011 at 16:49, Julian Foad <ju...@btopenworld.com> wrote:
> Ivan Zhakov wrote:
>> On Tue, Mar 1, 2011 at 21:13, Julian Foad <ju...@wandisco.com> wrote:
>> > Bert Huijben wrote:
>> >> > -----Original Message-----
>> >> > From: Julian Foad [mailto:julian.foad@wandisco.com]
>> >> >
>> >> > I'm not clear exactly what problem we would avoid by eliminating the
>> >> > "select a unique name" step of this process.  Is it what I describe
>> >> > below at the end of note [1] - that a scanner might be more likely to
>> >> > re-scan the content, and therefore more likely to cause a delay?
>> >>
>> >> No, the problem I try to avoid is
>> >> * you create a file
>> >> <virus scanner opens the file to verify that it is not a virus>
>> >> * you delete the file (after the virusscanner releases the file)
>> >> * you rename a file to be at the old location
>> >>
>> >> While we really need something like
>> >> * rename to a unique name.
>> >> <virusscanner ignores the file, because it was already scanned at the original location>
>> >
>> > >From discussing on IRC we agree that the main concern is that this
>> > involves too many separate filesystem operations, rather than any
>> > specific problem with a particular step.
>> >
>> > I have committed it as is in r1075942, and would like to improve it as a
>> > follow-up.
>> >
>> > Options for improvement:
>> >
>> >  (a) Don't open a pristine file with FILE_SHARE_DELETE.  Instead,
>> > accept that an attempt to delete it may fail, and if that happens, leave
>> > the there as an orphan.  When adding a pristine file, if it already
>> > exists on disk then just keep the copy that already exists.  When
>> > cleaning up orphan files, if a delete fails, just leave the file there.
>> > Consider implementing automatic clean-up to prevent orphan files from
>> > accumulating indefinitely.
>> >
>> >  (b) Find or write a "rename to a unique name" function that operates
>> > in a single step instead of a creating a new file and then overwriting
>> > it.
>> >
>> >  (c) Don't rename the file before deleting it; instead, just delete it.
>> > On Windows, when adding a new file, if a file with that name already
>> > exists, *then* rename the existing file before moving the new file into
>> > place. (We can't just keep the existing file because it is pending
>> > deletion and will disappear when the reader finishes reading it.)
>> >
>> > Pros and cons:
>> >
>> >  (a) This would reduce the number of file system operations to a
>> > minimum.  It would involve bypassing APR to avoid the FILE_SHARE_DELETE
>> > flag, which is not ideal but possible.
>> >
>> >  (b) This would remove one of the file system operations.  It would
>> > involve writing a function similar to APR's fall-back implementation of
>> > apr_file_mktemp() that exists for systems that do not provide a
>> > "mkstemp" system API.  I'm not sure if there are any concrete problems
>> > with doing this sort of thing in "user space".
>> >
>> >  (c) This would remove two of the file system operations.  It sounds
>> > straightforward.
>> >
>> > Comments?
>> >
>>
>> (b) Is best option for.
>
> For ... what?
for me :)

>>  It should be big problem to generate random
>> name using current time and then try rename file to this name. Repeat
>> on error.
>
> I assume you mean "it should not be a big problem...".
>
Yes, I meant "should not be a big problem". Sorry.


-- 
Ivan Zhakov

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@btopenworld.com>.
Ivan Zhakov wrote:
> On Tue, Mar 1, 2011 at 21:13, Julian Foad <ju...@wandisco.com> wrote:
> > Bert Huijben wrote:
> >> > -----Original Message-----
> >> > From: Julian Foad [mailto:julian.foad@wandisco.com]
> >> >
> >> > I'm not clear exactly what problem we would avoid by eliminating the
> >> > "select a unique name" step of this process.  Is it what I describe
> >> > below at the end of note [1] - that a scanner might be more likely to
> >> > re-scan the content, and therefore more likely to cause a delay?
> >>
> >> No, the problem I try to avoid is
> >> * you create a file
> >> <virus scanner opens the file to verify that it is not a virus>
> >> * you delete the file (after the virusscanner releases the file)
> >> * you rename a file to be at the old location
> >>
> >> While we really need something like
> >> * rename to a unique name.
> >> <virusscanner ignores the file, because it was already scanned at the original location>
> >
> > >From discussing on IRC we agree that the main concern is that this
> > involves too many separate filesystem operations, rather than any
> > specific problem with a particular step.
> >
> > I have committed it as is in r1075942, and would like to improve it as a
> > follow-up.
> >
> > Options for improvement:
> >
> >  (a) Don't open a pristine file with FILE_SHARE_DELETE.  Instead,
> > accept that an attempt to delete it may fail, and if that happens, leave
> > the there as an orphan.  When adding a pristine file, if it already
> > exists on disk then just keep the copy that already exists.  When
> > cleaning up orphan files, if a delete fails, just leave the file there.
> > Consider implementing automatic clean-up to prevent orphan files from
> > accumulating indefinitely.
> >
> >  (b) Find or write a "rename to a unique name" function that operates
> > in a single step instead of a creating a new file and then overwriting
> > it.
> >
> >  (c) Don't rename the file before deleting it; instead, just delete it.
> > On Windows, when adding a new file, if a file with that name already
> > exists, *then* rename the existing file before moving the new file into
> > place. (We can't just keep the existing file because it is pending
> > deletion and will disappear when the reader finishes reading it.)
> >
> > Pros and cons:
> >
> >  (a) This would reduce the number of file system operations to a
> > minimum.  It would involve bypassing APR to avoid the FILE_SHARE_DELETE
> > flag, which is not ideal but possible.
> >
> >  (b) This would remove one of the file system operations.  It would
> > involve writing a function similar to APR's fall-back implementation of
> > apr_file_mktemp() that exists for systems that do not provide a
> > "mkstemp" system API.  I'm not sure if there are any concrete problems
> > with doing this sort of thing in "user space".
> >
> >  (c) This would remove two of the file system operations.  It sounds
> > straightforward.
> >
> > Comments?
> >
> 
> (b) Is best option for.

For ... what?

>  It should be big problem to generate random
> name using current time and then try rename file to this name. Repeat
> on error.

I assume you mean "it should not be a big problem...".

- Julian



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Tue, Mar 1, 2011 at 21:13, Julian Foad <ju...@wandisco.com> wrote:
> Bert Huijben wrote:
>> > -----Original Message-----
>> > From: Julian Foad [mailto:julian.foad@wandisco.com]
>> >
>> > I'm not clear exactly what problem we would avoid by eliminating the
>> > "select a unique name" step of this process.  Is it what I describe
>> > below at the end of note [1] - that a scanner might be more likely to
>> > re-scan the content, and therefore more likely to cause a delay?
>>
>> No, the problem I try to avoid is
>> * you create a file
>> <virus scanner opens the file to verify that it is not a virus>
>> * you delete the file (after the virusscanner releases the file)
>> * you rename a file to be at the old location
>>
>> While we really need something like
>> * rename to a unique name.
>> <virusscanner ignores the file, because it was already scanned at the original location>
>
> >From discussing on IRC we agree that the main concern is that this
> involves too many separate filesystem operations, rather than any
> specific problem with a particular step.
>
> I have committed it as is in r1075942, and would like to improve it as a
> follow-up.
>
> Options for improvement:
>
>  (a) Don't open a pristine file with FILE_SHARE_DELETE.  Instead,
> accept that an attempt to delete it may fail, and if that happens, leave
> the there as an orphan.  When adding a pristine file, if it already
> exists on disk then just keep the copy that already exists.  When
> cleaning up orphan files, if a delete fails, just leave the file there.
> Consider implementing automatic clean-up to prevent orphan files from
> accumulating indefinitely.
>
>  (b) Find or write a "rename to a unique name" function that operates
> in a single step instead of a creating a new file and then overwriting
> it.
>
>  (c) Don't rename the file before deleting it; instead, just delete it.
> On Windows, when adding a new file, if a file with that name already
> exists, *then* rename the existing file before moving the new file into
> place. (We can't just keep the existing file because it is pending
> deletion and will disappear when the reader finishes reading it.)
>
> Pros and cons:
>
>  (a) This would reduce the number of file system operations to a
> minimum.  It would involve bypassing APR to avoid the FILE_SHARE_DELETE
> flag, which is not ideal but possible.
>
>  (b) This would remove one of the file system operations.  It would
> involve writing a function similar to APR's fall-back implementation of
> apr_file_mktemp() that exists for systems that do not provide a
> "mkstemp" system API.  I'm not sure if there are any concrete problems
> with doing this sort of thing in "user space".
>
>  (c) This would remove two of the file system operations.  It sounds
> straightforward.
>
> Comments?
>

(b) Is best option for. It should be big problem to generate random
name using current time and then try rename file to this name. Repeat
on error.

-- 
Ivan Zhakov

RE: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
Bert Huijben wrote:
> > -----Original Message-----
> > From: Julian Foad [mailto:julian.foad@wandisco.com]
> >
> > I'm not clear exactly what problem we would avoid by eliminating the
> > "select a unique name" step of this process.  Is it what I describe
> > below at the end of note [1] - that a scanner might be more likely to
> > re-scan the content, and therefore more likely to cause a delay?
> 
> No, the problem I try to avoid is
> * you create a file
> <virus scanner opens the file to verify that it is not a virus>
> * you delete the file (after the virusscanner releases the file)
> * you rename a file to be at the old location
> 
> While we really need something like
> * rename to a unique name.
> <virusscanner ignores the file, because it was already scanned at the original location>

>>From discussing on IRC we agree that the main concern is that this
involves too many separate filesystem operations, rather than any
specific problem with a particular step.

I have committed it as is in r1075942, and would like to improve it as a
follow-up.

Options for improvement:

  (a) Don't open a pristine file with FILE_SHARE_DELETE.  Instead,
accept that an attempt to delete it may fail, and if that happens, leave
the there as an orphan.  When adding a pristine file, if it already
exists on disk then just keep the copy that already exists.  When
cleaning up orphan files, if a delete fails, just leave the file there.
Consider implementing automatic clean-up to prevent orphan files from
accumulating indefinitely.

  (b) Find or write a "rename to a unique name" function that operates
in a single step instead of a creating a new file and then overwriting
it.

  (c) Don't rename the file before deleting it; instead, just delete it.
On Windows, when adding a new file, if a file with that name already
exists, *then* rename the existing file before moving the new file into
place. (We can't just keep the existing file because it is pending
deletion and will disappear when the reader finishes reading it.)

Pros and cons:

  (a) This would reduce the number of file system operations to a
minimum.  It would involve bypassing APR to avoid the FILE_SHARE_DELETE
flag, which is not ideal but possible.

  (b) This would remove one of the file system operations.  It would
involve writing a function similar to APR's fall-back implementation of
apr_file_mktemp() that exists for systems that do not provide a
"mkstemp" system API.  I'm not sure if there are any concrete problems
with doing this sort of thing in "user space".

  (c) This would remove two of the file system operations.  It sounds
straightforward.

Comments?

- Julian



RE: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Julian Foad [mailto:julian.foad@wandisco.com]
> Sent: dinsdag 1 maart 2011 14:34
> To: Bert Huijben
> Cc: 'Branko Čibej'; 'Subversion Development'
> Subject: RE: svn commit: r1073366 - in /subversion/trunk: notes/wc-
> ng/pristine-store subversion/libsvn_wc/wc-queries.sql
> subversion/libsvn_wc/wc_db_pristine.c


> I'm not clear exactly what problem we would avoid by eliminating the
> "select a unique name" step of this process.  Is it what I describe
> below at the end of note [1] - that a scanner might be more likely to
> re-scan the content, and therefore more likely to cause a delay?

No, the problem I try to avoid is
* you create a file
<virus scanner opens the file to verify that it is not a virus>
* you delete the file (after the virusscanner releases the file)
* you rename a file to be at the old location

While we really need something like
* rename to a unique name.
<virusscanner ignores the file, because it was already scanned at the original location>

	Bert


RE: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
On Mon, 2011-02-28, Bert Huijben wrote:
> > -----Original Message-----
> > From: Julian Foad [mailto:julian.foad@wandisco.com]
> > 
> > On Mon, 2011-02-28, I (Julian Foad) wrote:
> > > On Sat, 2011-02-26, Branko Čibej wrote:
> > > > On 26.02.2011 20:40, Ivan Zhakov wrote:
> > > > > Btw I think it makes sense rename file to tmp directory in working
> > > > > copy instead of pristines directory, since it could be crash/failure
> > > > > between rename and delete. In this case pristines directory will
> > > > > polluted with orphaned pristines.
> > > >
> > > > That works as long as the pristine store lives in the WC root, so yes.
> > >
> > > This seems to be a good plan.  Thanks for the help.  I'll do it right
> > > away.
> > 
> > Please see attached patch.  (I might write a test or two before
> > committing it or I might commit first.)
> 
> If you just created a file in the tempdir it will be scanned by
> virusscanners while you will just want to delete it directly. (Which
> might trigger an access denied and then a wait loop)

I would hope that some (virus/indexing) scanners might avoid re-scanning
the file since all that happened was a rename [1], but yes, if a scanner
does open the file after the rename then there would be a delay.

> I think you can safely assume that the file won't be removed from the
> pristine twice at about the same time, so just using the sha1 as the
> filename should be pretty collision safe. (And the wait loop will
> catch the other cases)

Often, that would be fine.  However, the re-try loop doesn't help if the
file is being held open for a long time.

Let's say I open a graphical diff against a pristine text, and the diff
program holds the file open for reading.  Then, while that's still
displayed, I run

  svn update -rX  # removes the text
  svn update -rY  # re-adds the text
  svn update -rZ  # removes the text

The third update would delay in the re-try loop in the rename, and the
re-try loop would time-out and fail.  Although that may be an uncommon
use case, I think it is reasonable and should work.

I'm not clear exactly what problem we would avoid by eliminating the
"select a unique name" step of this process.  Is it what I describe
below at the end of note [1] - that a scanner might be more likely to
re-scan the content, and therefore more likely to cause a delay?

- Julian


[1] Actually, we don't have an atomic rename-to-a-unique-name function.
We currently use two separate steps, "Create a new file" followed by
"Overwrite the new file".  The scanner might immediately open the new
empty file and that would delay our overwriting of it, but that
shouldn't take long.  Then, I would expect the "overwrite" step to
behave like a delete followed by renaming the original file to the new
file's name, which is equivalent to the hypothetical atomic
rename-to-a-unique-name function.  A scanner might not see the rename in
that way, it might see it instead as a content-change of the new file,
and then it would scan the content, which would be a waste of time.


> Note that we might assume that Subversion opens files with
> FILE_SHARE_DELETE, but we can't assume that other programs -like
> virusscanners, file indexers, etc.- triggered by our disk i.o., do the
> same thing. [...]




RE: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Julian Foad [mailto:julian.foad@wandisco.com]
> Sent: maandag 28 februari 2011 19:29
> To: Branko Čibej
> Cc: Subversion Development
> Subject: Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-
> ng/pristine-store subversion/libsvn_wc/wc-queries.sql
> subversion/libsvn_wc/wc_db_pristine.c
> 
> On Mon, 2011-02-28, I (Julian Foad) wrote:
> > On Sat, 2011-02-26, Branko Čibej wrote:
> > > On 26.02.2011 20:40, Ivan Zhakov wrote:
> > > > Btw I think it makes sense rename file to tmp directory in working
> > > > copy instead of pristines directory, since it could be crash/failure
> > > > between rename and delete. In this case pristines directory will
> > > > polluted with orphaned pristines.
> > >
> > > That works as long as the pristine store lives in the WC root, so yes.
> >
> > This seems to be a good plan.  Thanks for the help.  I'll do it right
> > away.
> 
> Please see attached patch.  (I might write a test or two before
> committing it or I might commit first.)

If you just created a file in the tempdir it will be scanned by virusscanners while you will just want to delete it directly. (Which might trigger an access denied and then a wait loop)

I think you can safely assume that the file won't be removed from the pristine twice at about the same time, so just using the sha1 as the filename should be pretty collision safe. (And the wait loop will catch the other cases)


Note that we might assume that Subversion opens files with FILE_SHARE_DELETE, but we can't assume that other programs -like virusscanners, file indexers, etc.- triggered by our disk i.o., do the same thing. Especially since using this share flag has a performance penalty and allowing files to be deleted/changed while open makes it harder to test your application for correctness.

	Bert



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
On Mon, 2011-02-28, I (Julian Foad) wrote:
> On Sat, 2011-02-26, Branko Čibej wrote:
> > On 26.02.2011 20:40, Ivan Zhakov wrote:
> > > Btw I think it makes sense rename file to tmp directory in working
> > > copy instead of pristines directory, since it could be crash/failure
> > > between rename and delete. In this case pristines directory will
> > > polluted with orphaned pristines.
> > 
> > That works as long as the pristine store lives in the WC root, so yes.
> 
> This seems to be a good plan.  Thanks for the help.  I'll do it right
> away.

Please see attached patch.  (I might write a test or two before
committing it or I might commit first.)

- Julian


Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
On Sat, 2011-02-26, Branko Čibej wrote:
> On 26.02.2011 20:40, Ivan Zhakov wrote:
> > Btw I think it makes sense rename file to tmp directory in working
> > copy instead of pristines directory, since it could be crash/failure
> > between rename and delete. In this case pristines directory will
> > polluted with orphaned pristines.
> 
> That works as long as the pristine store lives in the WC root, so yes.

This seems to be a good plan.  Thanks for the help.  I'll do it right
away.

> BTW Julian, have you considered making the pristine files read-only
> (once they're written of course)? If yes, then be aware that in order to
> delete and/or rename a file on Windows, you have to first clear the
> read-only bit.

It seems sensible to make them read-only if it's near zero cost, and I
think it is.  Our normal delete API svn_io_remove_file2() first clears
the read-only bit, although I've just noticed its doc string doesn't say
so.  I'll update it.

- Julian



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 26.02.2011 20:40, Ivan Zhakov wrote:
> Btw I think it makes sense rename file to tmp directory in working
> copy instead of pristines directory, since it could be crash/failure
> between rename and delete. In this case pristines directory will
> polluted with orphaned pristines.

That works as long as the pristine store lives in the WC root, so yes.

-- Brane


Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
2011/2/26 Branko Čibej <br...@e-reka.si>:
> On 26.02.2011 10:50, Ivan Zhakov wrote:
>> 2011/2/26 Branko Čibej <br...@e-reka.si>:
>>> On 26.02.2011 07:32, Ivan Zhakov wrote:
>>>> Problem of re-installing file over marked for deletion file can be
>>>> solved using the following trick:
>>>> 1. Rename file to temporary name.
>>>> 2. Delete it
>>> (If the proper share bits are set.)
>>>
>> Sure.
>
> It turns out that the FILE_SHARE_DELETE bit is the one that allows renames.
>
Yes, you're right.

>>> Yes, that'd work, but if there's a way to unmark the deletion bit,
>>> that's even better, since then you'd not even have to create another
>>> file (with identical contents).
>>>
>> I'm not aware how to unmark deletion bit on Windows.
>
> You're right, there's no way to do it from user space. I suppose then
> that the easiest thing to do on Windows would be this: when a pristine
> needs to be deleted, instead of just deleting it, rename it to some
> unique random name within the same directory, and delete that. This way,
> you don't need extra logic for the "reinstate" step, nor do you have to
> ever check if there are open handles to the file. Rename (clearing the
> way for reinstate) and delete (the file goes away when all handles are
> closed) and apr_file_open already sets all the required share bits.
>
That's exactly what I was suggesting.

Btw I think it makes sense rename file to tmp directory in working
copy instead of pristines directory, since it could be crash/failure
between rename and delete. In this case pristines directory will
polluted with orphaned pristines.

> BTW Julian, have you considered making the pristine files read-only
> (once they're written of course)? If yes, then be aware that in order to
> delete and/or rename a file on Windows, you have to first clear the
> read-only bit. I think we have utility functions for that in
> libsvn_subr, but it's worth a check.
>
Makes sense for me.

-- 
Ivan Zhakov
VisualSVN Team

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 26.02.2011 10:50, Ivan Zhakov wrote:
> 2011/2/26 Branko Čibej <br...@e-reka.si>:
>> On 26.02.2011 07:32, Ivan Zhakov wrote:
>>> Problem of re-installing file over marked for deletion file can be
>>> solved using the following trick:
>>> 1. Rename file to temporary name.
>>> 2. Delete it
>> (If the proper share bits are set.)
>>
> Sure.

It turns out that the FILE_SHARE_DELETE bit is the one that allows renames.

>> Yes, that'd work, but if there's a way to unmark the deletion bit,
>> that's even better, since then you'd not even have to create another
>> file (with identical contents).
>>
> I'm not aware how to unmark deletion bit on Windows.

You're right, there's no way to do it from user space. I suppose then
that the easiest thing to do on Windows would be this: when a pristine
needs to be deleted, instead of just deleting it, rename it to some
unique random name within the same directory, and delete that. This way,
you don't need extra logic for the "reinstate" step, nor do you have to
ever check if there are open handles to the file. Rename (clearing the
way for reinstate) and delete (the file goes away when all handles are
closed) and apr_file_open already sets all the required share bits.

BTW Julian, have you considered making the pristine files read-only
(once they're written of course)? If yes, then be aware that in order to
delete and/or rename a file on Windows, you have to first clear the
read-only bit. I think we have utility functions for that in
libsvn_subr, but it's worth a check.

-- Brane

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
2011/2/26 Branko Čibej <br...@e-reka.si>:
> On 26.02.2011 07:32, Ivan Zhakov wrote:
>> 2011/2/26 Branko Čibej <br...@e-reka.si>:
>>> On 25.02.2011 16:53, Julian Foad wrote:
>>>> On Thu, 2011-02-24, Branko Čibej wrote:
>>>>> On 24.02.2011 18:03, Julian Foad wrote:
>>>>>> On Wed, 2011-02-23, Daniel Shahaf wrote:
>>>>>>> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
>> [...]
>>
>>>>>   It is not the business of the wc_db+pristine-store to track
>>>>> every process that happens to have an open handle to the pristine file.
>>>>> A deletion of the pristine file should succeed even if there are open
>>>>> handles referring to it.
>>>> So you're suggesting we should promise that a reader can continue
>>>> reading the file (at least once through to the end, not sure about
>>>> rewinding) even if something else deletes the file from the store part
>>>> way through.  I think you're suggesting those semantics are more
>>>> reasonable than "you have to hold some sort of lock while you read it",
>>>> which is what my design boiled down to.
>>> Yes, indeed, they're far more reasonable because the OS already gives
>>> them to you. On Unix, when you delete a file, it vanishes from the
>>> directory; but open handles remain valid, and the backing store of the
>>> data still exists. The file only really goes away when the last handle
>>> is closed.
>>>
>>> On Windows, the situation is pretty much the same (assuming
>>> FILE_SHARE_DELETE which we've already determined APR always does --
>>> guess why :), *except* that the file only vanishes from the directory
>>> after it's been deleted once the last handle to it is closed, that's why
>>> I mentioned the tricky part of re-instating the file.
>>>
>> [..]
>>>> I guess I'll have to figure out how to implement this "trifle more
>>>> involved" part on Windows, now.
>>> Lucky you, the name of the file is the digest of its contents, so in
>>> order to reinstate the file on Windows you only have get the system to
>>> twiddle it's "deleted" bit. "Only." I seem to recall that's not even
>>> hard to do, but my last battle with Windows filesystem internals was
>>> more than 10 years ago. If you can't find relevant docs, you could try
>>> asking APR for that functionality. I'm sure Will Rowe will give you a
>>> dozen reasons why doing that is not a good idea, and also explain how to
>>> do it. :)
>>>
>> Problem of re-installing file over marked for deletion file can be
>> solved using the following trick:
>> 1. Rename file to temporary name.
>> 2. Delete it
>
> (If the proper share bits are set.)
>
Sure.

> Yes, that'd work, but if there's a way to unmark the deletion bit,
> that's even better, since then you'd not even have to create another
> file (with identical contents).
>
I'm not aware how to unmark deletion bit on Windows.

-- 
Ivan Zhakov

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 26.02.2011 07:32, Ivan Zhakov wrote:
> 2011/2/26 Branko Čibej <br...@e-reka.si>:
>> On 25.02.2011 16:53, Julian Foad wrote:
>>> On Thu, 2011-02-24, Branko Čibej wrote:
>>>> On 24.02.2011 18:03, Julian Foad wrote:
>>>>> On Wed, 2011-02-23, Daniel Shahaf wrote:
>>>>>> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
> [...]
>
>>>>   It is not the business of the wc_db+pristine-store to track
>>>> every process that happens to have an open handle to the pristine file.
>>>> A deletion of the pristine file should succeed even if there are open
>>>> handles referring to it.
>>> So you're suggesting we should promise that a reader can continue
>>> reading the file (at least once through to the end, not sure about
>>> rewinding) even if something else deletes the file from the store part
>>> way through.  I think you're suggesting those semantics are more
>>> reasonable than "you have to hold some sort of lock while you read it",
>>> which is what my design boiled down to.
>> Yes, indeed, they're far more reasonable because the OS already gives
>> them to you. On Unix, when you delete a file, it vanishes from the
>> directory; but open handles remain valid, and the backing store of the
>> data still exists. The file only really goes away when the last handle
>> is closed.
>>
>> On Windows, the situation is pretty much the same (assuming
>> FILE_SHARE_DELETE which we've already determined APR always does --
>> guess why :), *except* that the file only vanishes from the directory
>> after it's been deleted once the last handle to it is closed, that's why
>> I mentioned the tricky part of re-instating the file.
>>
> [..]
>>> I guess I'll have to figure out how to implement this "trifle more
>>> involved" part on Windows, now.
>> Lucky you, the name of the file is the digest of its contents, so in
>> order to reinstate the file on Windows you only have get the system to
>> twiddle it's "deleted" bit. "Only." I seem to recall that's not even
>> hard to do, but my last battle with Windows filesystem internals was
>> more than 10 years ago. If you can't find relevant docs, you could try
>> asking APR for that functionality. I'm sure Will Rowe will give you a
>> dozen reasons why doing that is not a good idea, and also explain how to
>> do it. :)
>>
> Problem of re-installing file over marked for deletion file can be
> solved using the following trick:
> 1. Rename file to temporary name.
> 2. Delete it

(If the proper share bits are set.)

Yes, that'd work, but if there's a way to unmark the deletion bit,
that's even better, since then you'd not even have to create another
file (with identical contents).

-- Brane

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
2011/2/26 Branko Čibej <br...@e-reka.si>:
> On 25.02.2011 16:53, Julian Foad wrote:
>> On Thu, 2011-02-24, Branko Čibej wrote:
>>> On 24.02.2011 18:03, Julian Foad wrote:
>>>> On Wed, 2011-02-23, Daniel Shahaf wrote:
>>>>> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
[...]

>>>   It is not the business of the wc_db+pristine-store to track
>>> every process that happens to have an open handle to the pristine file.
>>> A deletion of the pristine file should succeed even if there are open
>>> handles referring to it.
>> So you're suggesting we should promise that a reader can continue
>> reading the file (at least once through to the end, not sure about
>> rewinding) even if something else deletes the file from the store part
>> way through.  I think you're suggesting those semantics are more
>> reasonable than "you have to hold some sort of lock while you read it",
>> which is what my design boiled down to.
>
> Yes, indeed, they're far more reasonable because the OS already gives
> them to you. On Unix, when you delete a file, it vanishes from the
> directory; but open handles remain valid, and the backing store of the
> data still exists. The file only really goes away when the last handle
> is closed.
>
> On Windows, the situation is pretty much the same (assuming
> FILE_SHARE_DELETE which we've already determined APR always does --
> guess why :), *except* that the file only vanishes from the directory
> after it's been deleted once the last handle to it is closed, that's why
> I mentioned the tricky part of re-instating the file.
>
[..]
>> I guess I'll have to figure out how to implement this "trifle more
>> involved" part on Windows, now.
>
> Lucky you, the name of the file is the digest of its contents, so in
> order to reinstate the file on Windows you only have get the system to
> twiddle it's "deleted" bit. "Only." I seem to recall that's not even
> hard to do, but my last battle with Windows filesystem internals was
> more than 10 years ago. If you can't find relevant docs, you could try
> asking APR for that functionality. I'm sure Will Rowe will give you a
> dozen reasons why doing that is not a good idea, and also explain how to
> do it. :)
>
Problem of re-installing file over marked for deletion file can be
solved using the following trick:
1. Rename file to temporary name.
2. Delete it


-- 
Ivan Zhakov
VisualSVN Team

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 25.02.2011 16:53, Julian Foad wrote:
> On Thu, 2011-02-24, Branko Čibej wrote:
>> On 24.02.2011 18:03, Julian Foad wrote:
>>> On Wed, 2011-02-23, Daniel Shahaf wrote:
>>>> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
>>>>> +/* Set (*BATON->contents) to a readable stream from which the pristine text
>>>>> + * identified by BATON->sha1_checksum can be read from the pristine store of
>>>>> + * SDB.  If that text is not in the pristine store, return an error.
>>>>> + *
>>>>> + * Allocate the stream in BATON->result_pool.
>>>>> + *
>>>>> + * This function expects to be executed inside a SQLite txn.
>>>>> + *
>>>>> + * Implements 'notes/wc-ng/pristine-store' section A-3(d).
>>>>> + * Implements svn_sqlite__transaction_callback_t. */
>>>>> +static svn_error_t *
>>>>> +pristine_read_txn(void *baton,
>>>>> +                  svn_sqlite__db_t *sdb,
>>>>> +                  apr_pool_t *scratch_pool)
>>>>> +{
>>>> Should document somewhere that the returned stream is only guaranteed to
>>>> remain valid if the caller holds a wc lock, etc.?
>>>>
>>>> (svn_wc__db_pristine_read()'s docs don't mention locks)
>>> For now, I've written (on both functions):
>>>
>>>  The stream will remain readable as long as the pristine text remains in
>>>  the store; if the pristine text is removed from the store, any subsequent
>>>  read or other operation on the stream may return an error.
> Hi Brane.  Thanks for helping me out here.  I'm *very* glad of your
> attention.
>
> A summary of this reply: I think I get you and agree your way is better.
>
> But taking one piece at a time...
>
>> Wait wait wait, I missed something there... You want to make *reads*
>> from the (transactionally opened) stream to error out if someone deletes
>> the pristine file? Woah. That implies a stat for every read, at least in
>> Unix-like environments, and frankly I can't see any good reason to do that.
> ... you did miss something: I said it *may* return an error.  That is:
> this API does not promise that you can continue to read from the file
> after removing it from the pristine store, but nor does it promise that
> any such attempt shall fail (immediately or at all).
>
> The idea was that the caller should ensure the pristine file won't be
> deleted while it's reading it.  (One way the caller can do so is to hold
> a WC lock on a WC subtree that contains a reference to that pristine.
> We could define other ways.)
>
> Does that make more sense?

Mm... frankly, this looks just a bit over-engineered to me. The usage
then goes like this:

    * So, I want to read this pristine, let's call that specialized
      "open" function in libsvn_wc that does all the magic for us ...
    * oops, now I have a file handle, but I can't safely read from it
      without first taking out a WC lock ...
    * ah, but I'd have been better off taking the WC lock first ... but
      then, why did I have to call that special magic "open" function in
      the first place ...

etc. :)

It's much easier to do open ... read ... forget.

>   I thought that was the kind of non-guarantee
> (although not the specific non-guarantee) you were advocating when you
> wrote before:
>
>>     * Impose constraints on what users of the file handle can expect,
>>       for example:
>>           o you are not allowed write access through the file (pristine
>>             file creation is a magical step)
>>           o you cannot assume that you will find the file that the
>>             handle refers to in any directory listing
>>           o you cannot assume that the file will continue to exist after
>>             the handle is closed
> [...]
>>   It is not the business of the wc_db+pristine-store to track
>> every process that happens to have an open handle to the pristine file.
>> A deletion of the pristine file should succeed even if there are open
>> handles referring to it.
> So you're suggesting we should promise that a reader can continue
> reading the file (at least once through to the end, not sure about
> rewinding) even if something else deletes the file from the store part
> way through.  I think you're suggesting those semantics are more
> reasonable than "you have to hold some sort of lock while you read it",
> which is what my design boiled down to.

Yes, indeed, they're far more reasonable because the OS already gives
them to you. On Unix, when you delete a file, it vanishes from the
directory; but open handles remain valid, and the backing store of the
data still exists. The file only really goes away when the last handle
is closed.

On Windows, the situation is pretty much the same (assuming
FILE_SHARE_DELETE which we've already determined APR always does --
guess why :), *except* that the file only vanishes from the directory
after it's been deleted once the last handle to it is closed, that's why
I mentioned the tricky part of re-instating the file.

In other words, there's really nothing extra that either the user of the
file *or* the pristine store needs to do, except for guaranteeing that
the file is opened atomically from the point of view of the
wc-db+pristine-store.

[...]

> I guess I'll have to figure out how to implement this "trifle more
> involved" part on Windows, now.

Lucky you, the name of the file is the digest of its contents, so in
order to reinstate the file on Windows you only have get the system to
twiddle it's "deleted" bit. "Only." I seem to recall that's not even
hard to do, but my last battle with Windows filesystem internals was
more than 10 years ago. If you can't find relevant docs, you could try
asking APR for that functionality. I'm sure Will Rowe will give you a
dozen reasons why doing that is not a good idea, and also explain how to
do it. :)

-- Brane


Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
On Thu, 2011-02-24, Branko Čibej wrote:
> On 24.02.2011 18:03, Julian Foad wrote:
> > On Wed, 2011-02-23, Daniel Shahaf wrote:
> >> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
> >>> +/* Set (*BATON->contents) to a readable stream from which the pristine text
> >>> + * identified by BATON->sha1_checksum can be read from the pristine store of
> >>> + * SDB.  If that text is not in the pristine store, return an error.
> >>> + *
> >>> + * Allocate the stream in BATON->result_pool.
> >>> + *
> >>> + * This function expects to be executed inside a SQLite txn.
> >>> + *
> >>> + * Implements 'notes/wc-ng/pristine-store' section A-3(d).
> >>> + * Implements svn_sqlite__transaction_callback_t. */
> >>> +static svn_error_t *
> >>> +pristine_read_txn(void *baton,
> >>> +                  svn_sqlite__db_t *sdb,
> >>> +                  apr_pool_t *scratch_pool)
> >>> +{
> >> Should document somewhere that the returned stream is only guaranteed to
> >> remain valid if the caller holds a wc lock, etc.?
> >>
> >> (svn_wc__db_pristine_read()'s docs don't mention locks)
> > For now, I've written (on both functions):
> >
> >  The stream will remain readable as long as the pristine text remains in
> >  the store; if the pristine text is removed from the store, any subsequent
> >  read or other operation on the stream may return an error.

Hi Brane.  Thanks for helping me out here.  I'm *very* glad of your
attention.

A summary of this reply: I think I get you and agree your way is better.

But taking one piece at a time...

> Wait wait wait, I missed something there... You want to make *reads*
> from the (transactionally opened) stream to error out if someone deletes
> the pristine file? Woah. That implies a stat for every read, at least in
> Unix-like environments, and frankly I can't see any good reason to do that.

... you did miss something: I said it *may* return an error.  That is:
this API does not promise that you can continue to read from the file
after removing it from the pristine store, but nor does it promise that
any such attempt shall fail (immediately or at all).

The idea was that the caller should ensure the pristine file won't be
deleted while it's reading it.  (One way the caller can do so is to hold
a WC lock on a WC subtree that contains a reference to that pristine.
We could define other ways.)

Does that make more sense?  I thought that was the kind of non-guarantee
(although not the specific non-guarantee) you were advocating when you
wrote before:

>     * Impose constraints on what users of the file handle can expect,
>       for example:
>           o you are not allowed write access through the file (pristine
>             file creation is a magical step)
>           o you cannot assume that you will find the file that the
>             handle refers to in any directory listing
>           o you cannot assume that the file will continue to exist after
>             the handle is closed

[...]
>   It is not the business of the wc_db+pristine-store to track
> every process that happens to have an open handle to the pristine file.
> A deletion of the pristine file should succeed even if there are open
> handles referring to it.

So you're suggesting we should promise that a reader can continue
reading the file (at least once through to the end, not sure about
rewinding) even if something else deletes the file from the store part
way through.  I think you're suggesting those semantics are more
reasonable than "you have to hold some sort of lock while you read it",
which is what my design boiled down to.

OK, I am happy to take that advice.  In the design I have, the reader
must hold a WC lock to be sure that the pristine text won't be deleted
from the store.  But a WC lock is a write lock.  That sounds
undesirable, now I come to state it!

>  That's a trivial task on Unix (i.e., "do
> nothing" ... modulo NFS most likely) and a trifle more involved on
> Windows (where the trifle lies in doing the right thing if we need to
> reinstate a file with the same checksum while there are open handles to it).

I guess I'll have to figure out how to implement this "trifle more
involved" part on Windows, now.

> All it takes to make this behaviour valid is to state in the docs that
> there's absolutely no guarantee that the pristine file that you've just
> opened will still represent anything in the working copy by the time you
> manage to read from it, or indeed that it will even continue to exist if
> you close it.

Those conditions are pretty much inevitable, but yes, better to state
them.

- Julian



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Branko Čibej <br...@e-reka.si>.
On 24.02.2011 18:03, Julian Foad wrote:
> On Wed, 2011-02-23, Daniel Shahaf wrote:
>> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
>>> Modified: subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c
>>> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c?rev=1073366&r1=1073365&r2=1073366&view=diff
>>> ==============================================================================
>>> --- subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c (original)
>>> +++ subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c Tue Feb 22 15:38:35 2011
>>> @@ -163,6 +165,57 @@ svn_wc__db_pristine_get_future_path(cons
>>> +/* Set (*BATON->contents) to a readable stream from which the pristine text
>>> + * identified by BATON->sha1_checksum can be read from the pristine store of
>>> + * SDB.  If that text is not in the pristine store, return an error.
>>> + *
>>> + * Allocate the stream in BATON->result_pool.
>>> + *
>>> + * This function expects to be executed inside a SQLite txn.
>>> + *
>>> + * Implements 'notes/wc-ng/pristine-store' section A-3(d).
>>> + * Implements svn_sqlite__transaction_callback_t. */
>>> +static svn_error_t *
>>> +pristine_read_txn(void *baton,
>>> +                  svn_sqlite__db_t *sdb,
>>> +                  apr_pool_t *scratch_pool)
>>> +{
>> Should document somewhere that the returned stream is only guaranteed to
>> remain valid if the caller holds a wc lock, etc.?
>>
>> (svn_wc__db_pristine_read()'s docs don't mention locks)
> For now, I've written (on both functions):
>
>  The stream will remain readable as long as the pristine text remains in
>  the store; if the pristine text is removed from the store, any subsequent
>  read or other operation on the stream may return an error.

Wait wait wait, I missed something there... You want to make *reads*
from the (transactionally opened) stream to error out if someone deletes
the pristine file? Woah. That implies a stat for every read, at least in
Unix-like environments, and frankly I can't see any good reason to do that.

>> Obviously there's still a bit of work here (eg, that "Delayed delete"
>> flag on Windows) and it can't all be done in one commit.

Why can't it be all done in one commit? All you need is to set
FILE_SHARE_READ and FILE_SHARE_DELETE flags in the CreateFile call ...
and surprise surprise, APR already does that by default (except on Win95
of course, but I couldn't care less about that). You don't need any
"delayed delete" flag, which does exist but is meant for creating
temporary files.

> I'm not sure we need that.

Yes we do. It is not the business of the wc_db+pristine-store to track
every process that happens to have an open handle to the pristine file.
A deletion of the pristine file should succeed even if there are open
handles referring to it. That's a trivial task on Unix (i.e., "do
nothing" ... modulo NFS most likely) and a trifle more involved on
Windows (where the trifle lies in doing the right thing if we need to
reinstate a file with the same checksum while there are open handles to it).

All it takes to make this behaviour valid is to state in the docs that
there's absolutely no guarantee that the pristine file that you've just
opened will still represent anything in the working copy by the time you
manage to read from it, or indeed that it will even continue to exist if
you close it.

-- Brane

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Daniel Shahaf <da...@elego.de>.
Julian Foad wrote on Thu, Feb 24, 2011 at 17:03:21 +0000:
> On Wed, 2011-02-23, Daniel Shahaf wrote:
> > julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
> > > +  return SVN_NO_ERROR;
> > > +}
> > > +
> > >  svn_error_t *
> > >  svn_wc__db_pristine_read(svn_stream_t **contents,
> > >                           svn_wc__db_t *db,
> > > @@ -173,7 +226,7 @@ svn_wc__db_pristine_read(svn_stream_t **
> > >  {
> > >    svn_wc__db_pdh_t *pdh;
> > >    const char *local_relpath;
> > > -  const char *pristine_abspath;
> > > +  pristine_read_baton_t b;
> > ...
> > > +pristine_install_txn(void *baton,
> > > +                     svn_sqlite__db_t *sdb,
> > > +                     apr_pool_t *scratch_pool)
> > > +{
> > ...
> > > +  /* If this pristine text is already present in the store, just keep it:
> > > +   * delete the new one and return. */
> > > +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_SELECT_PRISTINE));
> > > +  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
> > > +  SVN_ERR(svn_sqlite__step(&have_row, stmt));
> > > +  SVN_ERR(svn_sqlite__reset(stmt));
> > > +  if (have_row)
> > > +    {
> > > +      /* Remove the temp file: it's already there */
> > > +      /* ### TODO: Maybe verify the new file matches the existing one. */
> > > +      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
> > > +      return SVN_NO_ERROR;
> > 
> > Perhaps check (assert?) that b->PRISTINE_ABSPATH exists?
> 
> I think what I want to do here is, only if SVN_DEBUG is defined, check
> that both the old and new files exist and that they are "the same" in
> some loose sense.  For example:
> 
> #ifdef SVN_DEBUG
> /* Consistency checks.  Verify both files exist and match. */
> {

Okay.  (That's not much more expensive, and might catch some bugs.)

>   apr_finfo_t finfo1, finfo2;
>   SVN_ERR(svn_io_stat(&finfo1, b->tempfile_abspath, APR_FINFO_SIZE,
>                       scratch_pool));
>   SVN_ERR(svn_io_stat(&finfo2, b->pristine_abspath, APR_FINFO_SIZE,
>                       scratch_pool));
>   if (finfo1->size != finfo2->size)
>     return svn_error_createf(SVN_ERR_WC_CORRUPT_TEXT_BASE, NULL,
>                              _("New pristine doesn't match existing "
>                                "pristine with same checksum '%s'"),
>                              svn_checksum_to_ctring_display(
>                                b->sha1_checksum, scratch_pool));
> }
> #endif
> 
> 
> > (which reduces to dropping the "if (have_row)" check)
> 
> This 'if' block deals with the valid condition where the pristine text
> that we're adding has already been added.  I want to deal with that case
> neatly.  If I were to drop this block then in this case we would
> overwrite the DB row, which would reset its ref count.
> 

Right, my bad: I didn't mean to suggest overwriting the DB row, just to
re-use the IO logic just after the end of the if().

> > > +    }
> > > +
> > > +  /* Move the file to its target location, or discard it if already there. */
> > > +  SVN_ERR(svn_io_check_path(b->pristine_abspath, &kind, scratch_pool));
> 
> Instead, I will simplify this part.  This extra 'stat' and the following
> if-then-else block are not really important, as they only deal with the
> "orphan file" case.  It doesn't matter if we overwrite an orphan file,
> so we can unconditionally do the "rename".

> > > +pristine_remove_if_unreferenced_txn(void *baton,
> > > +                                    svn_sqlite__db_t *sdb,
> > > +                                    apr_pool_t *scratch_pool)
> > >  {
> > ...
> > > +  /* If we removed the DB row, then remove the file. */
> > >    if (affected_rows > 0)
> > >      {
> > > -      /* Remove the file. */
> > > -      SVN_ERR(get_pristine_fname(&pristine_abspath, wcroot->abspath,
> > > -                                 sha1_checksum, TRUE /* create_subdir */,
> > > -                                 scratch_pool, scratch_pool));
> > > -      SVN_ERR(svn_io_remove_file2(pristine_abspath, TRUE /* ignore_enoent */,
> > > +      /* ### TODO: If file not present, log a consistency error but return
> > > +       * success. */
> > > +      SVN_ERR(svn_io_remove_file2(b->pristine_abspath, FALSE /* ignore_enoent */,
> > >                                    scratch_pool));
> > 
> > Shouldn't it be ignore_enoent=TRUE at least in release builds?
> 
> Yes.  Thanks.  I'll do it like this:
> 
> #ifdef SVN_DEBUG
>   /* Consistency check: If file not present, log an error. */
>   SVN_ERR(svn_io_remove_file2(b->pristine_abspath,
>                               FALSE /* ignore_enoent */,
>                               scratch_pool));
> #else
>   /* In release mode, if file not present, ignore and return success. */
>   SVN_ERR(svn_io_remove_file2(b->pristine_abspath,
>                               TRUE /* ignore_enoent */,
>                               scratch_pool));
> #endif
> 

+1

Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Julian Foad <ju...@wandisco.com>.
On Wed, 2011-02-23, Daniel Shahaf wrote:
> julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
> > Modified: subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c
> > URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c?rev=1073366&r1=1073365&r2=1073366&view=diff
> > ==============================================================================
> > --- subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c (original)
> > +++ subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c Tue Feb 22 15:38:35 2011
> > @@ -163,6 +165,57 @@ svn_wc__db_pristine_get_future_path(cons
> > +/* Set (*BATON->contents) to a readable stream from which the pristine text
> > + * identified by BATON->sha1_checksum can be read from the pristine store of
> > + * SDB.  If that text is not in the pristine store, return an error.
> > + *
> > + * Allocate the stream in BATON->result_pool.
> > + *
> > + * This function expects to be executed inside a SQLite txn.
> > + *
> > + * Implements 'notes/wc-ng/pristine-store' section A-3(d).
> > + * Implements svn_sqlite__transaction_callback_t. */
> > +static svn_error_t *
> > +pristine_read_txn(void *baton,
> > +                  svn_sqlite__db_t *sdb,
> > +                  apr_pool_t *scratch_pool)
> > +{
> 
> Should document somewhere that the returned stream is only guaranteed to
> remain valid if the caller holds a wc lock, etc.?
> 
> (svn_wc__db_pristine_read()'s docs don't mention locks)

For now, I've written (on both functions):

 The stream will remain readable as long as the pristine text remains in
 the store; if the pristine text is removed from the store, any subsequent
 read or other operation on the stream may return an error.

> Obviously there's still a bit of work here (eg, that "Delayed delete"
> flag on Windows) and it can't all be done in one commit.

I'm not sure we need that.

> > +  return SVN_NO_ERROR;
> > +}
> > +
> >  svn_error_t *
> >  svn_wc__db_pristine_read(svn_stream_t **contents,
> >                           svn_wc__db_t *db,
> > @@ -173,7 +226,7 @@ svn_wc__db_pristine_read(svn_stream_t **
> >  {
> >    svn_wc__db_pdh_t *pdh;
> >    const char *local_relpath;
> > -  const char *pristine_abspath;
> > +  pristine_read_baton_t b;
> ...
> > +pristine_install_txn(void *baton,
> > +                     svn_sqlite__db_t *sdb,
> > +                     apr_pool_t *scratch_pool)
> > +{
> ...
> > +  /* If this pristine text is already present in the store, just keep it:
> > +   * delete the new one and return. */
> > +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_SELECT_PRISTINE));
> > +  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
> > +  SVN_ERR(svn_sqlite__step(&have_row, stmt));
> > +  SVN_ERR(svn_sqlite__reset(stmt));
> > +  if (have_row)
> > +    {
> > +      /* Remove the temp file: it's already there */
> > +      /* ### TODO: Maybe verify the new file matches the existing one. */
> > +      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
> > +      return SVN_NO_ERROR;
> 
> Perhaps check (assert?) that b->PRISTINE_ABSPATH exists?

I think what I want to do here is, only if SVN_DEBUG is defined, check
that both the old and new files exist and that they are "the same" in
some loose sense.  For example:

#ifdef SVN_DEBUG
/* Consistency checks.  Verify both files exist and match. */
{
  apr_finfo_t finfo1, finfo2;
  SVN_ERR(svn_io_stat(&finfo1, b->tempfile_abspath, APR_FINFO_SIZE,
                      scratch_pool));
  SVN_ERR(svn_io_stat(&finfo2, b->pristine_abspath, APR_FINFO_SIZE,
                      scratch_pool));
  if (finfo1->size != finfo2->size)
    return svn_error_createf(SVN_ERR_WC_CORRUPT_TEXT_BASE, NULL,
                             _("New pristine doesn't match existing "
                               "pristine with same checksum '%s'"),
                             svn_checksum_to_ctring_display(
                               b->sha1_checksum, scratch_pool));
}
#endif


> (which reduces to dropping the "if (have_row)" check)

This 'if' block deals with the valid condition where the pristine text
that we're adding has already been added.  I want to deal with that case
neatly.  If I were to drop this block then in this case we would
overwrite the DB row, which would reset its ref count.

> > +    }
> > +
> > +  /* Move the file to its target location, or discard it if already there. */
> > +  SVN_ERR(svn_io_check_path(b->pristine_abspath, &kind, scratch_pool));

Instead, I will simplify this part.  This extra 'stat' and the following
if-then-else block are not really important, as they only deal with the
"orphan file" case.  It doesn't matter if we overwrite an orphan file,
so we can unconditionally do the "rename".

> > +  if (kind == svn_node_file)
> > +    {
> > +      /* Remove the temp file: it's already there */
> > +      /* ### TODO: Maybe verify the new file matches the existing one. */
> > +      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
> > +    }
> > +  else
> > +    {
> > +      SVN_ERR(svn_io_file_rename(b->tempfile_abspath, b->pristine_abspath,
> > +                                 scratch_pool));
> > +    }
> ...
> > +}
> 
> > +pristine_remove_if_unreferenced_txn(void *baton,
> > +                                    svn_sqlite__db_t *sdb,
> > +                                    apr_pool_t *scratch_pool)
> >  {
> ...
> > +  /* If we removed the DB row, then remove the file. */
> >    if (affected_rows > 0)
> >      {
> > -      /* Remove the file. */
> > -      SVN_ERR(get_pristine_fname(&pristine_abspath, wcroot->abspath,
> > -                                 sha1_checksum, TRUE /* create_subdir */,
> > -                                 scratch_pool, scratch_pool));
> > -      SVN_ERR(svn_io_remove_file2(pristine_abspath, TRUE /* ignore_enoent */,
> > +      /* ### TODO: If file not present, log a consistency error but return
> > +       * success. */
> > +      SVN_ERR(svn_io_remove_file2(b->pristine_abspath, FALSE /* ignore_enoent */,
> >                                    scratch_pool));
> 
> Shouldn't it be ignore_enoent=TRUE at least in release builds?

Yes.  Thanks.  I'll do it like this:

#ifdef SVN_DEBUG
  /* Consistency check: If file not present, log an error. */
  SVN_ERR(svn_io_remove_file2(b->pristine_abspath,
                              FALSE /* ignore_enoent */,
                              scratch_pool));
#else
  /* In release mode, if file not present, ignore and return success. */
  SVN_ERR(svn_io_remove_file2(b->pristine_abspath,
                              TRUE /* ignore_enoent */,
                              scratch_pool));
#endif


> >      }
> >  
> >    return SVN_NO_ERROR;
> >  }



Re: svn commit: r1073366 - in /subversion/trunk: notes/wc-ng/pristine-store subversion/libsvn_wc/wc-queries.sql subversion/libsvn_wc/wc_db_pristine.c

Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
julianfoad@apache.org wrote on Tue, Feb 22, 2011 at 15:38:35 -0000:
> Modified: subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c?rev=1073366&r1=1073365&r2=1073366&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c (original)
> +++ subversion/trunk/subversion/libsvn_wc/wc_db_pristine.c Tue Feb 22 15:38:35 2011
> @@ -163,6 +165,57 @@ svn_wc__db_pristine_get_future_path(cons
> +/* Set (*BATON->contents) to a readable stream from which the pristine text
> + * identified by BATON->sha1_checksum can be read from the pristine store of
> + * SDB.  If that text is not in the pristine store, return an error.
> + *
> + * Allocate the stream in BATON->result_pool.
> + *
> + * This function expects to be executed inside a SQLite txn.
> + *
> + * Implements 'notes/wc-ng/pristine-store' section A-3(d).
> + * Implements svn_sqlite__transaction_callback_t. */
> +static svn_error_t *
> +pristine_read_txn(void *baton,
> +                  svn_sqlite__db_t *sdb,
> +                  apr_pool_t *scratch_pool)
> +{

Should document somewhere that the returned stream is only guaranteed to
remain valid if the caller holds a wc lock, etc.?

(svn_wc__db_pristine_read()'s docs don't mention locks)

Obviously there's still a bit of work here (eg, that "Delayed delete"
flag on Windows) and it can't all be done in one commit.

> +  return SVN_NO_ERROR;
> +}
> +
>  svn_error_t *
>  svn_wc__db_pristine_read(svn_stream_t **contents,
>                           svn_wc__db_t *db,
> @@ -173,7 +226,7 @@ svn_wc__db_pristine_read(svn_stream_t **
>  {
>    svn_wc__db_pdh_t *pdh;
>    const char *local_relpath;
> -  const char *pristine_abspath;
> +  pristine_read_baton_t b;
...
> +pristine_install_txn(void *baton,
> +                     svn_sqlite__db_t *sdb,
> +                     apr_pool_t *scratch_pool)
> +{
...
> +  /* If this pristine text is already present in the store, just keep it:
> +   * delete the new one and return. */
> +  SVN_ERR(svn_sqlite__get_statement(&stmt, sdb, STMT_SELECT_PRISTINE));
> +  SVN_ERR(svn_sqlite__bind_checksum(stmt, 1, b->sha1_checksum, scratch_pool));
> +  SVN_ERR(svn_sqlite__step(&have_row, stmt));
> +  SVN_ERR(svn_sqlite__reset(stmt));
> +  if (have_row)
> +    {
> +      /* Remove the temp file: it's already there */
> +      /* ### TODO: Maybe verify the new file matches the existing one. */
> +      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
> +      return SVN_NO_ERROR;

Perhaps check (assert?) that b->PRISTINE_ABSPATH exists?
(which reduces to dropping the "if (have_row)" check)

> +    }
> +
> +  /* Move the file to its target location, or discard it if already there. */
> +  SVN_ERR(svn_io_check_path(b->pristine_abspath, &kind, scratch_pool));
> +  if (kind == svn_node_file)
> +    {
> +      /* Remove the temp file: it's already there */
> +      /* ### TODO: Maybe verify the new file matches the existing one. */
> +      SVN_ERR(svn_io_remove_file2(b->tempfile_abspath, FALSE, scratch_pool));
> +    }
> +  else
> +    {
> +      SVN_ERR(svn_io_file_rename(b->tempfile_abspath, b->pristine_abspath,
> +                                 scratch_pool));
> +    }
...
> +}

> +pristine_remove_if_unreferenced_txn(void *baton,
> +                                    svn_sqlite__db_t *sdb,
> +                                    apr_pool_t *scratch_pool)
>  {
...
> +  /* If we removed the DB row, then remove the file. */
>    if (affected_rows > 0)
>      {
> -      /* Remove the file. */
> -      SVN_ERR(get_pristine_fname(&pristine_abspath, wcroot->abspath,
> -                                 sha1_checksum, TRUE /* create_subdir */,
> -                                 scratch_pool, scratch_pool));
> -      SVN_ERR(svn_io_remove_file2(pristine_abspath, TRUE /* ignore_enoent */,
> +      /* ### TODO: If file not present, log a consistency error but return
> +       * success. */
> +      SVN_ERR(svn_io_remove_file2(b->pristine_abspath, FALSE /* ignore_enoent */,
>                                    scratch_pool));

Shouldn't it be ignore_enoent=TRUE at least in release builds?

>      }
>  
>    return SVN_NO_ERROR;
>  }