You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2017/03/06 19:53:55 UTC

svn commit: r1785754 - in /subversion/trunk/subversion: include/svn_error_codes.h libsvn_fs_fs/cached_data.c libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c tests/libsvn_fs/fs-test.c

Author: stefan2
Date: Mon Mar  6 19:53:55 2017
New Revision: 1785754

URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
Log:
Make FSFS consistency no longer depend on hash algorithms.

With this patch, FSFS will use MD5 and SHA1 for consistency *checks* and
efficiently finding *potential* duplicates but never allows them to control
the repository contents and structure.  The only part where we did up to now
was the rep-sharing.

We now compare the rep contents before dropping an incoming rep and replacing
it with a reference to an already existing one.

* subversion/include/svn_error_codes.h
  (SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP): Declare a new error code.

* subversion/libsvn_fs_fs/cached_data.h
  (svn_fs_fs__get_contents_from_file): Declare a new internal API function.

* subversion/libsvn_fs_fs/cached_data.c
  (rep_read_contents): We will now call this for empty reps as well, thus
                       we must handle that edge case, too.
  (svn_fs_fs__get_contents_from_file): Implement the new internal API such
                                       that it will work with the existing
                                       rep read stream functions.

* subversion/libsvn_fs_fs/transaction.c
  (get_writable_proto_rev): We must now be able to re-read the file to
                            compare rep contents.
  (get_shared_rep): Compare contents of the potential match and the new REP.
  (rep_write_contents_close,
   write_container_rep,
   write_container_delta_rep): Update callers.

* subversion/tests/libsvn_fs/fs-test.c
  (test_funcs): The test passes for FSFS now.

Modified:
    subversion/trunk/subversion/include/svn_error_codes.h
    subversion/trunk/subversion/libsvn_fs_fs/cached_data.c
    subversion/trunk/subversion/libsvn_fs_fs/cached_data.h
    subversion/trunk/subversion/libsvn_fs_fs/transaction.c
    subversion/trunk/subversion/tests/libsvn_fs/fs-test.c

Modified: subversion/trunk/subversion/include/svn_error_codes.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_error_codes.h?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_error_codes.h (original)
+++ subversion/trunk/subversion/include/svn_error_codes.h Mon Mar  6 19:53:55 2017
@@ -878,6 +878,11 @@ SVN_ERROR_START
              SVN_ERR_FS_CATEGORY_START + 65,
              "Property list is corrupt.")
 
+  /** @since New in 1.10. */
+  SVN_ERRDEF(SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP,
+             SVN_ERR_FS_CATEGORY_START + 67,
+             "Content checksums supposedly match but content does not.")
+
   /* repos errors */
 
   SVN_ERRDEF(SVN_ERR_REPOS_LOCKED,

Modified: subversion/trunk/subversion/libsvn_fs_fs/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/cached_data.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/cached_data.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/cached_data.c Mon Mar  6 19:53:55 2017
@@ -2138,8 +2138,13 @@ rep_read_contents(void *baton,
       SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
     }
 
-  /* Get the next block of data. */
-  SVN_ERR(get_contents_from_windows(rb, buf, len));
+  /* Get the next block of data.
+   * Keep in mind that the representation might be empty and leave us
+   * already positioned at the end of the rep. */
+  if (rb->off == rb->len)
+    *len = 0;
+  else
+    SVN_ERR(get_contents_from_windows(rb, buf, len));
 
   if (rb->current_fulltext)
     svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
@@ -2230,6 +2235,96 @@ svn_fs_fs__get_contents(svn_stream_t **c
 
   return SVN_NO_ERROR;
 }
+
+svn_error_t *
+svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
+                                  svn_fs_t *fs,
+                                  representation_t *rep,
+                                  apr_file_t *file,
+                                  apr_off_t offset,
+                                  apr_pool_t *pool)
+{
+  struct rep_read_baton *rb;
+  pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
+  rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
+  svn_fs_fs__rep_header_t *rh;
+
+  /* Initialize the reader baton.  Some members may added lazily
+   * while reading from the stream. */
+  SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
+
+  /* Continue constructing RS. Leave caches as NULL. */
+  rs->size = rep->size;
+  rs->revision = SVN_INVALID_REVNUM;
+  rs->item_index = 0;
+  rs->ver = -1;
+  rs->start = -1;
+
+  /* Provide just enough file access info to allow for a basic read from
+   * FILE but leave all index / footer info with empty values b/c FILE
+   * probably is not a complete revision file. */
+  rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
+  rs->sfile->revision = rep->revision;
+  rs->sfile->pool = pool;
+  rs->sfile->fs = fs;
+  rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
+  rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
+  rs->sfile->rfile->file = file;
+  rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
+
+  /* Read the rep header. */
+  SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
+  SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
+                                     pool, pool));
+  SVN_ERR(get_file_offset(&rs->start, rs, pool));
+  rs->header_size = rh->header_size;
+
+  /* Log the access. */
+  SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
+                         SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
+
+  /* Build the representation list (delta chain). */
+  if (rh->type == svn_fs_fs__rep_plain)
+    {
+      rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
+      rb->src_state = rs;
+    }
+  else if (rh->type == svn_fs_fs__rep_self_delta)
+    {
+      rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
+      APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
+      rb->src_state = NULL;
+    }
+  else
+    {
+      representation_t next_rep;
+
+      /* skip "SVNx" diff marker */
+      rs->current = 4;
+
+      /* REP's base rep is inside a proper revision.
+       * It can be reconstructed in the usual way.  */
+      next_rep.revision = rh->base_revision;
+      next_rep.item_index = rh->base_item_index;
+      next_rep.size = rh->base_length;
+      svn_fs_fs__id_txn_reset(&next_rep.txn_id);
+
+      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
+                             &rb->src_state, rb->fs, &next_rep,
+                             rb->filehandle_pool));
+
+      /* Insert the access to REP as the first element of the delta chain. */
+      svn_sort__array_insert(rb->rs_list, &rs, 0);
+    }
+
+  /* Now, the baton is complete and we can assemble the stream around it. */
+  *contents_p = svn_stream_create(rb, pool);
+  svn_stream_set_read2(*contents_p, NULL /* only full read support */,
+                       rep_read_contents);
+  svn_stream_set_close(*contents_p, rep_read_contents_close);
+
+  return SVN_NO_ERROR;
+}
 
 /* Baton for cache_access_wrapper. Wraps the original parameters of
  * svn_fs_fs__try_process_file_content().

Modified: subversion/trunk/subversion/libsvn_fs_fs/cached_data.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/cached_data.h?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/cached_data.h (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/cached_data.h Mon Mar  6 19:53:55 2017
@@ -93,6 +93,18 @@ svn_fs_fs__get_contents(svn_stream_t **c
                         svn_boolean_t cache_fulltext,
                         apr_pool_t *pool);
 
+/* Set *CONTENTS_P to be a readable svn_stream_t that receives the text
+   representation REP as seen in filesystem FS.  Read the latest element
+   of the delta chain from FILE at offset OFFSET.
+   Use POOL for allocations. */
+svn_error_t *
+svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
+                                  svn_fs_t *fs,
+                                  representation_t *rep,
+                                  apr_file_t *file,
+                                  apr_off_t offset,
+                                  apr_pool_t *pool);
+
 /* Attempt to fetch the text representation of node-revision NODEREV as
    seen in filesystem FS and pass it along with the BATON to the PROCESSOR.
    Set *SUCCESS only of the data could be provided and the processing

Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/transaction.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar  6 19:53:55 2017
@@ -442,7 +442,7 @@ get_writable_proto_rev(apr_file_t **file
   /* Now open the prototype revision file and seek to the end. */
   err = svn_io_file_open(file,
                          svn_fs_fs__path_txn_proto_rev(fs, txn_id, pool),
-                         APR_WRITE | APR_BUFFERED, APR_OS_DEFAULT,
+                         APR_READ | APR_WRITE | APR_BUFFERED, APR_OS_DEFAULT,
                          pool);
 
   /* You might expect that we could dispense with the following seek
@@ -2253,6 +2253,11 @@ rep_write_get_baton(struct rep_write_bat
    there may be new duplicate representations within the same uncommitted
    revision, those can be passed in REPS_HASH (maps a sha1 digest onto
    representation_t*), otherwise pass in NULL for REPS_HASH.
+
+   The content of both representations will be compared, taking REP's content
+   from FILE at OFFSET.  Only if they actually match, will *OLD_REP not be
+   NULL.
+
    Use RESULT_POOL for *OLD_REP  allocations and SCRATCH_POOL for temporaries.
    The lifetime of *OLD_REP is limited by both, RESULT_POOL and REP lifetime.
  */
@@ -2260,6 +2265,8 @@ static svn_error_t *
 get_shared_rep(representation_t **old_rep,
                svn_fs_t *fs,
                representation_t *rep,
+               apr_file_t *file,
+               apr_off_t offset,
                apr_hash_t *reps_hash,
                apr_pool_t *result_pool,
                apr_pool_t *scratch_pool)
@@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
       (*old_rep)->uniquifier = rep->uniquifier;
     }
 
+  /* If we (very likely) found a matching representation, compare the actual
+   * contents such that we can be sure that no rep-cache.db corruption or
+   * hash collision produced a false positive. */
+  if (*old_rep)
+    {
+      apr_off_t old_position;
+      svn_stream_t *contents;
+      svn_stream_t *old_contents;
+      svn_boolean_t same;
+
+      /* The existing representation may itsel be part of the current
+       * transaction.  In that case, it may be in different stages of
+       * the commit finalization process.
+       *
+       * OLD_REP_NORM is the same as that OLD_REP but it is assigned
+       * explicitly to REP's transaction if OLD_REP does not point
+       * to an already committed revision.  This then prevents the
+       * revision lookup and the txn data will be accessed.
+       */
+      representation_t old_rep_norm = **old_rep;
+      if (   !SVN_IS_VALID_REVNUM(old_rep_norm.revision)
+          || old_rep_norm.revision > ffd->youngest_rev_cache)
+        old_rep_norm.txn_id = rep->txn_id;
+
+      /* Make sure we can later restore FILE's current position. */
+      SVN_ERR(svn_io_file_get_offset(&old_position, file, scratch_pool));
+
+      /* Compare the two representations.
+       * Note that the stream comparison might also produce MD5 checksum
+       * errors or other failures in case of SHA1 collisions. */
+      SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
+                                                offset, scratch_pool));
+      SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
+                                      FALSE, scratch_pool));
+      err = svn_stream_contents_same2(&same, contents, old_contents,
+                                      scratch_pool);
+
+      /* Restore FILE's read / write position. */
+      SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
+
+      /* A mismatch should be extremely rare.
+       * If it does happen, log the event and don't share the rep. */
+      if (!same || err)
+        {
+          /* SHA1 collision or worse.
+             We can continue without rep-sharing, but warn.
+            */
+          svn_stringbuf_t *old_rep_str
+            = svn_fs_fs__unparse_representation(*old_rep,
+                                                ffd->format, FALSE,
+                                                scratch_pool,
+                                                scratch_pool);
+          svn_stringbuf_t *rep_str
+            = svn_fs_fs__unparse_representation(rep,
+                                                ffd->format, FALSE,
+                                                scratch_pool,
+                                                scratch_pool);
+          const char *checksum__str
+            = svn_checksum_to_cstring_display(&checksum, scratch_pool);
+
+          err = svn_error_createf(SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP,
+                                  err, "SHA1 of reps '%s' and '%s' "
+                                  "matches (%s) but contents differ",
+                                  old_rep_str->data, rep_str->data,
+                                  checksum__str);
+
+          (fs->warning)(fs->warning_baton, err);
+          svn_error_clear(err);
+          *old_rep = NULL;
+        }
+    }
+
   return SVN_NO_ERROR;
 }
 
@@ -2446,8 +2525,8 @@ rep_write_contents_close(void *baton)
 
   /* Check and see if we already have a representation somewhere that's
      identical to the one we just wrote out. */
-  SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, NULL, b->result_pool,
-                         b->scratch_pool));
+  SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, b->file, b->rep_offset, NULL,
+                         b->result_pool, b->scratch_pool));
 
   if (old_rep)
     {
@@ -2735,8 +2814,8 @@ write_container_rep(representation_t *re
   if (allow_rep_sharing)
     {
       representation_t *old_rep;
-      SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
-                             scratch_pool));
+      SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+                             scratch_pool, scratch_pool));
 
       if (old_rep)
         {
@@ -2887,8 +2966,8 @@ write_container_delta_rep(representation
   if (allow_rep_sharing)
     {
       representation_t *old_rep;
-      SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
-                             scratch_pool));
+      SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+                             scratch_pool, scratch_pool));
 
       if (old_rep)
         {

Modified: subversion/trunk/subversion/tests/libsvn_fs/fs-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_fs/fs-test.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_fs/fs-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_fs/fs-test.c Mon Mar  6 19:53:55 2017
@@ -7482,8 +7482,9 @@ static struct svn_test_descriptor_t test
                        "test commit with locked rep-cache"),
     SVN_TEST_OPTS_PASS(test_cache_clear_during_stream,
                        "test clearing the cache while streaming a rep"),
-    SVN_TEST_OPTS_XFAIL(test_rep_sharing_strict_content_check,
-                       "test rep-sharing on content rather than SHA1"),
+    SVN_TEST_OPTS_XFAIL_OTOH(test_rep_sharing_strict_content_check,
+                             "test rep-sharing on content rather than SHA1",
+                             SVN_TEST_PASS_IF_FS_TYPE_IS(SVN_FS_TYPE_FSFS)),
     SVN_TEST_NULL
   };
 



Re: svn commit: r1785754 - in /subversion/trunk/subversion: include/svn_error_codes.h libsvn_fs_fs/cached_data.c libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c tests/libsvn_fs/fs-test.c

Posted by Stefan Fuhrmann <st...@apache.org>.
On 06.03.2017 21:56, Daniel Shahaf wrote:
> stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
>> Author: stefan2
>> Date: Mon Mar  6 19:53:55 2017
>> New Revision: 1785754
>>
>> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
>> Log:
>> Make FSFS consistency no longer depend on hash algorithms.
> First of all, thanks for fixing this.
>
> I didn't do a full review with out-of-diff context, but I did spot two
> issues:
>
>> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar  6 19:53:55 2017
>> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
>> +      /* The existing representation may itsel be part of the current
> Typo: "itsel".

Fixed in r1786515.
>
>> +      /* Compare the two representations.
>> +       * Note that the stream comparison might also produce MD5 checksum
>> +       * errors or other failures in case of SHA1 collisions. */
>> +      SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
>> +                                                offset, scratch_pool));
>> +      SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
>> +                                      FALSE, scratch_pool));
>> +      err = svn_stream_contents_same2(&same, contents, old_contents,
>> +                                      scratch_pool);
>> +
>> +      /* Restore FILE's read / write position. */
>> +      SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
> Leaks 'err'.
Fixed in r1786446.

Thanks for the review!

-- Stefan^2.

Re: svn commit: r1785754 - in /subversion/trunk/subversion: include/svn_error_codes.h libsvn_fs_fs/cached_data.c libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c tests/libsvn_fs/fs-test.c

Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
> Author: stefan2
> Date: Mon Mar  6 19:53:55 2017
> New Revision: 1785754
> 
> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
> Log:
> Make FSFS consistency no longer depend on hash algorithms.

First of all, thanks for fixing this.

I didn't do a full review with out-of-diff context, but I did spot two
issues:

> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar  6 19:53:55 2017
> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
> +      /* The existing representation may itsel be part of the current

Typo: "itsel".

> +      /* Compare the two representations.
> +       * Note that the stream comparison might also produce MD5 checksum
> +       * errors or other failures in case of SHA1 collisions. */
> +      SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
> +                                                offset, scratch_pool));
> +      SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
> +                                      FALSE, scratch_pool));
> +      err = svn_stream_contents_same2(&same, contents, old_contents,
> +                                      scratch_pool);
> +
> +      /* Restore FILE's read / write position. */
> +      SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));

Leaks 'err'.

Cheers,

Daniel

Re: svn commit: r1785754 - in /subversion/trunk/subversion: include/svn_error_codes.h libsvn_fs_fs/cached_data.c libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c tests/libsvn_fs/fs-test.c

Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
> Author: stefan2
> Date: Mon Mar  6 19:53:55 2017
> New Revision: 1785754
> 
> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
> Log:
> Make FSFS consistency no longer depend on hash algorithms.

First of all, thanks for fixing this.

I didn't do a full review with out-of-diff context, but I did spot two
issues:

> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar  6 19:53:55 2017
> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
> +      /* The existing representation may itsel be part of the current

Typo: "itsel".

> +      /* Compare the two representations.
> +       * Note that the stream comparison might also produce MD5 checksum
> +       * errors or other failures in case of SHA1 collisions. */
> +      SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
> +                                                offset, scratch_pool));
> +      SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
> +                                      FALSE, scratch_pool));
> +      err = svn_stream_contents_same2(&same, contents, old_contents,
> +                                      scratch_pool);
> +
> +      /* Restore FILE's read / write position. */
> +      SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));

Leaks 'err'.

Cheers,

Daniel