You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2017/03/06 19:53:55 UTC
svn commit: r1785754 - in /subversion/trunk/subversion:
include/svn_error_codes.h libsvn_fs_fs/cached_data.c
libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c
tests/libsvn_fs/fs-test.c
Author: stefan2
Date: Mon Mar 6 19:53:55 2017
New Revision: 1785754
URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
Log:
Make FSFS consistency no longer depend on hash algorithms.
With this patch, FSFS will use MD5 and SHA1 for consistency *checks* and
efficiently finding *potential* duplicates but never allows them to control
the repository contents and structure. The only part where we did up to now
was the rep-sharing.
We now compare the rep contents before dropping an incoming rep and replacing
it with a reference to an already existing one.
* subversion/include/svn_error_codes.h
(SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP): Declare a new error code.
* subversion/libsvn_fs_fs/cached_data.h
(svn_fs_fs__get_contents_from_file): Declare a new internal API function.
* subversion/libsvn_fs_fs/cached_data.c
(rep_read_contents): We will now call this for empty reps as well, thus
we must handle that edge case, too.
(svn_fs_fs__get_contents_from_file): Implement the new internal API such
that it will work with the existing
rep read stream functions.
* subversion/libsvn_fs_fs/transaction.c
(get_writable_proto_rev): We must now be able to re-read the file to
compare rep contents.
(get_shared_rep): Compare contents of the potential match and the new REP.
(rep_write_contents_close,
write_container_rep,
write_container_delta_rep): Update callers.
* subversion/tests/libsvn_fs/fs-test.c
(test_funcs): The test passes for FSFS now.
Modified:
subversion/trunk/subversion/include/svn_error_codes.h
subversion/trunk/subversion/libsvn_fs_fs/cached_data.c
subversion/trunk/subversion/libsvn_fs_fs/cached_data.h
subversion/trunk/subversion/libsvn_fs_fs/transaction.c
subversion/trunk/subversion/tests/libsvn_fs/fs-test.c
Modified: subversion/trunk/subversion/include/svn_error_codes.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/svn_error_codes.h?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/include/svn_error_codes.h (original)
+++ subversion/trunk/subversion/include/svn_error_codes.h Mon Mar 6 19:53:55 2017
@@ -878,6 +878,11 @@ SVN_ERROR_START
SVN_ERR_FS_CATEGORY_START + 65,
"Property list is corrupt.")
+ /** @since New in 1.10. */
+ SVN_ERRDEF(SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP,
+ SVN_ERR_FS_CATEGORY_START + 67,
+ "Content checksums supposedly match but content does not.")
+
/* repos errors */
SVN_ERRDEF(SVN_ERR_REPOS_LOCKED,
Modified: subversion/trunk/subversion/libsvn_fs_fs/cached_data.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/cached_data.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/cached_data.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/cached_data.c Mon Mar 6 19:53:55 2017
@@ -2138,8 +2138,13 @@ rep_read_contents(void *baton,
SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
}
- /* Get the next block of data. */
- SVN_ERR(get_contents_from_windows(rb, buf, len));
+ /* Get the next block of data.
+ * Keep in mind that the representation might be empty and leave us
+ * already positioned at the end of the rep. */
+ if (rb->off == rb->len)
+ *len = 0;
+ else
+ SVN_ERR(get_contents_from_windows(rb, buf, len));
if (rb->current_fulltext)
svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
@@ -2230,6 +2235,96 @@ svn_fs_fs__get_contents(svn_stream_t **c
return SVN_NO_ERROR;
}
+
+svn_error_t *
+svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ apr_file_t *file,
+ apr_off_t offset,
+ apr_pool_t *pool)
+{
+ struct rep_read_baton *rb;
+ pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
+ rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
+ svn_fs_fs__rep_header_t *rh;
+
+ /* Initialize the reader baton. Some members may added lazily
+ * while reading from the stream. */
+ SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
+
+ /* Continue constructing RS. Leave caches as NULL. */
+ rs->size = rep->size;
+ rs->revision = SVN_INVALID_REVNUM;
+ rs->item_index = 0;
+ rs->ver = -1;
+ rs->start = -1;
+
+ /* Provide just enough file access info to allow for a basic read from
+ * FILE but leave all index / footer info with empty values b/c FILE
+ * probably is not a complete revision file. */
+ rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
+ rs->sfile->revision = rep->revision;
+ rs->sfile->pool = pool;
+ rs->sfile->fs = fs;
+ rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
+ rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
+ rs->sfile->rfile->file = file;
+ rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
+
+ /* Read the rep header. */
+ SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
+ SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
+ pool, pool));
+ SVN_ERR(get_file_offset(&rs->start, rs, pool));
+ rs->header_size = rh->header_size;
+
+ /* Log the access. */
+ SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
+ SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
+
+ /* Build the representation list (delta chain). */
+ if (rh->type == svn_fs_fs__rep_plain)
+ {
+ rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
+ rb->src_state = rs;
+ }
+ else if (rh->type == svn_fs_fs__rep_self_delta)
+ {
+ rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
+ APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
+ rb->src_state = NULL;
+ }
+ else
+ {
+ representation_t next_rep;
+
+ /* skip "SVNx" diff marker */
+ rs->current = 4;
+
+ /* REP's base rep is inside a proper revision.
+ * It can be reconstructed in the usual way. */
+ next_rep.revision = rh->base_revision;
+ next_rep.item_index = rh->base_item_index;
+ next_rep.size = rh->base_length;
+ svn_fs_fs__id_txn_reset(&next_rep.txn_id);
+
+ SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
+ &rb->src_state, rb->fs, &next_rep,
+ rb->filehandle_pool));
+
+ /* Insert the access to REP as the first element of the delta chain. */
+ svn_sort__array_insert(rb->rs_list, &rs, 0);
+ }
+
+ /* Now, the baton is complete and we can assemble the stream around it. */
+ *contents_p = svn_stream_create(rb, pool);
+ svn_stream_set_read2(*contents_p, NULL /* only full read support */,
+ rep_read_contents);
+ svn_stream_set_close(*contents_p, rep_read_contents_close);
+
+ return SVN_NO_ERROR;
+}
/* Baton for cache_access_wrapper. Wraps the original parameters of
* svn_fs_fs__try_process_file_content().
Modified: subversion/trunk/subversion/libsvn_fs_fs/cached_data.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/cached_data.h?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/cached_data.h (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/cached_data.h Mon Mar 6 19:53:55 2017
@@ -93,6 +93,18 @@ svn_fs_fs__get_contents(svn_stream_t **c
svn_boolean_t cache_fulltext,
apr_pool_t *pool);
+/* Set *CONTENTS_P to be a readable svn_stream_t that receives the text
+ representation REP as seen in filesystem FS. Read the latest element
+ of the delta chain from FILE at offset OFFSET.
+ Use POOL for allocations. */
+svn_error_t *
+svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
+ svn_fs_t *fs,
+ representation_t *rep,
+ apr_file_t *file,
+ apr_off_t offset,
+ apr_pool_t *pool);
+
/* Attempt to fetch the text representation of node-revision NODEREV as
seen in filesystem FS and pass it along with the BATON to the PROCESSOR.
Set *SUCCESS only of the data could be provided and the processing
Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/transaction.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar 6 19:53:55 2017
@@ -442,7 +442,7 @@ get_writable_proto_rev(apr_file_t **file
/* Now open the prototype revision file and seek to the end. */
err = svn_io_file_open(file,
svn_fs_fs__path_txn_proto_rev(fs, txn_id, pool),
- APR_WRITE | APR_BUFFERED, APR_OS_DEFAULT,
+ APR_READ | APR_WRITE | APR_BUFFERED, APR_OS_DEFAULT,
pool);
/* You might expect that we could dispense with the following seek
@@ -2253,6 +2253,11 @@ rep_write_get_baton(struct rep_write_bat
there may be new duplicate representations within the same uncommitted
revision, those can be passed in REPS_HASH (maps a sha1 digest onto
representation_t*), otherwise pass in NULL for REPS_HASH.
+
+ The content of both representations will be compared, taking REP's content
+ from FILE at OFFSET. Only if they actually match, will *OLD_REP not be
+ NULL.
+
Use RESULT_POOL for *OLD_REP allocations and SCRATCH_POOL for temporaries.
The lifetime of *OLD_REP is limited by both, RESULT_POOL and REP lifetime.
*/
@@ -2260,6 +2265,8 @@ static svn_error_t *
get_shared_rep(representation_t **old_rep,
svn_fs_t *fs,
representation_t *rep,
+ apr_file_t *file,
+ apr_off_t offset,
apr_hash_t *reps_hash,
apr_pool_t *result_pool,
apr_pool_t *scratch_pool)
@@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
(*old_rep)->uniquifier = rep->uniquifier;
}
+ /* If we (very likely) found a matching representation, compare the actual
+ * contents such that we can be sure that no rep-cache.db corruption or
+ * hash collision produced a false positive. */
+ if (*old_rep)
+ {
+ apr_off_t old_position;
+ svn_stream_t *contents;
+ svn_stream_t *old_contents;
+ svn_boolean_t same;
+
+ /* The existing representation may itsel be part of the current
+ * transaction. In that case, it may be in different stages of
+ * the commit finalization process.
+ *
+ * OLD_REP_NORM is the same as that OLD_REP but it is assigned
+ * explicitly to REP's transaction if OLD_REP does not point
+ * to an already committed revision. This then prevents the
+ * revision lookup and the txn data will be accessed.
+ */
+ representation_t old_rep_norm = **old_rep;
+ if ( !SVN_IS_VALID_REVNUM(old_rep_norm.revision)
+ || old_rep_norm.revision > ffd->youngest_rev_cache)
+ old_rep_norm.txn_id = rep->txn_id;
+
+ /* Make sure we can later restore FILE's current position. */
+ SVN_ERR(svn_io_file_get_offset(&old_position, file, scratch_pool));
+
+ /* Compare the two representations.
+ * Note that the stream comparison might also produce MD5 checksum
+ * errors or other failures in case of SHA1 collisions. */
+ SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
+ offset, scratch_pool));
+ SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
+ FALSE, scratch_pool));
+ err = svn_stream_contents_same2(&same, contents, old_contents,
+ scratch_pool);
+
+ /* Restore FILE's read / write position. */
+ SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
+
+ /* A mismatch should be extremely rare.
+ * If it does happen, log the event and don't share the rep. */
+ if (!same || err)
+ {
+ /* SHA1 collision or worse.
+ We can continue without rep-sharing, but warn.
+ */
+ svn_stringbuf_t *old_rep_str
+ = svn_fs_fs__unparse_representation(*old_rep,
+ ffd->format, FALSE,
+ scratch_pool,
+ scratch_pool);
+ svn_stringbuf_t *rep_str
+ = svn_fs_fs__unparse_representation(rep,
+ ffd->format, FALSE,
+ scratch_pool,
+ scratch_pool);
+ const char *checksum__str
+ = svn_checksum_to_cstring_display(&checksum, scratch_pool);
+
+ err = svn_error_createf(SVN_ERR_FS_AMBIUGOUS_CHECKSUM_REP,
+ err, "SHA1 of reps '%s' and '%s' "
+ "matches (%s) but contents differ",
+ old_rep_str->data, rep_str->data,
+ checksum__str);
+
+ (fs->warning)(fs->warning_baton, err);
+ svn_error_clear(err);
+ *old_rep = NULL;
+ }
+ }
+
return SVN_NO_ERROR;
}
@@ -2446,8 +2525,8 @@ rep_write_contents_close(void *baton)
/* Check and see if we already have a representation somewhere that's
identical to the one we just wrote out. */
- SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, NULL, b->result_pool,
- b->scratch_pool));
+ SVN_ERR(get_shared_rep(&old_rep, b->fs, rep, b->file, b->rep_offset, NULL,
+ b->result_pool, b->scratch_pool));
if (old_rep)
{
@@ -2735,8 +2814,8 @@ write_container_rep(representation_t *re
if (allow_rep_sharing)
{
representation_t *old_rep;
- SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
- scratch_pool));
+ SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+ scratch_pool, scratch_pool));
if (old_rep)
{
@@ -2887,8 +2966,8 @@ write_container_delta_rep(representation
if (allow_rep_sharing)
{
representation_t *old_rep;
- SVN_ERR(get_shared_rep(&old_rep, fs, rep, reps_hash, scratch_pool,
- scratch_pool));
+ SVN_ERR(get_shared_rep(&old_rep, fs, rep, file, offset, reps_hash,
+ scratch_pool, scratch_pool));
if (old_rep)
{
Modified: subversion/trunk/subversion/tests/libsvn_fs/fs-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_fs/fs-test.c?rev=1785754&r1=1785753&r2=1785754&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_fs/fs-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_fs/fs-test.c Mon Mar 6 19:53:55 2017
@@ -7482,8 +7482,9 @@ static struct svn_test_descriptor_t test
"test commit with locked rep-cache"),
SVN_TEST_OPTS_PASS(test_cache_clear_during_stream,
"test clearing the cache while streaming a rep"),
- SVN_TEST_OPTS_XFAIL(test_rep_sharing_strict_content_check,
- "test rep-sharing on content rather than SHA1"),
+ SVN_TEST_OPTS_XFAIL_OTOH(test_rep_sharing_strict_content_check,
+ "test rep-sharing on content rather than SHA1",
+ SVN_TEST_PASS_IF_FS_TYPE_IS(SVN_FS_TYPE_FSFS)),
SVN_TEST_NULL
};
Re: svn commit: r1785754 - in /subversion/trunk/subversion:
include/svn_error_codes.h libsvn_fs_fs/cached_data.c
libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c
tests/libsvn_fs/fs-test.c
Posted by Stefan Fuhrmann <st...@apache.org>.
On 06.03.2017 21:56, Daniel Shahaf wrote:
> stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
>> Author: stefan2
>> Date: Mon Mar 6 19:53:55 2017
>> New Revision: 1785754
>>
>> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
>> Log:
>> Make FSFS consistency no longer depend on hash algorithms.
> First of all, thanks for fixing this.
>
> I didn't do a full review with out-of-diff context, but I did spot two
> issues:
>
>> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar 6 19:53:55 2017
>> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
>> + /* The existing representation may itsel be part of the current
> Typo: "itsel".
Fixed in r1786515.
>
>> + /* Compare the two representations.
>> + * Note that the stream comparison might also produce MD5 checksum
>> + * errors or other failures in case of SHA1 collisions. */
>> + SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
>> + offset, scratch_pool));
>> + SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
>> + FALSE, scratch_pool));
>> + err = svn_stream_contents_same2(&same, contents, old_contents,
>> + scratch_pool);
>> +
>> + /* Restore FILE's read / write position. */
>> + SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
> Leaks 'err'.
Fixed in r1786446.
Thanks for the review!
-- Stefan^2.
Re: svn commit: r1785754 - in /subversion/trunk/subversion:
include/svn_error_codes.h libsvn_fs_fs/cached_data.c
libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c
tests/libsvn_fs/fs-test.c
Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
> Author: stefan2
> Date: Mon Mar 6 19:53:55 2017
> New Revision: 1785754
>
> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
> Log:
> Make FSFS consistency no longer depend on hash algorithms.
First of all, thanks for fixing this.
I didn't do a full review with out-of-diff context, but I did spot two
issues:
> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar 6 19:53:55 2017
> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
> + /* The existing representation may itsel be part of the current
Typo: "itsel".
> + /* Compare the two representations.
> + * Note that the stream comparison might also produce MD5 checksum
> + * errors or other failures in case of SHA1 collisions. */
> + SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
> + offset, scratch_pool));
> + SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
> + FALSE, scratch_pool));
> + err = svn_stream_contents_same2(&same, contents, old_contents,
> + scratch_pool);
> +
> + /* Restore FILE's read / write position. */
> + SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
Leaks 'err'.
Cheers,
Daniel
Re: svn commit: r1785754 - in /subversion/trunk/subversion:
include/svn_error_codes.h libsvn_fs_fs/cached_data.c
libsvn_fs_fs/cached_data.h libsvn_fs_fs/transaction.c
tests/libsvn_fs/fs-test.c
Posted by Daniel Shahaf <d....@daniel.shahaf.name>.
stefan2@apache.org wrote on Mon, Mar 06, 2017 at 19:53:55 -0000:
> Author: stefan2
> Date: Mon Mar 6 19:53:55 2017
> New Revision: 1785754
>
> URL: http://svn.apache.org/viewvc?rev=1785754&view=rev
> Log:
> Make FSFS consistency no longer depend on hash algorithms.
First of all, thanks for fixing this.
I didn't do a full review with out-of-diff context, but I did spot two
issues:
> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Mon Mar 6 19:53:55 2017
> @@ -2385,6 +2392,78 @@ get_shared_rep(representation_t **old_re
> + /* The existing representation may itsel be part of the current
Typo: "itsel".
> + /* Compare the two representations.
> + * Note that the stream comparison might also produce MD5 checksum
> + * errors or other failures in case of SHA1 collisions. */
> + SVN_ERR(svn_fs_fs__get_contents_from_file(&contents, fs, rep, file,
> + offset, scratch_pool));
> + SVN_ERR(svn_fs_fs__get_contents(&old_contents, fs, &old_rep_norm,
> + FALSE, scratch_pool));
> + err = svn_stream_contents_same2(&same, contents, old_contents,
> + scratch_pool);
> +
> + /* Restore FILE's read / write position. */
> + SVN_ERR(svn_io_file_seek(file, APR_SET, &old_position, scratch_pool));
Leaks 'err'.
Cheers,
Daniel