You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by David Glasser <gl...@davidglasser.net> on 2007/12/21 20:32:18 UTC

Re: svn commit: r28624 - branches/issue-2897/subversion/libsvn_fs_util

On Dec 21, 2007 4:05 AM,  <ka...@tigris.org> wrote:
> Author: kameshj
> Date: Fri Dec 21 04:05:33 2007
> New Revision: 28624
>
> Log:
> On the issue-2897 branch:
>
> Fix 'get-commit-and-merge-ranges' data retrieval logic as found by
> David Glasser on
> http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=133562
>
> * subversion/libsvn_fs_util/mergeinfo-sqlite-index.c
>   (get_parent_target_path_having_mergeinfo): Removed.
>   (get_commit_and_merge_ranges): Reimplemented to retrieve the mergeinfo
>    from a ancestrally nearest merge target on a per revision basis.
>
> Found by: glasser
>
>
> Modified:
>    branches/issue-2897/subversion/libsvn_fs_util/mergeinfo-sqlite-index.c
>
> Modified: branches/issue-2897/subversion/libsvn_fs_util/mergeinfo-sqlite-index.c
> URL: http://svn.collab.net/viewvc/svn/branches/issue-2897/subversion/libsvn_fs_util/mergeinfo-sqlite-index.c?pathrev=28624&r1=28623&r2=28624
> ==============================================================================
> --- branches/issue-2897/subversion/libsvn_fs_util/mergeinfo-sqlite-index.c      (original)
> +++ branches/issue-2897/subversion/libsvn_fs_util/mergeinfo-sqlite-index.c      Fri Dec 21 04:05:33 2007
> @@ -847,62 +847,6 @@
>    return svn_fs__sqlite_close(db, err);
>  }
>
> -/* Helper function for 'get_commit_and_merge_ranges'.
> -
> -   Set *PARENT_WITH_MERGEINFO to the path where the mergeinfo of
> -   MERGE_TARGET elides to, if there exists no mergeinfo in any of the parent
> -   it sets it to NULL.  Retrieve the data from DB, within the
> -   commit range MIN_COMMIT_REV(exclusive):MAX_COMMIT_REV(inclusive).
> -   Perform all allocations in POOL. */
> -static svn_error_t *
> -get_parent_target_path_having_mergeinfo(const char **parent_with_mergeinfo,
> -                                        sqlite3 *db,
> -                                        const char *merge_target,
> -                                        svn_revnum_t min_commit_rev,
> -                                        svn_revnum_t max_commit_rev,
> -                                        apr_pool_t *pool)
> -{
> -  svn_fs__sqlite_stmt_t *stmt;
> -  svn_boolean_t got_row;
> -  *parent_with_mergeinfo = NULL;
> -  SVN_ERR(svn_fs__sqlite_prepare(&stmt, db,
> -                                 "SELECT revision FROM mergeinfo_changed WHERE"
> -                                 " mergedto = ? AND"
> -                                 " revision between ? AND ?;", pool));
> -  SVN_ERR(svn_fs__sqlite_bind_text(stmt, 1, merge_target));
> -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 2, min_commit_rev+1));
> -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 3, max_commit_rev));
> -  SVN_ERR(svn_fs__sqlite_step(&got_row, stmt));
> -  SVN_ERR(svn_fs__sqlite_reset(stmt));
> -  if (got_row)
> -    {
> -      *parent_with_mergeinfo = apr_pstrdup(pool, merge_target);
> -    }
> -  else
> -    {
> -      apr_pool_t *subpool = svn_pool_create(pool);
> -      const char *parent_path = svn_path_dirname(merge_target, subpool);
> -      while (strcmp(parent_path, "/") != 0)
> -        {
> -          SVN_ERR(svn_fs__sqlite_bind_text(stmt, 1, parent_path));
> -          SVN_ERR(svn_fs__sqlite_step(&got_row, stmt));
> -          SVN_ERR(svn_fs__sqlite_reset(stmt));
> -          if (got_row)
> -            {
> -              *parent_with_mergeinfo = apr_pstrdup(pool, parent_path);
> -              break;
> -            }
> -          else
> -            parent_path = svn_path_dirname(parent_path, subpool);
> -        }
> -      svn_pool_destroy(subpool);
> -    }
> -
> -  SVN_ERR(svn_fs__sqlite_finalize(stmt));
> -
> -  return SVN_NO_ERROR;
> -}
> -
>  /* Helper function for 'svn_fs_mergeinfo__get_commit_and_merge_ranges'.
>
>     Set *COMMIT_RANGELIST to a list of revisions (sorted in
> @@ -941,56 +885,80 @@
>  {
>    svn_fs__sqlite_stmt_t *stmt;
>    svn_boolean_t got_row;
> -  const char *real_mergeinfo_target = merge_target;
> -  const char *real_merge_source = merge_source;
>    apr_array_header_t *merge_rangelist;
> +  svn_boolean_t get_elided_parent_mergeinfo = FALSE;
>    svn_revnum_t last_commit_rev = SVN_INVALID_REVNUM;
> +  apr_hash_t *rev_target_hash = apr_hash_make(pool);
>
>    if (inherit == svn_mergeinfo_inherited
>        || inherit == svn_mergeinfo_nearest_ancestor)
> -    SVN_ERR(get_parent_target_path_having_mergeinfo(&real_mergeinfo_target,
> -                                                    db, merge_target,
> -                                                    min_commit_rev,
> -                                                    max_commit_rev, pool));
> +    get_elided_parent_mergeinfo = TRUE;

Should this variable be called get_inherited_mergeinfo instead?

(Also, I don't think you'll be handling the nearest_ancestor case
correctly.)

>    *commit_rangelist = apr_array_make(pool, 0, sizeof(svn_merge_range_t *));
>    *merge_ranges_list = apr_array_make(pool, 0, sizeof(apr_array_header_t *));
>    merge_rangelist = apr_array_make(pool, 0, sizeof(svn_merge_range_t *));
>
> -  if (!real_mergeinfo_target)
> -    return SVN_NO_ERROR;
> -
> -  if (strcmp(real_mergeinfo_target, merge_target) != 0)
> -    {
> -      int parent_merge_src_end;
> -      const char *target_base_name =
> -        merge_target + strlen(real_mergeinfo_target);
> -      parent_merge_src_end = strlen(merge_source) - strlen(target_base_name);
> -      real_merge_source = apr_pstrndup(pool, merge_source,
> -                                       parent_merge_src_end);
> -    }
>    SVN_ERR(svn_fs__sqlite_prepare(&stmt, db,
>                                   "SELECT revision, mergedrevstart, "
> -                                 "mergedrevend, inheritable "
> -                                 "FROM mergeinfo_changed "
> -                                 "WHERE mergedfrom = ? AND mergedto = ? "
> -                                 "AND revision between ? AND ? "
> -                                 "ORDER BY revision ASC ;", pool));
> -  SVN_ERR(svn_fs__sqlite_bind_text(stmt, 1, real_merge_source));
> -  SVN_ERR(svn_fs__sqlite_bind_text(stmt, 2, real_mergeinfo_target));
> -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 3, min_commit_rev + 1));
> -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 4, max_commit_rev));
> +                                 "mergedrevend, inheritable, mergedfrom, "
> +                                 "mergedto FROM mergeinfo_changed "
> +                                 "WHERE revision between ? AND ? "
> +                                 "ORDER BY revision ASC, mergedto ASC; ",
> +                                 pool));

This will iterate over *every* mergeinfo change *everywhere* in the
repository between the two revisions.  I can't imagine that this is
efficient for large multi-project multi-branch repositories.

> +  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 1, min_commit_rev + 1));
> +  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 2, max_commit_rev));
>    SVN_ERR(svn_fs__sqlite_step(&got_row, stmt));
>    while (got_row)
>      {
>        svn_merge_range_t *merge_range;
>        svn_revnum_t commit_rev, start_rev, end_rev;
> +      svn_revnum_t *commit_rev_ptr;
> +      const char *mergedfrom, *mergedto;
>        int inheritable;
> +      const char *cur_parent_path;
>
>        merge_range = apr_pcalloc(pool, sizeof(*merge_range));
> +      commit_rev_ptr = apr_pcalloc(pool, sizeof(*commit_rev_ptr));
>        commit_rev = svn_fs__sqlite_column_revnum(stmt, 0);
> +      *commit_rev_ptr = commit_rev;
>        start_rev = svn_fs__sqlite_column_revnum(stmt, 1);
>        end_rev = svn_fs__sqlite_column_revnum(stmt, 2);
>        inheritable = svn_fs__sqlite_column_boolean(stmt, 3);
> +      mergedfrom = svn_fs__sqlite_column_text(stmt, 4);
> +      mergedto = svn_fs__sqlite_column_text(stmt, 5);
> +      if (get_elided_parent_mergeinfo)
> +        {
> +          if ((svn_path_is_ancestor(mergedto, merge_target) == FALSE)
> +              || (svn_path_is_ancestor(mergedfrom, merge_source) == FALSE))
> +            {
> +              SVN_ERR(svn_fs__sqlite_step(&got_row, stmt));
> +              continue;
> +            }
> +        }
> +      else if ((strcmp(mergedto, merge_target) != 0)
> +                || strcmp(mergedfrom, merge_source) != 0)
> +        {
> +          SVN_ERR(svn_fs__sqlite_step(&got_row, stmt));
> +          continue;
> +        }
> +
> +      cur_parent_path = apr_hash_get(rev_target_hash, commit_rev_ptr,
> +                                     sizeof(*commit_rev_ptr));
> +      if (cur_parent_path)
> +        {
> +          if (strlen(mergedto) > strlen(cur_parent_path))
> +            {
> +              apr_hash_set(rev_target_hash, commit_rev_ptr,
> +                           sizeof(*commit_rev_ptr),
> +                           apr_pstrdup(pool, mergedto));
> +              /* merge target changed, so discard the earlier merge ranges.
> +                 TODO we should clear of the array. */

I'd also comment that the *only* reason this is safe is that you have
an ORDER BY mergedto in effect.

> +              merge_rangelist = apr_array_make(pool, 0,
> +                                               sizeof(svn_merge_range_t *));
> +            }
> +        }
> +      else
> +        apr_hash_set(rev_target_hash, commit_rev_ptr, sizeof(*commit_rev_ptr),
> +                     apr_pstrdup(pool, mergedto));
>        if ((last_commit_rev != commit_rev)
>            && (last_commit_rev != SVN_INVALID_REVNUM))
>          {


Of course, now that you're effectively doing a "loop over every change
in every path and every revision, doing filtering in C in svn instead
of in Sqlite in some index-improvable way", it's quite possible that a
pure-FS solution will be able to implement this API as well...

--dave


-- 
David Glasser | glasser@davidglasser.net | http://www.davidglasser.net/

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

RE: Re: svn commit: r28624 - branches/issue-2897/subversion/libsvn_fs_util

Posted by Kamesh Jayachandran <ka...@collab.net>.
Hi,

>>    if (inherit == svn_mergeinfo_inherited
>>        || inherit == svn_mergeinfo_nearest_ancestor)
>> -    SVN_ERR(get_parent_target_path_having_mergeinfo(&real_mergeinfo_target,
>> -                                                    db, merge_target,
>> -                                                    min_commit_rev,
>> -                                                    max_commit_rev, pool));
>> +    get_elided_parent_mergeinfo = TRUE;

>Should this variable be called get_inherited_mergeinfo instead?

Fixed in r28638.

>(Also, I don't think you'll be handling the nearest_ancestor case
>correctly.)

Yes, Will correct it as soon as I complete the 'peg-awareness work'.

>> +                                 "mergedrevend, inheritable, mergedfrom, "
>> +                                 "mergedto FROM mergeinfo_changed "
>> +                                 "WHERE revision between ? AND ? "
>> +                                 "ORDER BY revision ASC, mergedto ASC; ",
>> +                                 pool));

>This will iterate over *every* mergeinfo change *everywhere* in the
>repository between the two revisions.  I can't imagine that this is
>efficient for large multi-project multi-branch repositories.

Yes I am aware of it, there should be better way to achieve the same, but I don't have one now.

>> +              apr_hash_set(rev_target_hash, commit_rev_ptr,
>> +                           sizeof(*commit_rev_ptr),
>> +                           apr_pstrdup(pool, mergedto));
>> +              /* merge target changed, so discard the earlier merge ranges.
>> +                 TODO we should clear of the array. */

>I'd also comment that the *only* reason this is safe is that you have
>an ORDER BY mergedto in effect.

Yes.

>> +              merge_rangelist = apr_array_make(pool, 0,
>> +                                               sizeof(svn_merge_range_t *));
>> +            }
>> +        }
>> +      else
>> +        apr_hash_set(rev_target_hash, commit_rev_ptr, sizeof(*commit_rev_ptr),
>> +                     apr_pstrdup(pool, mergedto));
>>        if ((last_commit_rev != commit_rev)
>>            && (last_commit_rev != SVN_INVALID_REVNUM))
>>          {


>Of course, now that you're effectively doing a "loop over every change
>in every path and every revision, doing filtering in C in svn instead
>of in Sqlite in some index-improvable way", it's quite possible that a
>pure-FS solution will be able to implement this API as well...

Yes.


Thanks for your review.
With regards
Kamesh Jayachandran

RE: svn commit: r28624 - branches/issue-2897/subversion/libsvn_fs_util

Posted by Kamesh Jayachandran <ka...@collab.net>.
>> > +                                 "mergedrevend, inheritable, mergedfrom, "
>> > +                                 "mergedto FROM mergeinfo_changed "
>> > +                                 "WHERE revision between ? AND ? "
>> > +                                 "ORDER BY revision ASC, mergedto ASC; ",
>> > +                                 pool));
>>
>> This will iterate over *every* mergeinfo change *everywhere* in the
>> repository between the two revisions.  I can't imagine that this is
>> efficient for large multi-project multi-branch repositories.

>Actually, can't you make this issue a little better by putting LIKEs
>on mergedto and mergedfrom?

No, I can not, as the paths I have here are child paths which are longer than the parents, so LIKE won't work.

May be I can generate dynamic where clause something like
Data:
mergedto = '/a/b/c/d/e/f/g'

Dynamic where clause:
'where mergedfrom in ('/a', '/a/b', '/a/b/c', '/a/b/c/d', '/a/b/c/d/e', '/a/b/c/d/e/f', '/a/b/c/d/e/f/g')'.

With regards
Kamesh Jayachandran

Re: svn commit: r28624 - branches/issue-2897/subversion/libsvn_fs_util

Posted by David Glasser <gl...@davidglasser.net>.
On Dec 21, 2007 12:32 PM, David Glasser <gl...@davidglasser.net> wrote:
> >    *commit_rangelist = apr_array_make(pool, 0, sizeof(svn_merge_range_t *));
> >    *merge_ranges_list = apr_array_make(pool, 0, sizeof(apr_array_header_t *));
> >    merge_rangelist = apr_array_make(pool, 0, sizeof(svn_merge_range_t *));
> >
> > -  if (!real_mergeinfo_target)
> > -    return SVN_NO_ERROR;
> > -
> > -  if (strcmp(real_mergeinfo_target, merge_target) != 0)
> > -    {
> > -      int parent_merge_src_end;
> > -      const char *target_base_name =
> > -        merge_target + strlen(real_mergeinfo_target);
> > -      parent_merge_src_end = strlen(merge_source) - strlen(target_base_name);
> > -      real_merge_source = apr_pstrndup(pool, merge_source,
> > -                                       parent_merge_src_end);
> > -    }
> >    SVN_ERR(svn_fs__sqlite_prepare(&stmt, db,
> >                                   "SELECT revision, mergedrevstart, "
> > -                                 "mergedrevend, inheritable "
> > -                                 "FROM mergeinfo_changed "
> > -                                 "WHERE mergedfrom = ? AND mergedto = ? "
> > -                                 "AND revision between ? AND ? "
> > -                                 "ORDER BY revision ASC ;", pool));
> > -  SVN_ERR(svn_fs__sqlite_bind_text(stmt, 1, real_merge_source));
> > -  SVN_ERR(svn_fs__sqlite_bind_text(stmt, 2, real_mergeinfo_target));
> > -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 3, min_commit_rev + 1));
> > -  SVN_ERR(svn_fs__sqlite_bind_int64(stmt, 4, max_commit_rev));
> > +                                 "mergedrevend, inheritable, mergedfrom, "
> > +                                 "mergedto FROM mergeinfo_changed "
> > +                                 "WHERE revision between ? AND ? "
> > +                                 "ORDER BY revision ASC, mergedto ASC; ",
> > +                                 pool));
>
> This will iterate over *every* mergeinfo change *everywhere* in the
> repository between the two revisions.  I can't imagine that this is
> efficient for large multi-project multi-branch repositories.

Actually, can't you make this issue a little better by putting LIKEs
on mergedto and mergedfrom?

(Although brief experiments make me think this'll still be a linear
search, not an indexed search, on the paths, if you're doing a
selection on both revision and a path.  But doing it in sql *could* be
more efficient.)

--dave


-- 
David Glasser | glasser@davidglasser.net | http://www.davidglasser.net/

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org