You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Bert Huijben <be...@qqmail.nl> on 2014/06/20 22:09:14 UTC

RE: svn commit: r1604188 - in /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h transaction.c


> -----Original Message-----
> From: stsp@apache.org [mailto:stsp@apache.org]
> Sent: vrijdag 20 juni 2014 17:24
> To: commits@subversion.apache.org
> Subject: svn commit: r1604188 - in
> /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h
> transaction.c
> 
> Author: stsp
> Date: Fri Jun 20 15:24:02 2014
> New Revision: 1604188
> 
> URL: http://svn.apache.org/r1604188
> Log:
> Improve memory usage while fetching changed paths data from a
> transaction
> by eliminating a full in-memory copy of changed path data.
> 
> * subversion/libsvn_fs_fs/low_level.c
>   (svn_fs_fs__read_changes_incrementally): New FSFS-internal API which
>    invokes a callback for each changed path in a transaction.
> 
> * subversion/libsvn_fs_fs/low_level.h
>   (svn_fs_fs__read_changes_incrementally): Declare.
> 
> * subversion/libsvn_fs_fs/transaction.c
>   (svn_fs_fs__txn_changes_fetch): Use
> svn_fs_fs__read_changes_incrementally(),
>    instead of reading a changes_t list into memory and looping over that list.
> 
> Modified:
>     subversion/trunk/subversion/libsvn_fs_fs/low_level.c
>     subversion/trunk/subversion/libsvn_fs_fs/low_level.h
>     subversion/trunk/subversion/libsvn_fs_fs/transaction.c
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.c Fri Jun 20
> 15:24:02 2014
> @@ -365,6 +365,31 @@ svn_fs_fs__read_changes(apr_array_header
>    return SVN_NO_ERROR;
>  }
> 
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> +                                      svn_fs_fs__change_receiver_t
> +                                        change_receiver,
> +                                      void *change_receiver_baton,
> +                                      apr_pool_t *pool)
> +{
> +  change_t *change;
> +  apr_pool_t *iterpool;
> +
> +  iterpool = svn_pool_create(pool);
> +  do
> +    {
> +      svn_pool_clear(iterpool);
> +
> +      SVN_ERR(read_change(&change, stream, iterpool));
> +      if (change)
> +        SVN_ERR(change_receiver(change_receiver_baton, change, iterpool));
> +    }
> +  while (change);
> +  svn_pool_destroy(iterpool);
> +
> +  return SVN_NO_ERROR;
> +}
> +
>  /* Write a single change entry, path PATH, change CHANGE, and copyfrom
>     string COPYFROM, into the file specified by FILE.  Only include the
>     node kind field if INCLUDE_NODE_KIND is true.  Only include the
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.h
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.h?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.h (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.h Fri Jun 20
> 15:24:02 2014
> @@ -68,6 +68,20 @@ svn_fs_fs__read_changes(apr_array_header
>                          svn_stream_t *stream,
>                          apr_pool_t *pool);
> 
> +typedef svn_error_t *(*svn_fs_fs__change_receiver_t)(
> +  void *baton,
> +  change_t *change,
> +  apr_pool_t *scratch_pool);
> +
> +/* Read all the changes from STREAM and invoke CHANGE_RECEIVER on
> each change.
> +   Do all allocations in POOL. */
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> +                                      svn_fs_fs__change_receiver_t
> +                                        change_receiver,
> +                                      void *change_receiver_baton,
> +                                      apr_pool_t *scratch_pool);
> +
>  /* Write the changed path info from CHANGES in filesystem FS to the
>     output stream STREAM.  You may call this function multiple time on
>     the same stream but the last call should set TERMINATE_LIST to write
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/tra
> nsaction.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Fri Jun 20
> 15:24:02 2014
> @@ -777,84 +777,72 @@ fold_change(apr_hash_t *changed_paths,
>    return SVN_NO_ERROR;
>  }
> 
> -/* Examine all the changed path entries in CHANGES and store them in
> +/* An implementation of svn_fs_fs__change_receiver_t.
> +   Examine all the changed path entries in CHANGES and store them in
>     *CHANGED_PATHS.  Folding is done to remove redundant or unnecessary
>     data. Do all allocations in POOL. */
>  static svn_error_t *
> -process_changes(apr_hash_t *changed_paths,
> -                apr_array_header_t *changes,
> -                apr_pool_t *pool)
> -{
> -  apr_pool_t *iterpool = svn_pool_create(pool);
> -  int i;
> -
> -  /* Read in the changes one by one, folding them into our local hash
> -     as necessary. */
> -
> -  for (i = 0; i < changes->nelts; ++i)
> -    {
> -      /* The ITERPOOL will be cleared at the end of this function
> -       * since it is only used rarely and for a single hash iterator.
> -       */
> -      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> -
> -      SVN_ERR(fold_change(changed_paths, change));
> -
> -      /* Now, if our change was a deletion or replacement, we have to
> -         blow away any changes thus far on paths that are (or, were)
> -         children of this path.
> -         ### i won't bother with another iteration pool here -- at
> -         most we talking about a few extra dups of paths into what
> -         is already a temporary subpool.
> +process_changes(void *baton,
> +                change_t *change,
> +                apr_pool_t *scratch_pool)
> +{
> +  apr_hash_t *changed_paths = baton;
> +
> +  SVN_ERR(fold_change(changed_paths, change));
> +
> +  /* Now, if our change was a deletion or replacement, we have to
> +     blow away any changes thus far on paths that are (or, were)
> +     children of this path.
> +     ### i won't bother with another iteration pool here -- at
> +     most we talking about a few extra dups of paths into what
> +     is already a temporary subpool.
> +  */
> +
> +  if ((change->info.change_kind == svn_fs_path_change_delete)
> +       || (change->info.change_kind == svn_fs_path_change_replace))
> +    {
> +      apr_hash_index_t *hi;
> +      apr_pool_t *iterpool;
> +
> +      /* a potential child path must contain at least 2 more chars
> +         (the path separator plus at least one char for the name).
> +         Also, we should not assume that all paths have been normalized
> +         i.e. some might have trailing path separators.
>        */
> +      apr_ssize_t path_len = change->path.len;
> +      apr_ssize_t min_child_len = path_len == 0
> +                                ? 1
> +                                : change->path.data[path_len-1] == '/'
> +                                    ? path_len + 1
> +                                    : path_len + 2;
> +
> +      /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> +         The number of changes to process may be >> 1000.
> +         Therefore, keep the inner loop as tight as possible.
> +      */
> +      iterpool = svn_pool_create(scratch_pool);
> +      for (hi = apr_hash_first(scratch_pool, changed_paths);
> +           hi;
> +           hi = apr_hash_next(hi))
> +        {
> +          /* KEY is the path. */
> +          const void *path;
> +          apr_ssize_t klen;
> +          apr_hash_this(hi, &path, &klen, NULL);
> 
> -      if ((change->info.change_kind == svn_fs_path_change_delete)
> -           || (change->info.change_kind == svn_fs_path_change_replace))
> -        {
> -          apr_hash_index_t *hi;
> -
> -          /* a potential child path must contain at least 2 more chars
> -             (the path separator plus at least one char for the name).
> -             Also, we should not assume that all paths have been normalized
> -             i.e. some might have trailing path separators.
> -          */
> -          apr_ssize_t path_len = change->path.len;
> -          apr_ssize_t min_child_len = path_len == 0
> -                                    ? 1
> -                                    : change->path.data[path_len-1] == '/'
> -                                        ? path_len + 1
> -                                        : path_len + 2;
> -
> -          /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> -             The number of changes to process may be >> 1000.
> -             Therefore, keep the inner loop as tight as possible.
> -          */
> -          for (hi = apr_hash_first(iterpool, changed_paths);
> -               hi;
> -               hi = apr_hash_next(hi))
> -            {
> -              /* KEY is the path. */
> -              const void *path;
> -              apr_ssize_t klen;
> -              apr_hash_this(hi, &path, &klen, NULL);
> -
> -              /* If we come across a child of our path, remove it.
> -                 Call svn_dirent_is_child only if there is a chance that
> -                 this is actually a sub-path.
> -               */
> -              if (   klen >= min_child_len
> -                  && svn_dirent_is_child(change->path.data, path, iterpool))
> -                apr_hash_set(changed_paths, path, klen, NULL);
> -            }
> -
> -          /* Clear the per-iteration subpool. */
>            svn_pool_clear(iterpool);
> +
> +          /* If we come across a child of our path, remove it.
> +             Call svn_dirent_is_child only if there is a chance that
> +             this is actually a sub-path.
> +           */
> +          if (   klen >= min_child_len
> +              && svn_dirent_is_child(change->path.data, path, iterpool))
> +            apr_hash_set(changed_paths, path, klen, NULL);

This doesn't look like an on-disk path, so it should never use the dirent apis. It should use either a relpath or an fspath api.

And you can just pass a NULL pool to the is_child functions (except for the URI variant), so that might even completely avoid using an iterpool.

>          }
> +      svn_pool_destroy(iterpool);
>      }
> 
> -  /* Destroy the per-iteration subpool. */
> -  svn_pool_destroy(iterpool);
> -
>    return SVN_NO_ERROR;
>  }
> 
> @@ -866,7 +854,6 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
>  {
>    apr_file_t *file;
>    apr_hash_t *changed_paths = apr_hash_make(pool);
> -  apr_array_header_t *changes;
>    apr_pool_t *scratch_pool = svn_pool_create(pool);
> 
>    SVN_ERR(svn_io_file_open(&file,
> @@ -874,11 +861,11 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
>                             APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
>                             scratch_pool));
> 
> -  SVN_ERR(svn_fs_fs__read_changes(&changes,
> +  SVN_ERR(svn_fs_fs__read_changes_incrementally(
>                                    svn_stream_from_aprfile2(file, TRUE,
>                                                             scratch_pool),
> +                                  process_changes, changed_paths,
>                                    scratch_pool));
> -  SVN_ERR(process_changes(changed_paths, changes, pool));
>    svn_pool_destroy(scratch_pool);

If the incrementally / streamy function uses a proper iterpool, the subpool called scratch_pool might not be necessary in this block.
(Might still be necessary for usages outside the diff hunks)

	Bert
> 
>    *changed_paths_p = changed_paths;
>