You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2014/06/20 17:24:03 UTC

svn commit: r1604188 - in /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h transaction.c

Author: stsp
Date: Fri Jun 20 15:24:02 2014
New Revision: 1604188

URL: http://svn.apache.org/r1604188
Log:
Improve memory usage while fetching changed paths data from a transaction
by eliminating a full in-memory copy of changed path data.

* subversion/libsvn_fs_fs/low_level.c
  (svn_fs_fs__read_changes_incrementally): New FSFS-internal API which
   invokes a callback for each changed path in a transaction.

* subversion/libsvn_fs_fs/low_level.h
  (svn_fs_fs__read_changes_incrementally): Declare.

* subversion/libsvn_fs_fs/transaction.c
  (svn_fs_fs__txn_changes_fetch): Use svn_fs_fs__read_changes_incrementally(),
   instead of reading a changes_t list into memory and looping over that list.

Modified:
    subversion/trunk/subversion/libsvn_fs_fs/low_level.c
    subversion/trunk/subversion/libsvn_fs_fs/low_level.h
    subversion/trunk/subversion/libsvn_fs_fs/transaction.c

Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/low_level.c?rev=1604188&r1=1604187&r2=1604188&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/low_level.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/low_level.c Fri Jun 20 15:24:02 2014
@@ -365,6 +365,31 @@ svn_fs_fs__read_changes(apr_array_header
   return SVN_NO_ERROR;
 }
 
+svn_error_t *
+svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
+                                      svn_fs_fs__change_receiver_t
+                                        change_receiver,
+                                      void *change_receiver_baton,
+                                      apr_pool_t *pool)
+{
+  change_t *change;
+  apr_pool_t *iterpool;
+
+  iterpool = svn_pool_create(pool);
+  do
+    {
+      svn_pool_clear(iterpool);
+
+      SVN_ERR(read_change(&change, stream, iterpool));
+      if (change)
+        SVN_ERR(change_receiver(change_receiver_baton, change, iterpool));
+    }
+  while (change);
+  svn_pool_destroy(iterpool);
+  
+  return SVN_NO_ERROR;
+}
+
 /* Write a single change entry, path PATH, change CHANGE, and copyfrom
    string COPYFROM, into the file specified by FILE.  Only include the
    node kind field if INCLUDE_NODE_KIND is true.  Only include the

Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/low_level.h?rev=1604188&r1=1604187&r2=1604188&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/low_level.h (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/low_level.h Fri Jun 20 15:24:02 2014
@@ -68,6 +68,20 @@ svn_fs_fs__read_changes(apr_array_header
                         svn_stream_t *stream,
                         apr_pool_t *pool);
 
+typedef svn_error_t *(*svn_fs_fs__change_receiver_t)(
+  void *baton,
+  change_t *change,
+  apr_pool_t *scratch_pool);
+
+/* Read all the changes from STREAM and invoke CHANGE_RECEIVER on each change.
+   Do all allocations in POOL. */
+svn_error_t *
+svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
+                                      svn_fs_fs__change_receiver_t
+                                        change_receiver,
+                                      void *change_receiver_baton,
+                                      apr_pool_t *scratch_pool);
+
 /* Write the changed path info from CHANGES in filesystem FS to the
    output stream STREAM.  You may call this function multiple time on
    the same stream but the last call should set TERMINATE_LIST to write

Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/transaction.c?rev=1604188&r1=1604187&r2=1604188&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
+++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Fri Jun 20 15:24:02 2014
@@ -777,84 +777,72 @@ fold_change(apr_hash_t *changed_paths,
   return SVN_NO_ERROR;
 }
 
-/* Examine all the changed path entries in CHANGES and store them in
+/* An implementation of svn_fs_fs__change_receiver_t.
+   Examine all the changed path entries in CHANGES and store them in
    *CHANGED_PATHS.  Folding is done to remove redundant or unnecessary
    data. Do all allocations in POOL. */
 static svn_error_t *
-process_changes(apr_hash_t *changed_paths,
-                apr_array_header_t *changes,
-                apr_pool_t *pool)
-{
-  apr_pool_t *iterpool = svn_pool_create(pool);
-  int i;
-
-  /* Read in the changes one by one, folding them into our local hash
-     as necessary. */
-
-  for (i = 0; i < changes->nelts; ++i)
-    {
-      /* The ITERPOOL will be cleared at the end of this function
-       * since it is only used rarely and for a single hash iterator.
-       */
-      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
-
-      SVN_ERR(fold_change(changed_paths, change));
-
-      /* Now, if our change was a deletion or replacement, we have to
-         blow away any changes thus far on paths that are (or, were)
-         children of this path.
-         ### i won't bother with another iteration pool here -- at
-         most we talking about a few extra dups of paths into what
-         is already a temporary subpool.
+process_changes(void *baton,
+                change_t *change,
+                apr_pool_t *scratch_pool)
+{
+  apr_hash_t *changed_paths = baton;
+
+  SVN_ERR(fold_change(changed_paths, change));
+
+  /* Now, if our change was a deletion or replacement, we have to
+     blow away any changes thus far on paths that are (or, were)
+     children of this path.
+     ### i won't bother with another iteration pool here -- at
+     most we talking about a few extra dups of paths into what
+     is already a temporary subpool.
+  */
+
+  if ((change->info.change_kind == svn_fs_path_change_delete)
+       || (change->info.change_kind == svn_fs_path_change_replace))
+    {
+      apr_hash_index_t *hi;
+      apr_pool_t *iterpool;
+
+      /* a potential child path must contain at least 2 more chars
+         (the path separator plus at least one char for the name).
+         Also, we should not assume that all paths have been normalized
+         i.e. some might have trailing path separators.
       */
+      apr_ssize_t path_len = change->path.len;
+      apr_ssize_t min_child_len = path_len == 0
+                                ? 1
+                                : change->path.data[path_len-1] == '/'
+                                    ? path_len + 1
+                                    : path_len + 2;
+
+      /* CAUTION: This is the inner loop of an O(n^2) algorithm.
+         The number of changes to process may be >> 1000.
+         Therefore, keep the inner loop as tight as possible.
+      */
+      iterpool = svn_pool_create(scratch_pool);
+      for (hi = apr_hash_first(scratch_pool, changed_paths);
+           hi;
+           hi = apr_hash_next(hi))
+        {
+          /* KEY is the path. */
+          const void *path;
+          apr_ssize_t klen;
+          apr_hash_this(hi, &path, &klen, NULL);
 
-      if ((change->info.change_kind == svn_fs_path_change_delete)
-           || (change->info.change_kind == svn_fs_path_change_replace))
-        {
-          apr_hash_index_t *hi;
-
-          /* a potential child path must contain at least 2 more chars
-             (the path separator plus at least one char for the name).
-             Also, we should not assume that all paths have been normalized
-             i.e. some might have trailing path separators.
-          */
-          apr_ssize_t path_len = change->path.len;
-          apr_ssize_t min_child_len = path_len == 0
-                                    ? 1
-                                    : change->path.data[path_len-1] == '/'
-                                        ? path_len + 1
-                                        : path_len + 2;
-
-          /* CAUTION: This is the inner loop of an O(n^2) algorithm.
-             The number of changes to process may be >> 1000.
-             Therefore, keep the inner loop as tight as possible.
-          */
-          for (hi = apr_hash_first(iterpool, changed_paths);
-               hi;
-               hi = apr_hash_next(hi))
-            {
-              /* KEY is the path. */
-              const void *path;
-              apr_ssize_t klen;
-              apr_hash_this(hi, &path, &klen, NULL);
-
-              /* If we come across a child of our path, remove it.
-                 Call svn_dirent_is_child only if there is a chance that
-                 this is actually a sub-path.
-               */
-              if (   klen >= min_child_len
-                  && svn_dirent_is_child(change->path.data, path, iterpool))
-                apr_hash_set(changed_paths, path, klen, NULL);
-            }
-
-          /* Clear the per-iteration subpool. */
           svn_pool_clear(iterpool);
+
+          /* If we come across a child of our path, remove it.
+             Call svn_dirent_is_child only if there is a chance that
+             this is actually a sub-path.
+           */
+          if (   klen >= min_child_len
+              && svn_dirent_is_child(change->path.data, path, iterpool))
+            apr_hash_set(changed_paths, path, klen, NULL);
         }
+      svn_pool_destroy(iterpool);
     }
 
-  /* Destroy the per-iteration subpool. */
-  svn_pool_destroy(iterpool);
-
   return SVN_NO_ERROR;
 }
 
@@ -866,7 +854,6 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t 
 {
   apr_file_t *file;
   apr_hash_t *changed_paths = apr_hash_make(pool);
-  apr_array_header_t *changes;
   apr_pool_t *scratch_pool = svn_pool_create(pool);
 
   SVN_ERR(svn_io_file_open(&file,
@@ -874,11 +861,11 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t 
                            APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
                            scratch_pool));
 
-  SVN_ERR(svn_fs_fs__read_changes(&changes,
+  SVN_ERR(svn_fs_fs__read_changes_incrementally(
                                   svn_stream_from_aprfile2(file, TRUE,
                                                            scratch_pool),
+                                  process_changes, changed_paths,
                                   scratch_pool));
-  SVN_ERR(process_changes(changed_paths, changes, pool));
   svn_pool_destroy(scratch_pool);
 
   *changed_paths_p = changed_paths;



RE: svn commit: r1604188 - in /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h transaction.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: stsp@apache.org [mailto:stsp@apache.org]
> Sent: vrijdag 20 juni 2014 17:24
> To: commits@subversion.apache.org
> Subject: svn commit: r1604188 - in
> /subversion/trunk/subversion/libsvn_fs_fs: low_level.c low_level.h
> transaction.c
> 
> Author: stsp
> Date: Fri Jun 20 15:24:02 2014
> New Revision: 1604188
> 
> URL: http://svn.apache.org/r1604188
> Log:
> Improve memory usage while fetching changed paths data from a
> transaction
> by eliminating a full in-memory copy of changed path data.
> 
> * subversion/libsvn_fs_fs/low_level.c
>   (svn_fs_fs__read_changes_incrementally): New FSFS-internal API which
>    invokes a callback for each changed path in a transaction.
> 
> * subversion/libsvn_fs_fs/low_level.h
>   (svn_fs_fs__read_changes_incrementally): Declare.
> 
> * subversion/libsvn_fs_fs/transaction.c
>   (svn_fs_fs__txn_changes_fetch): Use
> svn_fs_fs__read_changes_incrementally(),
>    instead of reading a changes_t list into memory and looping over that list.
> 
> Modified:
>     subversion/trunk/subversion/libsvn_fs_fs/low_level.c
>     subversion/trunk/subversion/libsvn_fs_fs/low_level.h
>     subversion/trunk/subversion/libsvn_fs_fs/transaction.c
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.c Fri Jun 20
> 15:24:02 2014
> @@ -365,6 +365,31 @@ svn_fs_fs__read_changes(apr_array_header
>    return SVN_NO_ERROR;
>  }
> 
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> +                                      svn_fs_fs__change_receiver_t
> +                                        change_receiver,
> +                                      void *change_receiver_baton,
> +                                      apr_pool_t *pool)
> +{
> +  change_t *change;
> +  apr_pool_t *iterpool;
> +
> +  iterpool = svn_pool_create(pool);
> +  do
> +    {
> +      svn_pool_clear(iterpool);
> +
> +      SVN_ERR(read_change(&change, stream, iterpool));
> +      if (change)
> +        SVN_ERR(change_receiver(change_receiver_baton, change, iterpool));
> +    }
> +  while (change);
> +  svn_pool_destroy(iterpool);
> +
> +  return SVN_NO_ERROR;
> +}
> +
>  /* Write a single change entry, path PATH, change CHANGE, and copyfrom
>     string COPYFROM, into the file specified by FILE.  Only include the
>     node kind field if INCLUDE_NODE_KIND is true.  Only include the
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/low_level.h
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/lo
> w_level.h?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/low_level.h (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/low_level.h Fri Jun 20
> 15:24:02 2014
> @@ -68,6 +68,20 @@ svn_fs_fs__read_changes(apr_array_header
>                          svn_stream_t *stream,
>                          apr_pool_t *pool);
> 
> +typedef svn_error_t *(*svn_fs_fs__change_receiver_t)(
> +  void *baton,
> +  change_t *change,
> +  apr_pool_t *scratch_pool);
> +
> +/* Read all the changes from STREAM and invoke CHANGE_RECEIVER on
> each change.
> +   Do all allocations in POOL. */
> +svn_error_t *
> +svn_fs_fs__read_changes_incrementally(svn_stream_t *stream,
> +                                      svn_fs_fs__change_receiver_t
> +                                        change_receiver,
> +                                      void *change_receiver_baton,
> +                                      apr_pool_t *scratch_pool);
> +
>  /* Write the changed path info from CHANGES in filesystem FS to the
>     output stream STREAM.  You may call this function multiple time on
>     the same stream but the last call should set TERMINATE_LIST to write
> 
> Modified: subversion/trunk/subversion/libsvn_fs_fs/transaction.c
> URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_fs_fs/tra
> nsaction.c?rev=1604188&r1=1604187&r2=1604188&view=diff
> ==========================================================
> ====================
> --- subversion/trunk/subversion/libsvn_fs_fs/transaction.c (original)
> +++ subversion/trunk/subversion/libsvn_fs_fs/transaction.c Fri Jun 20
> 15:24:02 2014
> @@ -777,84 +777,72 @@ fold_change(apr_hash_t *changed_paths,
>    return SVN_NO_ERROR;
>  }
> 
> -/* Examine all the changed path entries in CHANGES and store them in
> +/* An implementation of svn_fs_fs__change_receiver_t.
> +   Examine all the changed path entries in CHANGES and store them in
>     *CHANGED_PATHS.  Folding is done to remove redundant or unnecessary
>     data. Do all allocations in POOL. */
>  static svn_error_t *
> -process_changes(apr_hash_t *changed_paths,
> -                apr_array_header_t *changes,
> -                apr_pool_t *pool)
> -{
> -  apr_pool_t *iterpool = svn_pool_create(pool);
> -  int i;
> -
> -  /* Read in the changes one by one, folding them into our local hash
> -     as necessary. */
> -
> -  for (i = 0; i < changes->nelts; ++i)
> -    {
> -      /* The ITERPOOL will be cleared at the end of this function
> -       * since it is only used rarely and for a single hash iterator.
> -       */
> -      change_t *change = APR_ARRAY_IDX(changes, i, change_t *);
> -
> -      SVN_ERR(fold_change(changed_paths, change));
> -
> -      /* Now, if our change was a deletion or replacement, we have to
> -         blow away any changes thus far on paths that are (or, were)
> -         children of this path.
> -         ### i won't bother with another iteration pool here -- at
> -         most we talking about a few extra dups of paths into what
> -         is already a temporary subpool.
> +process_changes(void *baton,
> +                change_t *change,
> +                apr_pool_t *scratch_pool)
> +{
> +  apr_hash_t *changed_paths = baton;
> +
> +  SVN_ERR(fold_change(changed_paths, change));
> +
> +  /* Now, if our change was a deletion or replacement, we have to
> +     blow away any changes thus far on paths that are (or, were)
> +     children of this path.
> +     ### i won't bother with another iteration pool here -- at
> +     most we talking about a few extra dups of paths into what
> +     is already a temporary subpool.
> +  */
> +
> +  if ((change->info.change_kind == svn_fs_path_change_delete)
> +       || (change->info.change_kind == svn_fs_path_change_replace))
> +    {
> +      apr_hash_index_t *hi;
> +      apr_pool_t *iterpool;
> +
> +      /* a potential child path must contain at least 2 more chars
> +         (the path separator plus at least one char for the name).
> +         Also, we should not assume that all paths have been normalized
> +         i.e. some might have trailing path separators.
>        */
> +      apr_ssize_t path_len = change->path.len;
> +      apr_ssize_t min_child_len = path_len == 0
> +                                ? 1
> +                                : change->path.data[path_len-1] == '/'
> +                                    ? path_len + 1
> +                                    : path_len + 2;
> +
> +      /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> +         The number of changes to process may be >> 1000.
> +         Therefore, keep the inner loop as tight as possible.
> +      */
> +      iterpool = svn_pool_create(scratch_pool);
> +      for (hi = apr_hash_first(scratch_pool, changed_paths);
> +           hi;
> +           hi = apr_hash_next(hi))
> +        {
> +          /* KEY is the path. */
> +          const void *path;
> +          apr_ssize_t klen;
> +          apr_hash_this(hi, &path, &klen, NULL);
> 
> -      if ((change->info.change_kind == svn_fs_path_change_delete)
> -           || (change->info.change_kind == svn_fs_path_change_replace))
> -        {
> -          apr_hash_index_t *hi;
> -
> -          /* a potential child path must contain at least 2 more chars
> -             (the path separator plus at least one char for the name).
> -             Also, we should not assume that all paths have been normalized
> -             i.e. some might have trailing path separators.
> -          */
> -          apr_ssize_t path_len = change->path.len;
> -          apr_ssize_t min_child_len = path_len == 0
> -                                    ? 1
> -                                    : change->path.data[path_len-1] == '/'
> -                                        ? path_len + 1
> -                                        : path_len + 2;
> -
> -          /* CAUTION: This is the inner loop of an O(n^2) algorithm.
> -             The number of changes to process may be >> 1000.
> -             Therefore, keep the inner loop as tight as possible.
> -          */
> -          for (hi = apr_hash_first(iterpool, changed_paths);
> -               hi;
> -               hi = apr_hash_next(hi))
> -            {
> -              /* KEY is the path. */
> -              const void *path;
> -              apr_ssize_t klen;
> -              apr_hash_this(hi, &path, &klen, NULL);
> -
> -              /* If we come across a child of our path, remove it.
> -                 Call svn_dirent_is_child only if there is a chance that
> -                 this is actually a sub-path.
> -               */
> -              if (   klen >= min_child_len
> -                  && svn_dirent_is_child(change->path.data, path, iterpool))
> -                apr_hash_set(changed_paths, path, klen, NULL);
> -            }
> -
> -          /* Clear the per-iteration subpool. */
>            svn_pool_clear(iterpool);
> +
> +          /* If we come across a child of our path, remove it.
> +             Call svn_dirent_is_child only if there is a chance that
> +             this is actually a sub-path.
> +           */
> +          if (   klen >= min_child_len
> +              && svn_dirent_is_child(change->path.data, path, iterpool))
> +            apr_hash_set(changed_paths, path, klen, NULL);

This doesn't look like an on-disk path, so it should never use the dirent apis. It should use either a relpath or an fspath api.

And you can just pass a NULL pool to the is_child functions (except for the URI variant), so that might even completely avoid using an iterpool.

>          }
> +      svn_pool_destroy(iterpool);
>      }
> 
> -  /* Destroy the per-iteration subpool. */
> -  svn_pool_destroy(iterpool);
> -
>    return SVN_NO_ERROR;
>  }
> 
> @@ -866,7 +854,6 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
>  {
>    apr_file_t *file;
>    apr_hash_t *changed_paths = apr_hash_make(pool);
> -  apr_array_header_t *changes;
>    apr_pool_t *scratch_pool = svn_pool_create(pool);
> 
>    SVN_ERR(svn_io_file_open(&file,
> @@ -874,11 +861,11 @@ svn_fs_fs__txn_changes_fetch(apr_hash_t
>                             APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
>                             scratch_pool));
> 
> -  SVN_ERR(svn_fs_fs__read_changes(&changes,
> +  SVN_ERR(svn_fs_fs__read_changes_incrementally(
>                                    svn_stream_from_aprfile2(file, TRUE,
>                                                             scratch_pool),
> +                                  process_changes, changed_paths,
>                                    scratch_pool));
> -  SVN_ERR(process_changes(changed_paths, changes, pool));
>    svn_pool_destroy(scratch_pool);

If the incrementally / streamy function uses a proper iterpool, the subpool called scratch_pool might not be necessary in this block.
(Might still be necessary for usages outside the diff hunks)

	Bert
> 
>    *changed_paths_p = changed_paths;
>