You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2014/01/13 15:47:16 UTC

svn commit: r1557736 - /subversion/trunk/subversion/svnrdump/dump_editor.c

Author: rhuijben
Date: Mon Jan 13 14:47:16 2014
New Revision: 1557736

URL: http://svn.apache.org/r1557736
Log:
Reduce memory footprint of the svnrdump dump handling by creating subpools
per file and directory instead of only per revision. Somehow everything was
prepared, but not finished here.

* subversion/svnrdump/dump_editor.c
  (make_dir_baton,
   make_file_baton): Make file/dir pool within parent.
  (delete_entry): Use parent pool for data in parent.
  (add_directory,
   open_directory): Don't use editor pool as scratch pool.
  (close_directory): Destroy directory pool.

  (add_file,
   open_file,
   apply_textdelta): Don't use editor pool as scratch pool.
  (close_file): Destroy file pool.

Modified:
    subversion/trunk/subversion/svnrdump/dump_editor.c

Modified: subversion/trunk/subversion/svnrdump/dump_editor.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svnrdump/dump_editor.c?rev=1557736&r1=1557735&r2=1557736&view=diff
==============================================================================
--- subversion/trunk/subversion/svnrdump/dump_editor.c (original)
+++ subversion/trunk/subversion/svnrdump/dump_editor.c Mon Jan 13 14:47:16 2014
@@ -180,31 +180,38 @@ make_dir_baton(const char *path,
   struct dump_edit_baton *eb = edit_baton;
   struct dir_baton *new_db = apr_pcalloc(pool, sizeof(*new_db));
   const char *repos_relpath;
+  apr_pool_t *dir_pool;
 
   /* Construct the full path of this node. */
   if (pb)
-    repos_relpath = svn_relpath_canonicalize(path, pool);
+    {
+      dir_pool = svn_pool_create(pb->pool);
+      repos_relpath = svn_relpath_canonicalize(path, dir_pool);
+    }
   else
-    repos_relpath = "";
+    {
+      dir_pool = svn_pool_create(eb->pool);
+      repos_relpath = "";
+    }
 
   /* Strip leading slash from copyfrom_path so that the path is
      canonical and svn_relpath_join can be used */
   if (copyfrom_path)
-    copyfrom_path = svn_relpath_canonicalize(copyfrom_path, pool);
+    copyfrom_path = svn_relpath_canonicalize(copyfrom_path, dir_pool);
 
   new_db->eb = eb;
   new_db->parent_dir_baton = pb;
-  new_db->pool = pool;
+  new_db->pool = dir_pool;
   new_db->repos_relpath = repos_relpath;
   new_db->copyfrom_path = copyfrom_path
-                            ? svn_relpath_canonicalize(copyfrom_path, pool)
+                            ? svn_relpath_canonicalize(copyfrom_path, dir_pool)
                             : NULL;
   new_db->copyfrom_rev = copyfrom_rev;
   new_db->added = added;
   new_db->written_out = FALSE;
-  new_db->props = apr_hash_make(pool);
-  new_db->deleted_props = apr_hash_make(pool);
-  new_db->deleted_entries = apr_hash_make(pool);
+  new_db->props = apr_hash_make(dir_pool);
+  new_db->deleted_props = apr_hash_make(dir_pool);
+  new_db->deleted_entries = apr_hash_make(dir_pool);
 
   return new_db;
 }
@@ -218,14 +225,15 @@ make_file_baton(const char *path,
                 struct dir_baton *pb,
                 apr_pool_t *pool)
 {
-  struct file_baton *new_fb = apr_pcalloc(pool, sizeof(*new_fb));
+  apr_pool_t *file_pool = svn_pool_create(pb->pool);
+  struct file_baton *new_fb = apr_pcalloc(file_pool, sizeof(*new_fb));
 
   new_fb->eb = pb->eb;
   new_fb->parent_dir_baton = pb;
-  new_fb->pool = pool;
-  new_fb->repos_relpath = svn_relpath_canonicalize(path, pool);
-  new_fb->props = apr_hash_make(pool);
-  new_fb->deleted_props = apr_hash_make(pool);
+  new_fb->pool = file_pool;
+  new_fb->repos_relpath = svn_relpath_canonicalize(path, file_pool);
+  new_fb->props = apr_hash_make(file_pool);
+  new_fb->deleted_props = apr_hash_make(file_pool);
   new_fb->is_copy = FALSE;
   new_fb->copyfrom_path = NULL;
   new_fb->copyfrom_rev = SVN_INVALID_REVNUM;
@@ -663,7 +671,7 @@ delete_entry(const char *path,
      to the deleted_entries of the parent directory baton.  That way,
      we can tell (later) an addition from a replacement.  All the real
      deletions get handled in close_directory().  */
-  svn_hash_sets(pb->deleted_entries, apr_pstrdup(pb->eb->pool, path), pb);
+  svn_hash_sets(pb->deleted_entries, apr_pstrdup(pb->pool, path), pb);
 
   return SVN_NO_ERROR;
 }
@@ -686,7 +694,7 @@ add_directory(const char *path,
   SVN_ERR(dump_pending(pb->eb, pool));
 
   new_db = make_dir_baton(path, copyfrom_path, copyfrom_rev, pb->eb,
-                          pb, TRUE, pb->eb->pool);
+                          pb, TRUE, pb->pool);
 
   /* This might be a replacement -- is the path already deleted? */
   val = svn_hash_gets(pb->deleted_entries, path);
@@ -738,12 +746,12 @@ open_directory(const char *path,
     {
       copyfrom_path = svn_relpath_join(pb->copyfrom_path,
                                        svn_relpath_basename(path, NULL),
-                                       pb->eb->pool);
+                                       pb->pool);
       copyfrom_rev = pb->copyfrom_rev;
     }
 
   new_db = make_dir_baton(path, copyfrom_path, copyfrom_rev, pb->eb, pb,
-                          FALSE, pb->eb->pool);
+                          FALSE, pb->pool);
 
   *child_baton = new_db;
   return SVN_NO_ERROR;
@@ -794,7 +802,7 @@ close_directory(void *dir_baton,
     }
 
   /* ### should be unnecessary */
-  apr_hash_clear(db->deleted_entries);
+  svn_pool_destroy(db->pool);
 
   return SVN_NO_ERROR;
 }
@@ -861,7 +869,7 @@ open_file(const char *path,
     {
       fb->copyfrom_path = svn_relpath_join(pb->copyfrom_path,
                                            svn_relpath_basename(path, NULL),
-                                           pb->eb->pool);
+                                           pb->pool);
       fb->copyfrom_rev = pb->copyfrom_rev;
     }
 
@@ -954,7 +962,7 @@ apply_textdelta(void *file_baton, const 
 
   /* Record that there's text to be dumped, and its base checksum. */
   fb->dump_text = TRUE;
-  fb->base_checksum = apr_pstrdup(eb->pool, base_checksum);
+  fb->base_checksum = apr_pstrdup(fb->pool, base_checksum);
 
   return SVN_NO_ERROR;
 }
@@ -1067,6 +1075,8 @@ close_file(void *file_baton,
      dump` */
   SVN_ERR(svn_stream_puts(eb->stream, "\n\n"));
 
+  svn_pool_clear(fb->pool);
+
   return SVN_NO_ERROR;
 }
 



Re: r1557736 - Reduce memory footprint of the svnrdump dump handling...

Posted by Julian Foad <ju...@btopenworld.com>.
Bert Huijben wrote:
> Julian Foad wrote:
>> The [...] driver is already passing in suitably scoped pools
>> [...] I see that ra_serf's implementation of svn_ra_replay[_range]
>> actually is doing so.
>> 
>> Does it make sense to remove this creation and deletion of our own subpools
>> here?
[...]
> 
> Which editor driver does this right? ;)

By inspection, it appears RA-svn and RA-local have done this right since pre-1.0.

> Serf certainly didn't do this 'right' in 1.6-1.8, but I also fixed that on
> the other side when I rewrote the driver for the new xml engine for 1.9.

OK, thanks for explaining. I have updated the log messages for

r1499863 "Convert ra_serfs 'svn_ra_replay' support to the transition based xml parser."
r1557736 "Reduce memory footprint of the svnrdump dump handling..."

to mention this.

> Serf didn't provide short lived scratch pools and we had to fix a few of
> those in patch releases because we had problems of too many files open
> during update, etc. [...]
> 
> If this problem is now properly fixed for all drivers, and it improves the
> code we might want to remove some of those helper pools... 

OK, thanks. r1651690.

> We probably have to reintroduce a similar pool system once we go to editor
> v2, as that doesn't guarantee a depth first drive.
> (But that will probably require a complete redesign of svnrdump anyway)

Yes, pool handling will have to be completely different if Ev2 has a completely different driving order.

- Julian


RE: r1557736 - Reduce memory footprint of the svnrdump dump handling...

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Julian Foad [mailto:julianfoad@btopenworld.com]
> Sent: woensdag 14 januari 2015 12:00
> To: Bert Huijben
> Cc: dev@subversion.apache.org
> Subject: Re: r1557736 - Reduce memory footprint of the svnrdump dump
> handling...
> 
> Hi Bert.
> 
> The second part of this patch -- "Don't use editor pool as scratch pool"
-- looks
> right.
> 
> The first part -- "Make file/dir pool within parent" -- looks unnecessary.
The
> editor driver is already passing in suitably scoped pools: a per-directory
pool to
> the 'open_directory' and 'add_directory' methods, and a per-file pool to
the
> 'open_file' and 'add_file' methods. At least the driver *should* be doing
so, and
> by inspection I see that ra_serf's implementation of svn_ra_replay[_range]
> actually is doing so.
> 
> Does it make sense to remove this creation and deletion of our own
subpools
> here?
> 
> I noticed this because make_dir_baton's and make_file_baton's doc strings
still
> say "Perform all allocations in POOL" but they don't.

Which editor driver does this right? ;)

Serf certainly didn't do this 'right' in 1.6-1.8, but I also fixed that on
the other side when I rewrote the driver for the new xml engine for 1.9.

Serf didn't provide short lived scratch pools and we had to fix a few of
those in patch releases because we had problems of too many files open
during update, etc.
(A driver with a pool per revision can give you tens of thousands of files
handled in a single pool)


If this problem is now properly fixed for all drivers, and it improves the
code we might want to remove some of those helper pools... 


We probably have to reintroduce a similar pool system once we go to editor
v2, as that doesn't guarantee a depth first drive.
(But that will probably require a complete redesign of svnrdump anyway)

	Bert



Re: r1557736 - Reduce memory footprint of the svnrdump dump handling...

Posted by Julian Foad <ju...@btopenworld.com>.
Hi Bert.

The second part of this patch -- "Don't use editor pool as scratch pool" -- looks right.

The first part -- "Make file/dir pool within parent" -- looks unnecessary. The editor driver is already passing in suitably scoped pools: a per-directory pool to the 'open_directory' and 'add_directory' methods, and a per-file pool to the 'open_file' and 'add_file' methods. At least the driver *should* be doing so, and by inspection I see that ra_serf's implementation of svn_ra_replay[_range] actually is doing so.

Does it make sense to remove this creation and deletion of our own subpools here?

I noticed this because make_dir_baton's and make_file_baton's doc strings still say "Perform all allocations in POOL" but they don't.

- Julian


> Author: rhuijben
> Date: Mon Jan 13 14:47:16 2014
> New Revision: 1557736
> 
> URL: http://svn.apache.org/r1557736
> Log:
> Reduce memory footprint of the svnrdump dump handling by creating subpools
> per file and directory instead of only per revision. Somehow everything was
> prepared, but not finished here.
> 
> * subversion/svnrdump/dump_editor.c
>   (make_dir_baton,
>    make_file_baton): Make file/dir pool within parent.
>   (delete_entry): Use parent pool for data in parent.
>   (add_directory,
>    open_directory): Don't use editor pool as scratch pool.
>   (close_directory): Destroy directory pool.
> 
>   (add_file,
>    open_file,
>    apply_textdelta): Don't use editor pool as scratch pool.
>   (close_file): Destroy file pool.
> 
> Modified:
>     subversion/trunk/subversion/svnrdump/dump_editor.c
> 
> Modified: subversion/trunk/subversion/svnrdump/dump_editor.c
> ==============================================================================
> --- subversion/trunk/subversion/svnrdump/dump_editor.c (original)
> +++ subversion/trunk/subversion/svnrdump/dump_editor.c Mon Jan 13 14:47:16 2014
> @@ -180,31 +180,38 @@ make_dir_baton(const char *path,
>    struct dump_edit_baton *eb = edit_baton;
>    struct dir_baton *new_db = apr_pcalloc(pool, sizeof(*new_db));
>    const char *repos_relpath;
> +  apr_pool_t *dir_pool;
> 
>    /* Construct the full path of this node. */
>    if (pb)
> -    repos_relpath = svn_relpath_canonicalize(path, pool);
> +    {
> +      dir_pool = svn_pool_create(pb->pool);
> +      repos_relpath = svn_relpath_canonicalize(path, dir_pool);
> +    }
>    else
> -    repos_relpath = "";
> +    {
> +      dir_pool = svn_pool_create(eb->pool);
> +      repos_relpath = "";
> +    }
> 
>    /* Strip leading slash from copyfrom_path so that the path is
>       canonical and svn_relpath_join can be used */
>    if (copyfrom_path)
> -    copyfrom_path = svn_relpath_canonicalize(copyfrom_path, pool);
> +    copyfrom_path = svn_relpath_canonicalize(copyfrom_path, dir_pool);
> 
>    new_db->eb = eb;
>    new_db->parent_dir_baton = pb;
> -  new_db->pool = pool;
> +  new_db->pool = dir_pool;
>    new_db->repos_relpath = repos_relpath;
>    new_db->copyfrom_path = copyfrom_path
> -                            ? svn_relpath_canonicalize(copyfrom_path, pool)
> +                            ? svn_relpath_canonicalize(copyfrom_path, dir_pool)
>                              : NULL;
>    new_db->copyfrom_rev = copyfrom_rev;
>    new_db->added = added;
>    new_db->written_out = FALSE;
> -  new_db->props = apr_hash_make(pool);
> -  new_db->deleted_props = apr_hash_make(pool);
> -  new_db->deleted_entries = apr_hash_make(pool);
> +  new_db->props = apr_hash_make(dir_pool);
> +  new_db->deleted_props = apr_hash_make(dir_pool);
> +  new_db->deleted_entries = apr_hash_make(dir_pool);
> 
>    return new_db;
> }
> @@ -218,14 +225,15 @@ make_file_baton(const char *path,
>                  struct dir_baton *pb,
>                  apr_pool_t *pool)
> {
> -  struct file_baton *new_fb = apr_pcalloc(pool, sizeof(*new_fb));
> +  apr_pool_t *file_pool = svn_pool_create(pb->pool);
> +  struct file_baton *new_fb = apr_pcalloc(file_pool, sizeof(*new_fb));
> 
>    new_fb->eb = pb->eb;
>    new_fb->parent_dir_baton = pb;
> -  new_fb->pool = pool;
> -  new_fb->repos_relpath = svn_relpath_canonicalize(path, pool);
> -  new_fb->props = apr_hash_make(pool);
> -  new_fb->deleted_props = apr_hash_make(pool);
> +  new_fb->pool = file_pool;
> +  new_fb->repos_relpath = svn_relpath_canonicalize(path, file_pool);
> +  new_fb->props = apr_hash_make(file_pool);
> +  new_fb->deleted_props = apr_hash_make(file_pool);
>    new_fb->is_copy = FALSE;
>    new_fb->copyfrom_path = NULL;
>    new_fb->copyfrom_rev = SVN_INVALID_REVNUM;
> @@ -663,7 +671,7 @@ delete_entry(const char *path,
>       to the deleted_entries of the parent directory baton.  That way,
>       we can tell (later) an addition from a replacement.  All the real
>       deletions get handled in close_directory().  */
> -  svn_hash_sets(pb->deleted_entries, apr_pstrdup(pb->eb->pool, path), 
> pb);
> +  svn_hash_sets(pb->deleted_entries, apr_pstrdup(pb->pool, path), pb);
> 
>    return SVN_NO_ERROR;
> }
> @@ -686,7 +694,7 @@ add_directory(const char *path,
>    SVN_ERR(dump_pending(pb->eb, pool));
> 
>    new_db = make_dir_baton(path, copyfrom_path, copyfrom_rev, pb->eb,
> -                          pb, TRUE, pb->eb->pool);
> +                          pb, TRUE, pb->pool);
> 
>    /* This might be a replacement -- is the path already deleted? */
>    val = svn_hash_gets(pb->deleted_entries, path);
> @@ -738,12 +746,12 @@ open_directory(const char *path,
>      {
>        copyfrom_path = svn_relpath_join(pb->copyfrom_path,
>                                         svn_relpath_basename(path, NULL),
> -                                       pb->eb->pool);
> +                                       pb->pool);
>        copyfrom_rev = pb->copyfrom_rev;
>      }
> 
>    new_db = make_dir_baton(path, copyfrom_path, copyfrom_rev, pb->eb, pb,
> -                          FALSE, pb->eb->pool);
> +                          FALSE, pb->pool);
> 
>    *child_baton = new_db;
>    return SVN_NO_ERROR;
> @@ -794,7 +802,7 @@ close_directory(void *dir_baton,
>      }
> 
>    /* ### should be unnecessary */
> -  apr_hash_clear(db->deleted_entries);
> +  svn_pool_destroy(db->pool);
> 
>    return SVN_NO_ERROR;
> }
> @@ -861,7 +869,7 @@ open_file(const char *path,
>      {
>        fb->copyfrom_path = svn_relpath_join(pb->copyfrom_path,
>                                             svn_relpath_basename(path, NULL),
> -                                           pb->eb->pool);
> +                                           pb->pool);
>        fb->copyfrom_rev = pb->copyfrom_rev;
>      }
> 
> @@ -954,7 +962,7 @@ apply_textdelta(void *file_baton, const 
> 
>    /* Record that there's text to be dumped, and its base checksum. */
>    fb->dump_text = TRUE;
> -  fb->base_checksum = apr_pstrdup(eb->pool, base_checksum);
> +  fb->base_checksum = apr_pstrdup(fb->pool, base_checksum);
> 
>    return SVN_NO_ERROR;
> }
> @@ -1067,6 +1075,8 @@ close_file(void *file_baton,
>       dump` */
>    SVN_ERR(svn_stream_puts(eb->stream, "\n\n"));
> 
> +  svn_pool_clear(fb->pool);
> +
>    return SVN_NO_ERROR;
> }
>