You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ar...@apache.org on 2010/10/03 09:05:05 UTC

svn commit: r1003924 - /subversion/trunk/subversion/svnrdump/dump_editor.c

Author: artagnon
Date: Sun Oct  3 07:05:05 2010
New Revision: 1003924

URL: http://svn.apache.org/viewvc?rev=1003924&view=rev
Log:
svnrdump: dump_editor: Use just one temporary file for all textdelta
application by truncating and re-using it in each iteration. This has
significant performance benefits.

* subversion/svnrdump/dump_editor.c

  (dump_edit_baton): Correct the comment about the description of
  pool, add a new delta_file variable and note in a comment that
  delta_file and delta_abspath should be allocated in the
  per-edit-session pool, not in the per-revision pool like the other
  variables. 

  (get_dump_editor): Open the delta file initializing `eb->delta_file`
  and `eb->delta_abspath`, allocating them in the per-edit-session
  pool. Mark delta_file as a del_on_close, so that it's automatically
  cleaned up when the editor is done. All streams in callbacks that
  map to it should set disown = TRUE.

  (close_file): Instead of opening the delta_file, simply seek to the
  beginning. During cleanup, don't close or remove the file; just
  truncate it to get it ready for the next textdelta
  application. Also, while getting the file stat, don't re-open the
  file -- use apr_file_info_get on the open file instead.

  (apply_textdelta): Don't create a unique file. Simply map a stream
  to the delta file to perform the textdelta application. Disown the
  stream and close it explicitly, making sure not to close the delta
  file itself.

Patch by: David Barr <da...@cordelta.com>
Helped by: artagnon

Modified:
    subversion/trunk/subversion/svnrdump/dump_editor.c

Modified: subversion/trunk/subversion/svnrdump/dump_editor.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svnrdump/dump_editor.c?rev=1003924&r1=1003923&r2=1003924&view=diff
==============================================================================
--- subversion/trunk/subversion/svnrdump/dump_editor.c (original)
+++ subversion/trunk/subversion/svnrdump/dump_editor.c Sun Oct  3 07:05:05 2010
@@ -45,7 +45,7 @@ struct dump_edit_baton {
   /* The output stream we write the dumpfile to */
   svn_stream_t *stream;
 
-  /* Pool for per-edit-session allocations */
+  /* Pool for per-revision allocations */
   apr_pool_t *pool;
 
   /* Properties which were modified during change_file_prop
@@ -59,9 +59,12 @@ struct dump_edit_baton {
   /* Temporary buffer to write property hashes to in human-readable
    * form. ### Is this really needed? */
   svn_stringbuf_t *propstring;
-   
-  /* Temporary file to write delta to along with its checksum. */
+
+  /* Temporary file used for textdelta application along with its
+     absolute path; these two variables should be allocated in the
+     per-edit-session pool */
   const char *delta_abspath;
+  apr_file_t *delta_file;
 
   /* The checksum of the file the delta is being applied to */
   const char *base_checksum;
@@ -711,15 +714,15 @@ apply_textdelta(void *file_baton, const 
   LDR_DBG(("apply_textdelta %p\n", file_baton));
 
   /* Use a temporary file to measure the text-content-length */
-  SVN_ERR(svn_stream_open_unique(&delta_filestream, &(eb->delta_abspath),
-                                 NULL, svn_io_file_del_none, eb->pool,
-                                 pool));
+  delta_filestream = svn_stream_from_aprfile2(eb->delta_file, TRUE, pool);
 
-  /* Prepare to write the delta to the temporary file. */
+  /* Prepare to write the delta to the delta_filestream */
   svn_txdelta_to_svndiff2(&(hb->apply_handler), &(hb->apply_baton),
                           delta_filestream, 0, pool);
+
   eb->dump_text = TRUE;
   eb->base_checksum = apr_pstrdup(eb->pool, base_checksum);
+  svn_stream_close(delta_filestream);
 
   /* The actual writing takes place when this function has
      finished. Set handler and handler_baton now so for
@@ -736,9 +739,10 @@ close_file(void *file_baton,
            apr_pool_t *pool)
 {
   struct dump_edit_baton *eb = file_baton;
-  apr_file_t *delta_file;
   svn_stream_t *delta_filestream;
   apr_finfo_t *info = apr_pcalloc(pool, sizeof(apr_finfo_t));
+  apr_off_t offset;
+  apr_status_t err;
 
   LDR_DBG(("close_file %p\n", file_baton));
 
@@ -754,7 +758,9 @@ close_file(void *file_baton,
                                 SVN_REPOS_DUMPFILE_TEXT_DELTA
                                 ": true\n"));
 
-      SVN_ERR(svn_io_stat(info, eb->delta_abspath, APR_FINFO_SIZE, pool));
+      err = apr_file_info_get(info, APR_FINFO_SIZE, eb->delta_file);
+      if (err)
+        SVN_ERR(svn_error_wrap_apr(err, NULL));
 
       if (eb->base_checksum)
         /* Text-delta-base-md5: */
@@ -804,18 +810,19 @@ close_file(void *file_baton,
   /* Dump the text */
   if (eb->dump_text)
     {
-      /* Open the temporary file, map it to a stream, copy
-         the stream to eb->stream, close and delete the
-         file */
-      SVN_ERR(svn_io_file_open(&delta_file, eb->delta_abspath, APR_READ,
-                               APR_OS_DEFAULT, pool));
-      delta_filestream = svn_stream_from_aprfile2(delta_file, TRUE, pool);
+      /* Seek to the beginning of the delta file, map it to a stream,
+         and copy the stream to eb->stream. Then close the stream and
+         truncate the file so we can reuse it for the next textdelta
+         application. Note that the file isn't created, opened or
+         closed here */
+      offset = 0;
+      SVN_ERR(svn_io_file_seek(eb->delta_file, APR_SET, &offset, pool));
+      delta_filestream = svn_stream_from_aprfile2(eb->delta_file, TRUE, pool);
       SVN_ERR(svn_stream_copy3(delta_filestream, eb->stream, NULL, NULL, pool));
 
       /* Cleanup */
-      SVN_ERR(svn_io_file_close(delta_file, pool));
       SVN_ERR(svn_stream_close(delta_filestream));
-      SVN_ERR(svn_io_remove_file2(eb->delta_abspath, TRUE, pool));
+      SVN_ERR(svn_io_file_trunc(eb->delta_file, 0, pool));
       eb->dump_text = FALSE;
     }
 
@@ -846,7 +853,14 @@ get_dump_editor(const svn_delta_editor_t
 
   /* Create a special per-revision pool */
   eb->pool = svn_pool_create(pool);
-  
+
+  /* Open a unique temporary file for all textdelta applications in
+     this edit session. The file is automatically closed and cleaned
+     up when the edit session is done. */
+  SVN_ERR(svn_io_open_uniquely_named(&(eb->delta_file), &(eb->delta_abspath),
+                                     NULL, "svnrdump", NULL,
+                                     svn_io_file_del_on_close, pool, pool));
+
   de = svn_delta_default_editor(pool);
   de->open_root = open_root;
   de->delete_entry = delete_entry;



Re: svn commit: r1003924 - /subversion/trunk/subversion/svnrdump/dump_editor.c

Posted by Ramkumar Ramachandra <ar...@gmail.com>.
Hi Bert,

Bert Huijben writes:
> Do you really need a 'named' file here?
> 
> The named file algorithm of svn_io_open_uniquely_named() is very slow compared to the 'just get me a temporary file' implemented by svn_io_open_unique_file3(). Especially when you need more than a few tempfiles with the same basename. (It tries the generated name itself, then it tries a 1 suffix, a 2 suffix, etc. Until it finds a name that doesn't exist)

Ah, I see. The named temporary made debugging easier initially- I
could easily see if the file was being removed or not, but I suppose
it doesn't matter anymore. Fixed in r1003958. Thanks for the
suggestion.

-- Ram

RE: svn commit: r1003924 - /subversion/trunk/subversion/svnrdump/dump_editor.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: artagnon@apache.org [mailto:artagnon@apache.org]
> Sent: zondag 3 oktober 2010 9:05
> To: commits@subversion.apache.org
> Subject: svn commit: r1003924 -
> /subversion/trunk/subversion/svnrdump/dump_editor.c
> 
> Author: artagnon
> Date: Sun Oct  3 07:05:05 2010
> New Revision: 1003924
> 
> URL: http://svn.apache.org/viewvc?rev=1003924&view=rev
> Log:
> svnrdump: dump_editor: Use just one temporary file for all textdelta
> application by truncating and re-using it in each iteration. This has
> significant performance benefits.
> 
> * subversion/svnrdump/dump_editor.c
> 
>   (dump_edit_baton): Correct the comment about the description of
>   pool, add a new delta_file variable and note in a comment that
>   delta_file and delta_abspath should be allocated in the
>   per-edit-session pool, not in the per-revision pool like the other
>   variables.
> 
>   (get_dump_editor): Open the delta file initializing `eb->delta_file`
>   and `eb->delta_abspath`, allocating them in the per-edit-session
>   pool. Mark delta_file as a del_on_close, so that it's automatically
>   cleaned up when the editor is done. All streams in callbacks that
>   map to it should set disown = TRUE.
> 
>   (close_file): Instead of opening the delta_file, simply seek to the
>   beginning. During cleanup, don't close or remove the file; just
>   truncate it to get it ready for the next textdelta
>   application. Also, while getting the file stat, don't re-open the
>   file -- use apr_file_info_get on the open file instead.
> 
>   (apply_textdelta): Don't create a unique file. Simply map a stream
>   to the delta file to perform the textdelta application. Disown the
>   stream and close it explicitly, making sure not to close the delta
>   file itself.
> 
> Patch by: David Barr <da...@cordelta.com>
> Helped by: artagnon
> 

Looks good; nice optimization :)

> 
> @@ -846,7 +853,14 @@ get_dump_editor(const svn_delta_editor_t
> 
>    /* Create a special per-revision pool */
>    eb->pool = svn_pool_create(pool);
> -
> +
> +  /* Open a unique temporary file for all textdelta applications in
> +     this edit session. The file is automatically closed and cleaned
> +     up when the edit session is done. */
> +  SVN_ERR(svn_io_open_uniquely_named(&(eb->delta_file), &(eb-
> >delta_abspath),
> +                                     NULL, "svnrdump", NULL,
> +                                     svn_io_file_del_on_close, pool,
> pool));

(This code is probably copied from the old code; but anyway:)

Do you really need a 'named' file here?

The named file algorithm of svn_io_open_uniquely_named() is very slow compared to the 'just get me a temporary file' implemented by svn_io_open_unique_file3(). Especially when you need more than a few tempfiles with the same basename. (It tries the generated name itself, then it tries a 1 suffix, a 2 suffix, etc. Until it finds a name that doesn't exist)

	Bert