You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ar...@apache.org on 2010/10/03 09:05:05 UTC
svn commit: r1003924 - /subversion/trunk/subversion/svnrdump/dump_editor.c
Author: artagnon
Date: Sun Oct 3 07:05:05 2010
New Revision: 1003924
URL: http://svn.apache.org/viewvc?rev=1003924&view=rev
Log:
svnrdump: dump_editor: Use just one temporary file for all textdelta
application by truncating and re-using it in each iteration. This has
significant performance benefits.
* subversion/svnrdump/dump_editor.c
(dump_edit_baton): Correct the comment about the description of
pool, add a new delta_file variable and note in a comment that
delta_file and delta_abspath should be allocated in the
per-edit-session pool, not in the per-revision pool like the other
variables.
(get_dump_editor): Open the delta file initializing `eb->delta_file`
and `eb->delta_abspath`, allocating them in the per-edit-session
pool. Mark delta_file as a del_on_close, so that it's automatically
cleaned up when the editor is done. All streams in callbacks that
map to it should set disown = TRUE.
(close_file): Instead of opening the delta_file, simply seek to the
beginning. During cleanup, don't close or remove the file; just
truncate it to get it ready for the next textdelta
application. Also, while getting the file stat, don't re-open the
file -- use apr_file_info_get on the open file instead.
(apply_textdelta): Don't create a unique file. Simply map a stream
to the delta file to perform the textdelta application. Disown the
stream and close it explicitly, making sure not to close the delta
file itself.
Patch by: David Barr <da...@cordelta.com>
Helped by: artagnon
Modified:
subversion/trunk/subversion/svnrdump/dump_editor.c
Modified: subversion/trunk/subversion/svnrdump/dump_editor.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svnrdump/dump_editor.c?rev=1003924&r1=1003923&r2=1003924&view=diff
==============================================================================
--- subversion/trunk/subversion/svnrdump/dump_editor.c (original)
+++ subversion/trunk/subversion/svnrdump/dump_editor.c Sun Oct 3 07:05:05 2010
@@ -45,7 +45,7 @@ struct dump_edit_baton {
/* The output stream we write the dumpfile to */
svn_stream_t *stream;
- /* Pool for per-edit-session allocations */
+ /* Pool for per-revision allocations */
apr_pool_t *pool;
/* Properties which were modified during change_file_prop
@@ -59,9 +59,12 @@ struct dump_edit_baton {
/* Temporary buffer to write property hashes to in human-readable
* form. ### Is this really needed? */
svn_stringbuf_t *propstring;
-
- /* Temporary file to write delta to along with its checksum. */
+
+ /* Temporary file used for textdelta application along with its
+ absolute path; these two variables should be allocated in the
+ per-edit-session pool */
const char *delta_abspath;
+ apr_file_t *delta_file;
/* The checksum of the file the delta is being applied to */
const char *base_checksum;
@@ -711,15 +714,15 @@ apply_textdelta(void *file_baton, const
LDR_DBG(("apply_textdelta %p\n", file_baton));
/* Use a temporary file to measure the text-content-length */
- SVN_ERR(svn_stream_open_unique(&delta_filestream, &(eb->delta_abspath),
- NULL, svn_io_file_del_none, eb->pool,
- pool));
+ delta_filestream = svn_stream_from_aprfile2(eb->delta_file, TRUE, pool);
- /* Prepare to write the delta to the temporary file. */
+ /* Prepare to write the delta to the delta_filestream */
svn_txdelta_to_svndiff2(&(hb->apply_handler), &(hb->apply_baton),
delta_filestream, 0, pool);
+
eb->dump_text = TRUE;
eb->base_checksum = apr_pstrdup(eb->pool, base_checksum);
+ svn_stream_close(delta_filestream);
/* The actual writing takes place when this function has
finished. Set handler and handler_baton now so for
@@ -736,9 +739,10 @@ close_file(void *file_baton,
apr_pool_t *pool)
{
struct dump_edit_baton *eb = file_baton;
- apr_file_t *delta_file;
svn_stream_t *delta_filestream;
apr_finfo_t *info = apr_pcalloc(pool, sizeof(apr_finfo_t));
+ apr_off_t offset;
+ apr_status_t err;
LDR_DBG(("close_file %p\n", file_baton));
@@ -754,7 +758,9 @@ close_file(void *file_baton,
SVN_REPOS_DUMPFILE_TEXT_DELTA
": true\n"));
- SVN_ERR(svn_io_stat(info, eb->delta_abspath, APR_FINFO_SIZE, pool));
+ err = apr_file_info_get(info, APR_FINFO_SIZE, eb->delta_file);
+ if (err)
+ SVN_ERR(svn_error_wrap_apr(err, NULL));
if (eb->base_checksum)
/* Text-delta-base-md5: */
@@ -804,18 +810,19 @@ close_file(void *file_baton,
/* Dump the text */
if (eb->dump_text)
{
- /* Open the temporary file, map it to a stream, copy
- the stream to eb->stream, close and delete the
- file */
- SVN_ERR(svn_io_file_open(&delta_file, eb->delta_abspath, APR_READ,
- APR_OS_DEFAULT, pool));
- delta_filestream = svn_stream_from_aprfile2(delta_file, TRUE, pool);
+ /* Seek to the beginning of the delta file, map it to a stream,
+ and copy the stream to eb->stream. Then close the stream and
+ truncate the file so we can reuse it for the next textdelta
+ application. Note that the file isn't created, opened or
+ closed here */
+ offset = 0;
+ SVN_ERR(svn_io_file_seek(eb->delta_file, APR_SET, &offset, pool));
+ delta_filestream = svn_stream_from_aprfile2(eb->delta_file, TRUE, pool);
SVN_ERR(svn_stream_copy3(delta_filestream, eb->stream, NULL, NULL, pool));
/* Cleanup */
- SVN_ERR(svn_io_file_close(delta_file, pool));
SVN_ERR(svn_stream_close(delta_filestream));
- SVN_ERR(svn_io_remove_file2(eb->delta_abspath, TRUE, pool));
+ SVN_ERR(svn_io_file_trunc(eb->delta_file, 0, pool));
eb->dump_text = FALSE;
}
@@ -846,7 +853,14 @@ get_dump_editor(const svn_delta_editor_t
/* Create a special per-revision pool */
eb->pool = svn_pool_create(pool);
-
+
+ /* Open a unique temporary file for all textdelta applications in
+ this edit session. The file is automatically closed and cleaned
+ up when the edit session is done. */
+ SVN_ERR(svn_io_open_uniquely_named(&(eb->delta_file), &(eb->delta_abspath),
+ NULL, "svnrdump", NULL,
+ svn_io_file_del_on_close, pool, pool));
+
de = svn_delta_default_editor(pool);
de->open_root = open_root;
de->delete_entry = delete_entry;
Re: svn commit: r1003924 -
/subversion/trunk/subversion/svnrdump/dump_editor.c
Posted by Ramkumar Ramachandra <ar...@gmail.com>.
Hi Bert,
Bert Huijben writes:
> Do you really need a 'named' file here?
>
> The named file algorithm of svn_io_open_uniquely_named() is very slow compared to the 'just get me a temporary file' implemented by svn_io_open_unique_file3(). Especially when you need more than a few tempfiles with the same basename. (It tries the generated name itself, then it tries a 1 suffix, a 2 suffix, etc. Until it finds a name that doesn't exist)
Ah, I see. The named temporary made debugging easier initially- I
could easily see if the file was being removed or not, but I suppose
it doesn't matter anymore. Fixed in r1003958. Thanks for the
suggestion.
-- Ram
RE: svn commit: r1003924 - /subversion/trunk/subversion/svnrdump/dump_editor.c
Posted by Bert Huijben <be...@qqmail.nl>.
> -----Original Message-----
> From: artagnon@apache.org [mailto:artagnon@apache.org]
> Sent: zondag 3 oktober 2010 9:05
> To: commits@subversion.apache.org
> Subject: svn commit: r1003924 -
> /subversion/trunk/subversion/svnrdump/dump_editor.c
>
> Author: artagnon
> Date: Sun Oct 3 07:05:05 2010
> New Revision: 1003924
>
> URL: http://svn.apache.org/viewvc?rev=1003924&view=rev
> Log:
> svnrdump: dump_editor: Use just one temporary file for all textdelta
> application by truncating and re-using it in each iteration. This has
> significant performance benefits.
>
> * subversion/svnrdump/dump_editor.c
>
> (dump_edit_baton): Correct the comment about the description of
> pool, add a new delta_file variable and note in a comment that
> delta_file and delta_abspath should be allocated in the
> per-edit-session pool, not in the per-revision pool like the other
> variables.
>
> (get_dump_editor): Open the delta file initializing `eb->delta_file`
> and `eb->delta_abspath`, allocating them in the per-edit-session
> pool. Mark delta_file as a del_on_close, so that it's automatically
> cleaned up when the editor is done. All streams in callbacks that
> map to it should set disown = TRUE.
>
> (close_file): Instead of opening the delta_file, simply seek to the
> beginning. During cleanup, don't close or remove the file; just
> truncate it to get it ready for the next textdelta
> application. Also, while getting the file stat, don't re-open the
> file -- use apr_file_info_get on the open file instead.
>
> (apply_textdelta): Don't create a unique file. Simply map a stream
> to the delta file to perform the textdelta application. Disown the
> stream and close it explicitly, making sure not to close the delta
> file itself.
>
> Patch by: David Barr <da...@cordelta.com>
> Helped by: artagnon
>
Looks good; nice optimization :)
>
> @@ -846,7 +853,14 @@ get_dump_editor(const svn_delta_editor_t
>
> /* Create a special per-revision pool */
> eb->pool = svn_pool_create(pool);
> -
> +
> + /* Open a unique temporary file for all textdelta applications in
> + this edit session. The file is automatically closed and cleaned
> + up when the edit session is done. */
> + SVN_ERR(svn_io_open_uniquely_named(&(eb->delta_file), &(eb-
> >delta_abspath),
> + NULL, "svnrdump", NULL,
> + svn_io_file_del_on_close, pool,
> pool));
(This code is probably copied from the old code; but anyway:)
Do you really need a 'named' file here?
The named file algorithm of svn_io_open_uniquely_named() is very slow compared to the 'just get me a temporary file' implemented by svn_io_open_unique_file3(). Especially when you need more than a few tempfiles with the same basename. (It tries the generated name itself, then it tries a 1 suffix, a 2 suffix, etc. Until it finds a name that doesn't exist)
Bert