You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ko...@apache.org on 2017/07/27 09:02:18 UTC

svn commit: r1803144 - /subversion/trunk/subversion/libsvn_client/import.c

Author: kotkov
Date: Thu Jul 27 09:02:18 2017
New Revision: 1803144

URL: http://svn.apache.org/viewvc?rev=1803144&view=rev
Log:
Stream svndiff deltas without creating temporary files during 'svn import'
over HTTP.

Creating the temporary files has a certain overhead (time to write them
and disk space), but what is more important, it prevents the client and
server from simultaneously processing the data, i.e., checksumming and
compressing it.

This can have a significant effect for large binary files, and the new
approach can be up to 2x faster.  The actual improvement depends on the
configuration, but is generally more visible if the server and the client
process data with roughly the same speed.  Several tests in my environment
showed that the import time was reduced from 63s to 32s, and from 8s to 5s
for two (different) large binary files.

Technically, this is achieved by teaching the delta editor driver that
is used in svn_client_import5() to use the new apply_textdelta_stream()
editor callback.

* subversion/libsvn_client/import.c
  (open_txdelta_stream_baton_t): New.
  (txdelta_stream_baton_t, txdelta_next_window, txdelta_md5_digest):
   New.  Used to create a custom txdelta stream that retargets the result
   checksum into a supplied buffer.  Used in open_txdelta_stream().
  (send_file_contents): Move the core of this function, i.e., preparation
   of the delta stream into ...
  (open_txdelta_stream): ...this new svn_txdelta_stream_open_func_t
   implementation, and pass the necessary arguments via baton.  Use
   result and scratch pools.  Note that this function must be restartable,
   as it can be called more than once, for example, if the HTTP server
   decides to do a renegotiation, and we would be forced to resend
   the whole request body.
  (import_file): Use the new apply_textdelta_stream() editor callback.

Modified:
    subversion/trunk/subversion/libsvn_client/import.c

Modified: subversion/trunk/subversion/libsvn_client/import.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_client/import.c?rev=1803144&r1=1803143&r2=1803144&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_client/import.c (original)
+++ subversion/trunk/subversion/libsvn_client/import.c Thu Jul 27 09:02:18 2017
@@ -73,48 +73,80 @@ typedef struct import_ctx_t
   apr_hash_t *autoprops;
 } import_ctx_t;
 
+typedef struct txdelta_stream_baton_t
+{
+  svn_txdelta_stream_t *txstream;
+  unsigned char *digest_buffer;
+} txdelta_stream_baton_t;
 
-/* Apply LOCAL_ABSPATH's contents (as a delta against the empty string) to
-   FILE_BATON in EDITOR.  Use POOL for any temporary allocation.
-   PROPERTIES is the set of node properties set on this file.
+static svn_error_t *
+txdelta_next_window(svn_txdelta_window_t **window,
+                    void *baton,
+                    apr_pool_t *pool)
+{
+  struct txdelta_stream_baton_t *b = baton;
 
-   Fill DIGEST with the md5 checksum of the sent file; DIGEST must be
-   at least APR_MD5_DIGESTSIZE bytes long. */
+  SVN_ERR(svn_txdelta_next_window(window, b->txstream, pool));
+  if (!window)
+    {
+      memcpy(b->digest_buffer, svn_txdelta_md5_digest(b->txstream),
+             APR_MD5_DIGESTSIZE);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+static const unsigned char *
+txdelta_md5_digest(void *baton)
+{
+  struct txdelta_stream_baton_t *b = baton;
+
+  return svn_txdelta_md5_digest(b->txstream);
+}
+
+typedef struct open_txdelta_stream_baton_t
+{
+  const char *local_abspath;
+  apr_hash_t *properties;
+  unsigned char checksum_digest[APR_MD5_DIGESTSIZE];
+} open_txdelta_stream_baton_t;
+
+/* Open delta stream *TXDELTA_STREAM_P for the BATON->local_abspath
+   file contents (as a delta against the empty string).  Use POOL for
+   all allocations.  BATON->properties is the set of node properties
+   set on this file.
+
+   Implements svn_txdelta_stream_open_func_t. */
 
 /* ### how does this compare against svn_wc_transmit_text_deltas2() ??? */
 
 static svn_error_t *
-send_file_contents(const char *local_abspath,
-                   void *file_baton,
-                   const svn_delta_editor_t *editor,
-                   apr_hash_t *properties,
-                   unsigned char *digest,
-                   apr_pool_t *pool)
+open_txdelta_stream(svn_txdelta_stream_t **txdelta_stream_p,
+                    void *baton,
+                    apr_pool_t *result_pool,
+                    apr_pool_t *scratch_pool)
 {
+  open_txdelta_stream_baton_t *b = baton;
   svn_stream_t *contents;
-  svn_txdelta_window_handler_t handler;
-  void *handler_baton;
   const svn_string_t *eol_style_val = NULL, *keywords_val = NULL;
   svn_boolean_t special = FALSE;
   svn_subst_eol_style_t eol_style;
   const char *eol;
   apr_hash_t *keywords;
+  svn_txdelta_stream_t *txdelta_stream;
+  txdelta_stream_baton_t *tsb;
 
   /* If there are properties, look for EOL-style and keywords ones. */
-  if (properties)
+  if (b->properties)
     {
-      eol_style_val = apr_hash_get(properties, SVN_PROP_EOL_STYLE,
+      eol_style_val = apr_hash_get(b->properties, SVN_PROP_EOL_STYLE,
                                    sizeof(SVN_PROP_EOL_STYLE) - 1);
-      keywords_val = apr_hash_get(properties, SVN_PROP_KEYWORDS,
+      keywords_val = apr_hash_get(b->properties, SVN_PROP_KEYWORDS,
                                   sizeof(SVN_PROP_KEYWORDS) - 1);
-      if (svn_hash_gets(properties, SVN_PROP_SPECIAL))
+      if (svn_hash_gets(b->properties, SVN_PROP_SPECIAL))
         special = TRUE;
     }
 
-  /* Get an editor func that wants to consume the delta stream. */
-  SVN_ERR(editor->apply_textdelta(file_baton, NULL, pool,
-                                  &handler, &handler_baton));
-
   if (eol_style_val)
     svn_subst_eol_style_from_value(&eol_style, &eol, eol_style_val->data);
   else
@@ -126,19 +158,20 @@ send_file_contents(const char *local_abs
   if (keywords_val)
     SVN_ERR(svn_subst_build_keywords3(&keywords, keywords_val->data,
                                       APR_STRINGIFY(SVN_INVALID_REVNUM),
-                                      "", "", 0, "", pool));
+                                      "", "", 0, "", result_pool));
   else
     keywords = NULL;
 
   if (special)
     {
-      SVN_ERR(svn_subst_read_specialfile(&contents, local_abspath,
-                                         pool, pool));
+      SVN_ERR(svn_subst_read_specialfile(&contents, b->local_abspath,
+                                         result_pool, scratch_pool));
     }
   else
     {
       /* Open the working copy file. */
-      SVN_ERR(svn_stream_open_readonly(&contents, local_abspath, pool, pool));
+      SVN_ERR(svn_stream_open_readonly(&contents, b->local_abspath,
+                                       result_pool, scratch_pool));
 
       /* If we have EOL styles or keywords, then detranslate the file. */
       if (svn_subst_translation_required(eol_style, eol, keywords,
@@ -149,8 +182,8 @@ send_file_contents(const char *local_abs
                                     _("%s property on '%s' contains "
                                       "unrecognized EOL-style '%s'"),
                                     SVN_PROP_EOL_STYLE,
-                                    svn_dirent_local_style(local_abspath,
-                                                           pool),
+                                    svn_dirent_local_style(b->local_abspath,
+                                                           scratch_pool),
                                     eol_style_val->data);
 
           /* We're importing, so translate files with 'native' eol-style to
@@ -164,14 +197,24 @@ send_file_contents(const char *local_abs
                                                  TRUE /* repair */,
                                                  keywords,
                                                  FALSE /* expand */,
-                                                 pool);
+                                                 result_pool);
         }
     }
 
-  /* Send the file's contents to the delta-window handler. */
-  return svn_error_trace(svn_txdelta_send_stream(contents, handler,
-                                                 handler_baton, digest,
-                                                 pool));
+  /* Get the delta stream (delta against the empty string). */
+  svn_txdelta2(&txdelta_stream, svn_stream_empty(result_pool),
+               contents, TRUE, result_pool);
+
+  /* Retarget the checksum digest to our buffer, so that it would
+   * have appropriate lifetime. */
+  tsb = apr_pcalloc(result_pool, sizeof(*tsb));
+  tsb->digest_buffer = b->checksum_digest;
+  tsb->txstream = txdelta_stream;
+
+  *txdelta_stream_p = svn_txdelta_stream_create(tsb, txdelta_next_window,
+                                                txdelta_md5_digest,
+                                                result_pool);
+  return SVN_NO_ERROR;
 }
 
 
@@ -198,10 +241,10 @@ import_file(const svn_delta_editor_t *ed
 {
   void *file_baton;
   const char *mimetype = NULL;
-  unsigned char digest[APR_MD5_DIGESTSIZE];
   const char *text_checksum;
   apr_hash_t* properties;
   apr_hash_index_t *hi;
+  open_txdelta_stream_baton_t otsb = {0};
 
   SVN_ERR(svn_path_check_valid(local_abspath, pool));
 
@@ -262,13 +305,15 @@ import_file(const svn_delta_editor_t *ed
     }
 
   /* Now, transmit the file contents. */
-  SVN_ERR(send_file_contents(local_abspath, file_baton, editor,
-                             properties, digest, pool));
+  otsb.local_abspath = local_abspath;
+  otsb.properties = properties;
+  SVN_ERR(editor->apply_textdelta_stream(editor, file_baton, NULL,
+                                         open_txdelta_stream, &otsb, pool));
+  text_checksum = svn_checksum_to_cstring(
+                    svn_checksum__from_digest_md5(otsb.checksum_digest, pool),
+                    pool);
 
   /* Finally, close the file. */
-  text_checksum =
-    svn_checksum_to_cstring(svn_checksum__from_digest_md5(digest, pool), pool);
-
   return svn_error_trace(editor->close_file(file_baton, text_checksum, pool));
 }