You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2010/11/04 21:52:40 UTC

svn commit: r1031234 - in /subversion/trunk: ./ subversion/libsvn_subr/subst.c

Author: stsp
Date: Thu Nov  4 20:52:40 2010
New Revision: 1031234

URL: http://svn.apache.org/viewvc?rev=1031234&view=rev
Log:
Merge the r986453 group from the performance branch.

See the branch for original log messages.

The STATUS entry for this group was:
* r986453, r1029092, r1029335, r1030763
 An even larger change: Speed up file translation by further optimizing
 the search for "interesting chars" and replacing EOLs only if they
 have actually changed (allowing for larger chunks of data per write
 output operation). The third patch fixes a svn_tristate_t comparison
 issue.
 Note:
   r1029092 and r1030763 are docstring changes.
 Justification:
   Apart from the physical I/O and the MD5 translation, this is the
   most CPU-demanding part of an svn export. This patch more than
   doubles its performance. It should also speed up svn checkout by
   the same amount (not factor) of time.

Modified:
    subversion/trunk/   (props changed)
    subversion/trunk/subversion/libsvn_subr/subst.c

Propchange: subversion/trunk/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Nov  4 20:52:40 2010
@@ -23,7 +23,7 @@
 /subversion/branches/log-g-performance:870941-871032
 /subversion/branches/merge-skips-obstructions:874525-874615
 /subversion/branches/nfc-nfd-aware-client:870276,870376
-/subversion/branches/performance:982355,983764,983766,984927,984984,985014,985037,985046,985472,985477,985482,985500,985606,985669,987888,987893,995507,995603,1001413,1025660,1028092,1028094,1028104,1029038,1029042,1029090
+/subversion/branches/performance:982355,983764,983766,984927,984984,985014,985037,985046,985472,985477,985482,985500,985606,985669,986453,987888,987893,995507,995603,1001413,1025660,1028092,1028094,1028104,1029038,1029042,1029090,1029092,1029335,1030763
 /subversion/branches/ra_serf-digest-authn:875693-876404
 /subversion/branches/reintegrate-improvements:873853-874164
 /subversion/branches/subtree-mergeinfo:876734-878766

Modified: subversion/trunk/subversion/libsvn_subr/subst.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/subst.c?rev=1031234&r1=1031233&r2=1031234&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/subst.c (original)
+++ subversion/trunk/subversion/libsvn_subr/subst.c Thu Nov  4 20:52:40 2010
@@ -798,6 +798,10 @@ struct translation_baton
   /* Length of the EOL style string found in the chunk-source,
      or zero if none encountered yet */
   apr_size_t src_format_len;
+
+  /* if this is svn_tristate_false, translate_newline() will be called
+     for every newline in the file */
+  svn_tristate_t nl_translation_skippable;
 };
 
 
@@ -828,6 +832,7 @@ create_translation_baton(const char *eol
   b->newline_off = 0;
   b->keyword_off = 0;
   b->src_format_len = 0;
+  b->nl_translation_skippable = svn_tristate_unknown;
 
   /* Most characters don't start translation actions.
    * Mark those that do depending on the parameters we got. */
@@ -843,6 +848,38 @@ create_translation_baton(const char *eol
   return b;
 }
 
+/* Return TRUE if the EOL starting at BUF matches the eol_str member of B.
+ * Be aware of special cases like "\n\r\n" and "\n\n\r". For sequences like
+ * "\n$" (an EOL followed by a keyword), the result will be FALSE since it is
+ * more efficient to handle that special case implicitly in the calling code
+ * by exiting the quick scan loop.
+ * The caller must ensure that buf[0] and buf[1] refer to valid memory
+ * locations.
+ */
+static APR_INLINE svn_boolean_t
+eol_unchanged(struct translation_baton *b,
+              const char *buf)
+{
+  /* If the first byte doesn't match, the whole EOL won't.
+   * This does also handle the (certainly invalid) case that 
+   * eol_str would be an empty string.
+   */
+  if (buf[0] != b->eol_str[0])
+    return FALSE;
+
+  /* two-char EOLs must be a full match */
+  if (b->eol_str_len == 2)
+    return buf[1] == b->eol_str[1];
+
+  /* The first char matches the required 1-byte EOL. 
+   * But maybe, buf[] contains a 2-byte EOL?
+   * In that case, the second byte will be interesting
+   * and not be another EOL of its own.
+   */
+  return !b->interesting[(unsigned char)buf[1]] || buf[0] == buf[1];
+}
+
+
 /* Translate eols and keywords of a 'chunk' of characters BUF of size BUFLEN
  * according to the settings and state stored in baton B.
  *
@@ -948,19 +985,71 @@ translate_chunk(svn_stream_t *dst,
               continue;
             }
 
-          /* We're in the boring state; look for interest characters. */
-          len = 0;
+          /* translate_newline will modify the baton for src_format_len==0
+             or may return an error if b->repair is FALSE.  In all other
+             cases, we can skip the newline translation as long as source
+             EOL format and actual EOL format match.  If there is a 
+             mismatch, translate_newline will be called regardless of 
+             nl_translation_skippable. 
+           */
+          if (b->nl_translation_skippable == svn_tristate_unknown &&
+              b->src_format_len > 0)
+            {
+              /* test whether translate_newline may return an error */
+              if (b->eol_str_len == b->src_format_len &&
+                  strncmp(b->eol_str, b->src_format, b->eol_str_len) == 0)
+                b->nl_translation_skippable = svn_tristate_true;
+              else if (b->repair) 
+                b->nl_translation_skippable = svn_tristate_true;
+              else
+                b->nl_translation_skippable = svn_tristate_false;
+            }
+
+          /* We're in the boring state; look for interest characters.
+             Offset len such that it will become 0 in the first iteration. 
+           */
+          len = 0 - b->eol_str_len;
+
+          /* Look for the next EOL (or $) that actually needs translation.
+             Stop there or at EOF, whichever is encountered first.
+           */
+          do
+            {
+              /* skip current EOL */
+              len += b->eol_str_len;
+
+              /* Check 4 bytes at once to allow for efficient pipelining
+                 and to reduce loop condition overhead. */
+              while ((p + len + 4) <= end)
+                {
+                  char sum = interesting[(unsigned char)p[len]]
+                           | interesting[(unsigned char)p[len+1]]
+                           | interesting[(unsigned char)p[len+2]]
+                           | interesting[(unsigned char)p[len+3]];
+
+                  if (sum != 0)
+                    break;
+
+                  len += 4;
+                }
+
+               /* Found an interesting char or EOF in the next 4 bytes. 
+                  Find its exact position. */
+               while ((p + len) < end && !interesting[(unsigned char)p[len]])
+                 ++len;
+            }
+          while (b->nl_translation_skippable == svn_tristate_true &&   /* can skip EOLs at all */
+                 p + len + 2 < end &&             /* not too close to EOF */
+                 eol_unchanged (b, p + len));     /* EOL format already ok */
 
-          /* We wanted memcspn(), but lacking that, the loop below has
-             the same effect. Also, skip NUL characters.
-          */
           while ((p + len) < end && !interesting[(unsigned char)p[len]])
             len++;
 
           if (len)
-            SVN_ERR(translate_write(dst, p, len));
-
-          p += len;
+            {
+              SVN_ERR(translate_write(dst, p, len));
+              p += len;
+            }
 
           /* Set up state according to the interesting character, if any. */
           if (p < end)