You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2010/08/15 01:37:47 UTC

svn commit: r985601 - /subversion/branches/performance/subversion/libsvn_subr/stream.c

Author: stefan2
Date: Sat Aug 14 23:37:46 2010
New Revision: 985601

URL: http://svn.apache.org/viewvc?rev=985601&view=rev
Log:
Management data in FSFS is stored as text lines. This patch speeds up
stream_readline by fetching larger chunks from the stream instead of
individual bytes. If the stream does not support the features necessary
for that, fall back to the standard (=previously existing) implementation.

* subversion/libsvn_subr/stream.c
  (stream_readline_bytewise): renamed from the former stream_readline
  (stream_readline_chunky): new, faster alternative implementation
  (stream_readline): invoke the fastest suitable implementation

Modified:
    subversion/branches/performance/subversion/libsvn_subr/stream.c

Modified: subversion/branches/performance/subversion/libsvn_subr/stream.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_subr/stream.c?rev=985601&r1=985600&r2=985601&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_subr/stream.c (original)
+++ subversion/branches/performance/subversion/libsvn_subr/stream.c Sat Aug 14 23:37:46 2010
@@ -296,12 +296,12 @@ scan_eol(const char **eol, svn_stream_t 
  * If DETECT_EOL is FALSE, *EOL must point to the desired end-of-line
  * indicator.  STRINGBUF is allocated in POOL. */
 static svn_error_t *
-stream_readline(svn_stringbuf_t **stringbuf,
-                svn_boolean_t *eof,
-                const char **eol,
-                svn_stream_t *stream,
-                svn_boolean_t detect_eol,
-                apr_pool_t *pool)
+stream_readline_bytewise(svn_stringbuf_t **stringbuf,
+                         svn_boolean_t *eof,
+                         const char **eol,
+                         svn_stream_t *stream,
+                         svn_boolean_t detect_eol,
+                         apr_pool_t *pool)
 {
   svn_stringbuf_t *str;
   const char *eol_str;
@@ -313,7 +313,9 @@ stream_readline(svn_stringbuf_t **string
      optimize for the 90% case.  90% of the time, we can avoid the
      stringbuf ever having to realloc() itself if we start it out at
      80 chars.  */
-  str = svn_stringbuf_create_ensure(80, pool);
+#define LINE_CHUNK_SIZE 80
+
+  str = svn_stringbuf_create_ensure(LINE_CHUNK_SIZE, pool);
 
   if (detect_eol)
     {
@@ -360,6 +362,170 @@ stream_readline(svn_stringbuf_t **string
   return SVN_NO_ERROR;
 }
 
+/* Chunk-wise variant of stream_readline_bytewise() for a known EOL.
+ * Read one line from STREAM into *STRINGBUF (allocated in POOL) without
+ * its EOL, fetching LINE_CHUNK_SIZE bytes per read and then moving the
+ * read pointer back to just behind the EOL via the stream's mark.
+ * Whenever a read comes up short, rewind to the line start and let
+ * stream_readline_bytewise() redo the line; EOF is only passed through
+ * to that fallback and never written here.
+ *
+ * NOTE(review): EOL is searched with strstr(), so a NUL byte in the data
+ * would hide a following EOL -- confirm inputs have no embedded NULs. */
+static svn_error_t *
+stream_readline_chunky(svn_stringbuf_t **stringbuf,
+                       svn_boolean_t *eof,
+                       const char *eol,
+                       svn_stream_t *stream,
+                       apr_pool_t *pool)
+{
+  /* Read larger chunks of data at once into this buffer and scan
+   * that for EOL. A good chunk size should be about 80 chars since
+   * most text lines will be shorter. However, don't use a much
+   * larger value because filling the buffer from the stream takes
+   * time as well. The extra byte holds the NUL that strstr() needs.
+   */
+  char buffer[LINE_CHUNK_SIZE+1];
+
+  /* variables */
+  svn_stream_mark_t *mark;
+  apr_size_t numbytes;
+  const char *eol_pos;
+  apr_off_t total_parsed = 0;
+
+  /* invariant for this call */
+  size_t eol_len = strlen(eol);
+
+  /* Remember the line start so this plus the line length will be
+   * the position to move to at the end of this function. */
+  SVN_ERR(svn_stream_mark(stream, &mark, pool));
+
+  /* Read the first chunk. */
+  numbytes = LINE_CHUNK_SIZE;
+  SVN_ERR(svn_stream_read(stream, buffer, &numbytes));
+  buffer[numbytes] = '\0';
+
+  /* If we hit EOF, there are a number of possible special cases
+   * that we don't bother dealing with here. So, use the standard
+   * readline implementation to handle them. */
+  if (numbytes < LINE_CHUNK_SIZE)
+    {
+      /* Move stream read pointer back to the initial position. */
+      SVN_ERR(svn_stream_seek(stream, mark));
+      return stream_readline_bytewise(stringbuf, eof, &eol, stream,
+                                      FALSE, pool);
+    }
+
+  /* Look for the EOL in this first chunk. If found, we are done here. */
+  eol_pos = strstr(buffer, eol);
+  if (eol_pos != NULL)
+    {
+      *stringbuf = svn_stringbuf_ncreate(buffer, eol_pos - buffer, pool);
+      total_parsed = eol_pos - buffer + eol_len;
+    }
+  else
+    {
+      /* A larger buffer for the string is needed. */
+      svn_stringbuf_t *str;
+      str = svn_stringbuf_create_ensure(2*LINE_CHUNK_SIZE, pool);
+      svn_stringbuf_appendbytes(str, buffer, numbytes);
+
+      /* Loop reading chunks until an EOL was found. If we hit EOF, fall
+       * back to the standard implementation. */
+      do
+      {
+        /* Scan the new chunk plus up to EOL_LEN-1 bytes before it,
+         * because the EOL may span over the boundary between chunks.
+         */
+        apr_size_t scan_start = str->len;
+        scan_start -= (scan_start < eol_len - 1) ? scan_start : eol_len - 1;
+
+        /* Append the next chunk to the string read so far, reserving
+         * one extra byte for the NUL terminator written below. */
+        svn_stringbuf_ensure(str, str->len + LINE_CHUNK_SIZE + 1);
+        numbytes = LINE_CHUNK_SIZE;
+        SVN_ERR(svn_stream_read(stream, str->data + str->len, &numbytes));
+        str->len += numbytes;
+        str->data[str->len] = '\0';
+
+        /* Again, if we hit EOF, fall back to the standard code. */
+        if (numbytes < LINE_CHUNK_SIZE)
+        {
+          /* Move stream read pointer back to the initial position. */
+          SVN_ERR(svn_stream_seek(stream, mark));
+          return stream_readline_bytewise(stringbuf, eof, &eol, stream,
+                                          FALSE, pool);
+        }
+
+        eol_pos = strstr(str->data + scan_start, eol);
+      }
+      while (eol_pos == NULL);
+
+      /* Number of bytes we actually consumed (i.e. line + EOL).
+       * We need to "return" the rest to the stream by moving its
+       * read pointer.
+       */
+      total_parsed = eol_pos - str->data + eol_len;
+
+      /* Terminate the string at the EOL position and return it. */
+      str->len = eol_pos - str->data;
+      str->data[str->len] = 0;
+
+      *stringbuf = svn_stringbuf_dup(str, pool);
+    }
+
+  /* Move the stream read pointer to the first position behind the EOL. */
+  SVN_ERR(svn_stream_move_mark(stream, mark, total_parsed));
+  return svn_stream_seek(stream, mark);
+}
+
+/* Shared worker for svn_stream_readline() and
+ * svn_stream_readline_detect_eol().
+ *
+ * Read a line from STREAM and hand it back in *STRINGBUF, which is
+ * allocated in POOL; *EOF reports whether end-of-file was reached.
+ * With DETECT_EOL set to TRUE, the end-of-line indicator is detected
+ * automatically and returned in *EOL.  With DETECT_EOL set to FALSE,
+ * *EOL must already point to the end-of-line indicator to look for. */
+static svn_error_t *
+stream_readline(svn_stringbuf_t **stringbuf,
+                svn_boolean_t *eof,
+                const char **eol,
+                svn_stream_t *stream,
+                svn_boolean_t detect_eol,
+                apr_pool_t *pool)
+{
+  svn_boolean_t can_read_chunks;
+
+  *eof = FALSE;
+
+  /* The chunk-wise reader needs a stream that supports marks and is
+   * buffered, plus a caller-supplied EOL.  That is the common case
+   * (APR file or string-based streams with a known EOL).
+   */
+  can_read_chunks = svn_stream_supports_mark(stream)
+                    && svn_stream_buffered(stream)
+                    && !detect_eol;
+
+  if (!can_read_chunks)
+    {
+      /* No shortcut available: read the line one byte at a time. */
+      SVN_ERR(stream_readline_bytewise(stringbuf,
+                                       eof,
+                                       eol,
+                                       stream,
+                                       detect_eol,
+                                       pool));
+      return SVN_NO_ERROR;
+    }
+
+  /* Read chunks speculatively; the stream pointer gets repositioned
+   * to the end of the line once that has been found.
+   */
+  SVN_ERR(stream_readline_chunky(stringbuf, eof, *eol, stream, pool));
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_stream_readline(svn_stream_t *stream,
                     svn_stringbuf_t **stringbuf,