You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2011/02/20 11:48:27 UTC

svn commit: r1072553 - in /subversion/branches/integrate-readline-speedup: ./ subversion/libsvn_subr/stream.c

Author: stefan2
Date: Sun Feb 20 10:48:27 2011
New Revision: 1072553

URL: http://svn.apache.org/viewvc?rev=1072553&view=rev
Log:
Introduce optimized readline() for streams that support efficient, 
pseudo-random access to their content.

Merged revisions from /branches/performance:
985601, 985695, 1029054, 1029063

Modified:
    subversion/branches/integrate-readline-speedup/   (props changed)
    subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/stream.c

Propchange: subversion/branches/integrate-readline-speedup/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 20 10:48:27 2011
@@ -31,7 +31,7 @@
 /subversion/branches/log-g-performance:870941-871032
 /subversion/branches/merge-skips-obstructions:874525-874615
 /subversion/branches/nfc-nfd-aware-client:870276,870376
-/subversion/branches/performance:979193,980118,981087,981090,981189,981194,981287,981684,981827,982043,982355,983398,983406,983474,983488,983490,983764,983766,983770,984927,984973,984984,985014,985037,985046,985472,985477,985482,985487-985488,985493,985497,985500,985514,985603,985606,985669,986453,986465,986485,986491-986492,986517,986521,986608,986832,987865,987868-987869,987872,987886-987888,987893,988319,988898,990533,990535-990537,990541,990568,990572,990600,990759,992899,992911,993127,993141,994956,995478,995507,995603,998012,998858,999098,1001413,1001417,1004291,1022668,1022670,1022676,1022715,1022719,1025660,1025672,1027193,1027203,1027206,1027214,1027227,1028077,1028092,1028094,1028104,1028107,1028111,1028354,1029038,1029042-1029043,1029055,1029062,1029078,1029080,1029090,1029092-1029093,1029111,1029151,1029158,1029229,1029232,1029335-1029336,1029339-1029340,1029342,1029344,1030763,1030827,1031203,1031235,1032285,1032333,1033040,1033057,1033294,1035869,1039511,104370
 5,1053735,1056015,1066452,1067683
+/subversion/branches/performance:979193,980118,981087,981090,981189,981194,981287,981684,981827,982043,982355,983398,983406,983474,983488,983490,983764,983766,983770,984927,984973,984984,985014,985037,985046,985472,985477,985482,985487-985488,985493,985497,985500,985514,985601,985603,985606,985669,985695,986453,986465,986485,986491-986492,986517,986521,986608,986832,987865,987868-987869,987872,987886-987888,987893,988319,988898,990533,990535-990537,990541,990568,990572,990600,990759,992899,992911,993127,993141,994956,995478,995507,995603,998012,998858,999098,1001413,1001417,1004291,1022668,1022670,1022676,1022715,1022719,1025660,1025672,1027193,1027203,1027206,1027214,1027227,1028077,1028092,1028094,1028104,1028107,1028111,1028354,1029038,1029042-1029043,1029054-1029055,1029062-1029063,1029078,1029080,1029090,1029092-1029093,1029111,1029151,1029158,1029229,1029232,1029335-1029336,1029339-1029340,1029342,1029344,1030763,1030827,1031203,1031235,1032285,1032333,1033040,1033057,
 1033294,1035869,1039511,1043705,1053735,1056015,1066452,1067683
 /subversion/branches/py-tests-as-modules:956579-1033052
 /subversion/branches/ra_serf-digest-authn:875693-876404
 /subversion/branches/reintegrate-improvements:873853-874164

Modified: subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/stream.c
URL: http://svn.apache.org/viewvc/subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/stream.c?rev=1072553&r1=1072552&r2=1072553&view=diff
==============================================================================
--- subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/stream.c (original)
+++ subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/stream.c Sun Feb 20 10:48:27 2011
@@ -274,6 +274,11 @@ scan_eol(const char **eol, svn_stream_t 
   return SVN_NO_ERROR;
 }
 
+/* Line length that about 90% of the lines we encounter will not exceed.
+   Used by stream_readline_bytewise() and stream_readline_chunky().
+ */
+#define LINE_CHUNK_SIZE 80
+
 /* Guts of svn_stream_readline() and svn_stream_readline_detect_eol().
  * Returns the line read from STREAM in *STRINGBUF, and indicates
  * end-of-file in *EOF.  If DETECT_EOL is TRUE, the end-of-line indicator
@@ -281,12 +286,12 @@ scan_eol(const char **eol, svn_stream_t 
  * If DETECT_EOL is FALSE, *EOL must point to the desired end-of-line
  * indicator.  STRINGBUF is allocated in POOL. */
 static svn_error_t *
-stream_readline(svn_stringbuf_t **stringbuf,
-                svn_boolean_t *eof,
-                const char **eol,
-                svn_stream_t *stream,
-                svn_boolean_t detect_eol,
-                apr_pool_t *pool)
+stream_readline_bytewise(svn_stringbuf_t **stringbuf,
+                         svn_boolean_t *eof,
+                         const char **eol,
+                         svn_stream_t *stream,
+                         svn_boolean_t detect_eol,
+                         apr_pool_t *pool)
 {
   svn_stringbuf_t *str;
   const char *eol_str;
@@ -298,7 +303,7 @@ stream_readline(svn_stringbuf_t **string
      optimize for the 90% case.  90% of the time, we can avoid the
      stringbuf ever having to realloc() itself if we start it out at
      80 chars.  */
-  str = svn_stringbuf_create_ensure(80, pool);
+  str = svn_stringbuf_create_ensure(LINE_CHUNK_SIZE, pool);
 
   if (detect_eol)
     {
@@ -345,6 +350,155 @@ stream_readline(svn_stringbuf_t **string
   return SVN_NO_ERROR;
 }
 
+/* Return a pointer to the first occurrence of the EOL_LEN bytes at EOL
+ * within the LEN bytes starting at DATA, or NULL if there is none.
+ * Unlike strstr(), a NUL byte inside DATA does not end the search. */
+static const char *
+chunk_find_eol(const char *data, apr_size_t len,
+               const char *eol, apr_size_t eol_len)
+{
+  const char *end = data + len;
+
+  if (eol_len == 0)
+    return data;  /* like strstr(): an empty needle matches at once */
+  while ((apr_size_t)(end - data) >= eol_len)
+    {
+      data = memchr(data, eol[0], (end - data) - eol_len + 1);
+      if (data == NULL || memcmp(data, eol, eol_len) == 0)
+        return data;
+      ++data;
+    }
+  return NULL;
+}
+
+/* Read one line from STREAM into *STRINGBUF (allocated in POOL); EOL is
+ * the line terminator and is not part of the result.  Set *EOF to TRUE if
+ * the stream ended before an EOL was found; otherwise *EOF is not written,
+ * so the caller has to initialize it.  STREAM has to support marks: data
+ * is read in chunks and the surplus is handed back by seeking. */
+static svn_error_t *
+stream_readline_chunky(svn_stringbuf_t **stringbuf,
+                       svn_boolean_t *eof,
+                       const char *eol,
+                       svn_stream_t *stream,
+                       apr_pool_t *pool)
+{
+  /* One chunk.  ~80 chars suits most text lines; more costs fill time. */
+  char buffer[LINE_CHUNK_SIZE];
+  svn_stream_mark_t *mark;
+  apr_size_t numbytes;
+  const char *eol_pos;
+  apr_off_t total_parsed = 0;
+  const apr_size_t eol_len = strlen(eol);
+
+  /* Remember the line start; we seek relative to it when done. */
+  SVN_ERR(svn_stream_mark(stream, &mark, pool));
+
+  /* Read the first chunk and look for the EOL in it. */
+  numbytes = LINE_CHUNK_SIZE;
+  SVN_ERR(svn_stream_read(stream, buffer, &numbytes));
+  eol_pos = chunk_find_eol(buffer, numbytes, eol, eol_len);
+
+  if (eol_pos != NULL)
+    {
+      *stringbuf = svn_stringbuf_ncreate(buffer, eol_pos - buffer, pool);
+      total_parsed = eol_pos - buffer + eol_len;
+    }
+  else if (numbytes < LINE_CHUNK_SIZE)
+    {
+      /* We hit EOF but not EOL: all that was read is the last line. */
+      *stringbuf = svn_stringbuf_ncreate(buffer, numbytes, pool);
+      *eof = TRUE;
+      return SVN_NO_ERROR;
+    }
+  else
+    {
+      svn_stringbuf_t *str;
+      str = svn_stringbuf_create_ensure(2*LINE_CHUNK_SIZE, pool);
+      svn_stringbuf_appendbytes(str, buffer, numbytes);
+      *stringbuf = str;
+
+      /* Keep appending chunks until an EOL or EOF shows up. */
+      do
+      {
+        /* Make room for the next chunk plus the terminating NUL. */
+        svn_stringbuf_ensure(str, str->len + LINE_CHUNK_SIZE + 1);
+        numbytes = LINE_CHUNK_SIZE;
+        SVN_ERR(svn_stream_read(stream, str->data + str->len, &numbytes));
+        str->len += numbytes;
+        str->data[str->len] = '\0';
+
+        /* Search the new data plus the EOL_LEN-1 bytes in front of it,
+         * because the EOL may straddle the chunk boundary. */
+        eol_pos = chunk_find_eol(str->data + str->len - numbytes - (eol_len-1),
+                                 numbytes + (eol_len-1), eol, eol_len);
+        if ((numbytes < LINE_CHUNK_SIZE) && (eol_pos == NULL))
+        {
+          *eof = TRUE;
+          return SVN_NO_ERROR;
+        }
+      }
+      while (eol_pos == NULL);
+
+      /* Bytes actually consumed (line + EOL).  Cut the string at the EOL;
+       * the surplus is handed back to the stream below. */
+      total_parsed = eol_pos - str->data + eol_len;
+      str->len = eol_pos - str->data;
+      str->data[str->len] = 0;
+    }
+
+  /* Move the stream read pointer to the first position behind the EOL. */
+  SVN_ERR(svn_stream_move_mark(stream, mark, total_parsed));
+  return svn_stream_seek(stream, mark);
+}
+
+/* Shared worker behind svn_stream_readline() and
+ * svn_stream_readline_detect_eol().
+ *
+ * Reads one line from STREAM into *STRINGBUF (allocated in POOL) and
+ * flags end-of-file through *EOF.  When DETECT_EOL is TRUE, the line
+ * terminator is detected automatically and handed back in *EOL.  When
+ * DETECT_EOL is FALSE, *EOL must already point to the terminator to look
+ * for.
+ */
+static svn_error_t *
+stream_readline(svn_stringbuf_t **stringbuf,
+                svn_boolean_t *eof,
+                const char **eol,
+                svn_stream_t *stream,
+                svn_boolean_t detect_eol,
+                apr_pool_t *pool)
+{
+  svn_boolean_t chunkable;
+
+  *eof = FALSE;
+
+  /* Chunked reading is only worthwhile for streams (often APR file or
+   * string based ones) that let us read speculatively and then move the
+   * read pointer back to the end of the line -- and only if we already
+   * know which EOL we are looking for.
+   */
+  chunkable = svn_stream_supports_mark(stream)
+              && svn_stream_buffered(stream)
+              && !detect_eol;
+
+  if (!chunkable)
+    {
+      /* General case (EOL unknown, or no cheap repositioning): use the
+       * standard byte-by-byte implementation. */
+      SVN_ERR(stream_readline_bytewise(stringbuf, eof, eol, stream,
+                                       detect_eol, pool));
+      return SVN_NO_ERROR;
+    }
+
+  /* Fast path: read whole chunks, scan them for the EOL and reposition
+   * the stream right behind it.
+   */
+  SVN_ERR(stream_readline_chunky(stringbuf, eof, *eol, stream, pool));
+
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_stream_readline(svn_stream_t *stream,
                     svn_stringbuf_t **stringbuf,