You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by rh...@apache.org on 2012/05/30 18:52:37 UTC

svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Author: rhuijben
Date: Wed May 30 16:52:36 2012
New Revision: 1344347

URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
Log:
Remove about 15% of the cpu overhead of svnserve and httpd while running the
test suite by introducing a very simple parser buffer in the config file parser
code.

The nice clean stream translation code had an 7 times overhead on this very
performance critical code path, while clients of svnserve and httpd were
waiting. (Before this patch the this code path used 22% of the cpu time,
now just 3%)

* subversion/libsvn_subr/config_file.c
  (parse_context_t): Add buffer and position variables. Use EOF as no ungetc var.
  (parser_getc): Skip '\r' characters as we only need the '\n' character for our
    config files. Use EOF as no ungetc var.
  (parse_option,
   parse_section_name): Rename pool argument to scratch_pool.
  (svn_config__parse_file): Create scratch pool and store the file and parse
    ctx in it. Update initialization and destroy scratch_pool after successfull
    parsing.

Modified:
    subversion/trunk/subversion/libsvn_subr/config_file.c

Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/config_file.c?rev=1344347&r1=1344346&r2=1344347&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/config_file.c (original)
+++ subversion/trunk/subversion/libsvn_subr/config_file.c Wed May 30 16:52:36 2012
@@ -60,15 +60,19 @@ typedef struct parse_context_t
   /* The current line in the file */
   int line;
 
-  /* Cached ungotten character  - streams don't support ungetc()
-     [emulate it] */
+  /* Emulate an ungetc */
   int ungotten_char;
-  svn_boolean_t have_ungotten_char;
 
-  /* Temporary strings, allocated from the temp pool */
+  /* Temporary strings */
   svn_stringbuf_t *section;
   svn_stringbuf_t *option;
   svn_stringbuf_t *value;
+
+  /* Parser buffer for getc() to avoid call overhead into several libraries
+     for every character */
+  char parser_buffer[SVN_STREAM_CHUNK_SIZE]; /* Larger than most config files */
+  size_t buffer_pos; /* Current position within parser_buffer */
+  size_t buffer_size; /* parser_buffer contains this many bytes */
 } parse_context_t;
 
 
@@ -82,23 +86,36 @@ typedef struct parse_context_t
 static APR_INLINE svn_error_t *
 parser_getc(parse_context_t *ctx, int *c)
 {
-  if (ctx->have_ungotten_char)
-    {
-      *c = ctx->ungotten_char;
-      ctx->have_ungotten_char = FALSE;
-    }
-  else
+  do
     {
-      char char_buf;
-      apr_size_t readlen = 1;
+      if (ctx->ungotten_char != EOF)
+        {
+          *c = ctx->ungotten_char;
+          ctx->ungotten_char = EOF;
+        }
+      else if (ctx->buffer_pos < ctx->buffer_size)
+        {
+          *c = ctx->parser_buffer[ctx->buffer_pos];
+          ctx->buffer_pos++;
+        }
+      else
+        {
+          ctx->buffer_pos = 0;
+          ctx->buffer_size = sizeof(ctx->parser_buffer);
 
-      SVN_ERR(svn_stream_read(ctx->stream, &char_buf, &readlen));
+          SVN_ERR(svn_stream_read(ctx->stream, ctx->parser_buffer,
+                                  &(ctx->buffer_size)));
 
-      if (readlen == 1)
-        *c = char_buf;
-      else
-        *c = EOF;
+          if (ctx->buffer_pos < ctx->buffer_size)
+            {
+              *c = ctx->parser_buffer[ctx->buffer_pos];
+              ctx->buffer_pos++;
+            }
+          else
+            *c = EOF;
+        }
     }
+  while (*c == '\r');
 
   return SVN_NO_ERROR;
 }
@@ -111,7 +128,6 @@ static APR_INLINE svn_error_t *
 parser_ungetc(parse_context_t *ctx, int c)
 {
   ctx->ungotten_char = c;
-  ctx->have_ungotten_char = TRUE;
 
   return SVN_NO_ERROR;
 }
@@ -241,7 +257,7 @@ parse_value(int *pch, parse_context_t *c
 
 /* Parse a single option */
 static svn_error_t *
-parse_option(int *pch, parse_context_t *ctx, apr_pool_t *pool)
+parse_option(int *pch, parse_context_t *ctx, apr_pool_t *scratch_pool)
 {
   svn_error_t *err = SVN_NO_ERROR;
   int ch;
@@ -260,7 +276,7 @@ parse_option(int *pch, parse_context_t *
       ch = EOF;
       err = svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                               "%s:%d: Option must end with ':' or '='",
-                              svn_dirent_local_style(ctx->file, pool),
+                              svn_dirent_local_style(ctx->file, scratch_pool),
                               ctx->line);
     }
   else
@@ -284,7 +300,8 @@ parse_option(int *pch, parse_context_t *
  * starts a section name.
  */
 static svn_error_t *
-parse_section_name(int *pch, parse_context_t *ctx, apr_pool_t *pool)
+parse_section_name(int *pch, parse_context_t *ctx,
+                   apr_pool_t *scratch_pool)
 {
   svn_error_t *err = SVN_NO_ERROR;
   int ch;
@@ -303,7 +320,7 @@ parse_section_name(int *pch, parse_conte
       ch = EOF;
       err = svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                               "%s:%d: Section header must end with ']'",
-                              svn_dirent_local_style(ctx->file, pool),
+                              svn_dirent_local_style(ctx->file, scratch_pool),
                               ctx->line);
     }
   else
@@ -363,91 +380,101 @@ svn_config__sys_config_path(const char *
 
 svn_error_t *
 svn_config__parse_file(svn_config_t *cfg, const char *file,
-                       svn_boolean_t must_exist, apr_pool_t *pool)
+                       svn_boolean_t must_exist, apr_pool_t *result_pool)
 {
   svn_error_t *err = SVN_NO_ERROR;
-  parse_context_t ctx;
+  parse_context_t *ctx;
   int ch, count;
   svn_stream_t *stream;
+  apr_pool_t *scratch_pool = svn_pool_create(result_pool);
 
-  err = svn_stream_open_readonly(&stream, file, pool, pool);
+  err = svn_stream_open_readonly(&stream, file, scratch_pool, scratch_pool);
 
   if (! must_exist && err && APR_STATUS_IS_ENOENT(err->apr_err))
     {
       svn_error_clear(err);
+      svn_pool_destroy(scratch_pool);
       return SVN_NO_ERROR;
     }
   else
     SVN_ERR(err);
 
-  ctx.cfg = cfg;
-  ctx.file = file;
-  ctx.stream = svn_subst_stream_translated(stream, "\n", TRUE, NULL, FALSE,
-                                           pool);
-  ctx.line = 1;
-  ctx.have_ungotten_char = FALSE;
-  ctx.section = svn_stringbuf_create_empty(pool);
-  ctx.option = svn_stringbuf_create_empty(pool);
-  ctx.value = svn_stringbuf_create_empty(pool);
+  ctx = apr_palloc(scratch_pool, sizeof(*ctx));
+
+  ctx->cfg = cfg;
+  ctx->file = file;
+  ctx->stream = stream;
+  ctx->line = 1;
+  ctx->ungotten_char = EOF;
+  ctx->section = svn_stringbuf_create_empty(scratch_pool);
+  ctx->option = svn_stringbuf_create_empty(scratch_pool);
+  ctx->value = svn_stringbuf_create_empty(scratch_pool);
+  ctx->buffer_pos = 0;
+  ctx->buffer_size = 0;
 
   do
     {
-      SVN_ERR(skip_whitespace(&ctx, &ch, &count));
+      SVN_ERR(skip_whitespace(ctx, &ch, &count));
 
       switch (ch)
         {
         case '[':               /* Start of section header */
           if (count == 0)
-            SVN_ERR(parse_section_name(&ch, &ctx, pool));
+            SVN_ERR(parse_section_name(&ch, ctx, scratch_pool));
           else
             return svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                                      "%s:%d: Section header"
                                      " must start in the first column",
-                                     svn_dirent_local_style(file, pool),
-                                     ctx.line);
+                                     svn_dirent_local_style(file,
+                                                            scratch_pool),
+                                     ctx->line);
           break;
 
         case '#':               /* Comment */
           if (count == 0)
             {
-              SVN_ERR(skip_to_eoln(&ctx, &ch));
-              ++ctx.line;
+              SVN_ERR(skip_to_eoln(ctx, &ch));
+              ++(ctx->line);
             }
           else
             return svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                                      "%s:%d: Comment"
                                      " must start in the first column",
-                                     svn_dirent_local_style(file, pool),
-                                     ctx.line);
+                                     svn_dirent_local_style(file,
+                                                            scratch_pool),
+                                     ctx->line);
           break;
 
         case '\n':              /* Empty line */
-          ++ctx.line;
+          ++(ctx->line);
           break;
 
         case EOF:               /* End of file or read error */
           break;
 
         default:
-          if (svn_stringbuf_isempty(ctx.section))
+          if (svn_stringbuf_isempty(ctx->section))
             return svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                                      "%s:%d: Section header expected",
-                                     svn_dirent_local_style(file, pool),
-                                     ctx.line);
+                                     svn_dirent_local_style(file,
+                                                            scratch_pool),
+                                     ctx->line);
           else if (count != 0)
             return svn_error_createf(SVN_ERR_MALFORMED_FILE, NULL,
                                      "%s:%d: Option expected",
-                                     svn_dirent_local_style(file, pool),
-                                     ctx.line);
+                                     svn_dirent_local_style(file,
+                                                            scratch_pool),
+                                     ctx->line);
           else
-            SVN_ERR(parse_option(&ch, &ctx, pool));
+            SVN_ERR(parse_option(&ch, ctx, scratch_pool));
           break;
         }
     }
   while (ch != EOF);
 
   /* Close the streams (and other cleanup): */
-  return svn_stream_close(ctx.stream);
+  svn_pool_destroy(scratch_pool);
+  return SVN_NO_ERROR;
 }
 
 



Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On 25 March 2014 15:11, Bert Huijben <be...@qqmail.nl> wrote:
>
>
>> -----Original Message-----
>> From: Ivan Zhakov [mailto:ivan@visualsvn.com]
>> Sent: dinsdag 25 maart 2014 12:03
>> To: Subversion Development; Bert Huijben
>> Subject: Re: svn commit: r1344347 -
>> /subversion/trunk/subversion/libsvn_subr/config_file.c
>>
>> On 30 May 2012 20:52,  <rh...@apache.org> wrote:
>> > Author: rhuijben
>> > Date: Wed May 30 16:52:36 2012
>> > New Revision: 1344347
>> >
>> > URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>> > Log:
>> > Remove about 15% of the cpu overhead of svnserve and httpd while
>> running the
>> > test suite by introducing a very simple parser buffer in the config file parser
>> > code.
>> >
>> > The nice clean stream translation code had an 7 times overhead on this
>> very
>> > performance critical code path, while clients of svnserve and httpd were
>> > waiting. (Before this patch the this code path used 22% of the cpu time,
>> > now just 3%)
>> >
>> > * subversion/libsvn_subr/config_file.c
>> >   (parse_context_t): Add buffer and position variables. Use EOF as no
>> ungetc var.
>> >   (parser_getc): Skip '\r' characters as we only need the '\n' character for
>> our
>> >     config files. Use EOF as no ungetc var.
>> >   (parse_option,
>> >    parse_section_name): Rename pool argument to scratch_pool.
>> >   (svn_config__parse_file): Create scratch pool and store the file and parse
>> >     ctx in it. Update initialization and destroy scratch_pool after successfull
>> >     parsing.
>> >
>> > Modified:
>> >     subversion/trunk/subversion/libsvn_subr/config_file.c
>> >
>> > Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
>> > URL:
>> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/co
>> nfig_file.c?rev=1344347&r1=1344346&r2=1344347&view=diff
>> >
>> ==========================================================
>> ====================
>> > --- subversion/trunk/subversion/libsvn_subr/config_file.c (original)
>> > +++ subversion/trunk/subversion/libsvn_subr/config_file.c Wed May 30
>> 16:52:36 2012
>> > @@ -60,15 +60,19 @@ typedef struct parse_context_t
>> >    /* The current line in the file */
>> >    int line;
>> >
>> > -  /* Cached ungotten character  - streams don't support ungetc()
>> > -     [emulate it] */
>> > +  /* Emulate an ungetc */
>> >    int ungotten_char;
>> > -  svn_boolean_t have_ungotten_char;
>> >
>> > -  /* Temporary strings, allocated from the temp pool */
>> > +  /* Temporary strings */
>> >    svn_stringbuf_t *section;
>> >    svn_stringbuf_t *option;
>> >    svn_stringbuf_t *value;
>> > +
>> > +  /* Parser buffer for getc() to avoid call overhead into several libraries
>> > +     for every character */
>> > +  char parser_buffer[SVN_STREAM_CHUNK_SIZE]; /* Larger than most
>> config files */
>> Hi Bert,
>>
>> Did you have specific reasons for using deprecated
>> SVN_STREAM_CHUNK_SIZE (100 kb) instead of SVN__STREAM_CHUNK_SIZE
>> (16kb) constant for read buffer? I'm asking because it seems this
>> significantly increase memory footprint for high loaded servers due
>> APR allocator behavior and fragmentation.
>>
>> I would like change this buffer to 16kb instead of 100kb, if you don't
>> have strong objections.
>
> Answered on IRC: +1
>
> Using something like <= 8 KByte would not handle our default generated client side config files...
> But 16 KB will easily hold them.
>
> The fsfs and repository config files are much smaller, so if it really helps we could make this configurable per use case.
>
Bert,

Thanks for quick reply.

Committed in r1581296 and nominated for backport to 1.8.x.
-- 
Ivan Zhakov
CTO | VisualSVN | http://www.visualsvn.com

RE: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Ivan Zhakov [mailto:ivan@visualsvn.com]
> Sent: dinsdag 25 maart 2014 12:03
> To: Subversion Development; Bert Huijben
> Subject: Re: svn commit: r1344347 -
> /subversion/trunk/subversion/libsvn_subr/config_file.c
> 
> On 30 May 2012 20:52,  <rh...@apache.org> wrote:
> > Author: rhuijben
> > Date: Wed May 30 16:52:36 2012
> > New Revision: 1344347
> >
> > URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
> > Log:
> > Remove about 15% of the cpu overhead of svnserve and httpd while
> running the
> > test suite by introducing a very simple parser buffer in the config file parser
> > code.
> >
> > The nice clean stream translation code had an 7 times overhead on this
> very
> > performance critical code path, while clients of svnserve and httpd were
> > waiting. (Before this patch the this code path used 22% of the cpu time,
> > now just 3%)
> >
> > * subversion/libsvn_subr/config_file.c
> >   (parse_context_t): Add buffer and position variables. Use EOF as no
> ungetc var.
> >   (parser_getc): Skip '\r' characters as we only need the '\n' character for
> our
> >     config files. Use EOF as no ungetc var.
> >   (parse_option,
> >    parse_section_name): Rename pool argument to scratch_pool.
> >   (svn_config__parse_file): Create scratch pool and store the file and parse
> >     ctx in it. Update initialization and destroy scratch_pool after successfull
> >     parsing.
> >
> > Modified:
> >     subversion/trunk/subversion/libsvn_subr/config_file.c
> >
> > Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
> > URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/co
> nfig_file.c?rev=1344347&r1=1344346&r2=1344347&view=diff
> >
> ==========================================================
> ====================
> > --- subversion/trunk/subversion/libsvn_subr/config_file.c (original)
> > +++ subversion/trunk/subversion/libsvn_subr/config_file.c Wed May 30
> 16:52:36 2012
> > @@ -60,15 +60,19 @@ typedef struct parse_context_t
> >    /* The current line in the file */
> >    int line;
> >
> > -  /* Cached ungotten character  - streams don't support ungetc()
> > -     [emulate it] */
> > +  /* Emulate an ungetc */
> >    int ungotten_char;
> > -  svn_boolean_t have_ungotten_char;
> >
> > -  /* Temporary strings, allocated from the temp pool */
> > +  /* Temporary strings */
> >    svn_stringbuf_t *section;
> >    svn_stringbuf_t *option;
> >    svn_stringbuf_t *value;
> > +
> > +  /* Parser buffer for getc() to avoid call overhead into several libraries
> > +     for every character */
> > +  char parser_buffer[SVN_STREAM_CHUNK_SIZE]; /* Larger than most
> config files */
> Hi Bert,
> 
> Did you have specific reasons for using deprecated
> SVN_STREAM_CHUNK_SIZE (100 kb) instead of SVN__STREAM_CHUNK_SIZE
> (16kb) constant for read buffer? I'm asking because it seems this
> significantly increase memory footprint for high loaded servers due
> APR allocator behavior and fragmentation.
> 
> I would like change this buffer to 16kb instead of 100kb, if you don't
> have strong objections.

Answered on IRC: +1

Using something like <= 8 KByte would not handle our default generated client side config files...
But 16 KB will easily hold them.

The fsfs and repository config files are much smaller, so if it really helps we could make this configurable per use case.

	Bert 


Re: r1344347 - stream buffering in config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On 3 April 2014 22:15, Julian Foad <ju...@btopenworld.com> wrote:
>>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>
>>> Log:
>>> Remove about 15% of the cpu overhead of svnserve and httpd while running
>>> the test suite by introducing a very simple parser buffer in the config file
>>> parser code.
> [...]
>>>  * subversion/libsvn_subr/config_file.c
>>>    (parse_context_t): Add buffer and position variables. Use EOF as no
>>> ungetc var.
> [...]
>>>  Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
>>> ==============================================================================
>>>  +
>>>  +  /* Parser buffer for getc() to avoid call overhead into several libraries
>>>  +     for every character */
>>>  +  char parser_buffer[SVN_STREAM_CHUNK_SIZE];
> [...]
>
> This commit implements buffering of a generic stream, directly in the config file parser. Would it
> not be better to implement buffering of a generic stream as a generic module?
>
This commit improved performance over buffer APR file, so switching to
back to buffered stream will degrade performance again.
Config parser also use parser_buffer knowledge to use extremle
optimized memchr() to find end of line to skip comment lines. Config
parsers used very often, that's because it should be optimized.


-- 
Ivan Zhakov

Re: r1344347 - stream buffering in config_file.c

Posted by Julian Foad <ju...@btopenworld.com>.
Branko Čibej wrote:
> Julian Foad wrote:
>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>> This commit implements buffering of a generic stream, directly in the
>> config file parser. Would it not be better to implement buffering of
>> a generic stream as a generic module?
> 
> We already have svn_stream_buffered.

That's something different: it's a read/write stream interface to a spill buffer.

- Julian

Re: r1344347 - stream buffering in config_file.c

Posted by Branko Čibej <br...@wandisco.com>.
On 03.04.2014 20:15, Julian Foad wrote:
>>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>>> Log:
>>> Remove about 15% of the cpu overhead of svnserve and httpd while running
>>> the test suite by introducing a very simple parser buffer in the config file
>>> parser code.
> [...]
>>>   * subversion/libsvn_subr/config_file.c
>>>    (parse_context_t): Add buffer and position variables. Use EOF as no 
>>> ungetc var.
> [...]
>>>   Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
>>> ==============================================================================
>>>   +
>>>   +  /* Parser buffer for getc() to avoid call overhead into several libraries
>>>   +     for every character */
>>>   +  char parser_buffer[SVN_STREAM_CHUNK_SIZE];
> [...]
>
> This commit implements buffering of a generic stream, directly in the config file parser. Would it not be better to implement buffering of a generic stream as a generic module?

We already have svn_stream_buffered.

-- Brane


-- 
Branko Čibej | Director of Subversion
WANdisco // Non-Stop Data
e. brane@wandisco.com

Re: r1344347 - stream buffering in config_file.c

Posted by Julian Foad <ju...@btopenworld.com>.
Bert Huijben wrote:

> Julian Foad wrote:
>>>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>>  This commit implements buffering of a generic stream, directly in the config
>>  file parser. Would it not be better to implement buffering of a generic stream
>>  as a generic module?
> 
> Perhaps...
> 
> But a patch like that didn't allow speeding up the test suite by 15% in one
> afternoon. This was a very local patch with a very huge result. (And as it
> was very local it is easy to remove it when somebody else optimizes it in a
> different place). 

Sure -- I appreciate why it was worth doing that way.

> [...]
> 
> And just abstracting it, just for abstracting will add more overhead... (The
> current code is all in one C file and will be completely inlined by every
> compiler... Which is impossible if we need 3 callback and/or shared library
> calls for reading every byte... Even worse when that is taking out mutexes
> such as the apr read code)

I don't think it's fair to assume that an abstraction need be that inefficient, although I admit that an implementation easily can be that inefficient if we don't take care of it.

Part of what I was thinking is, why are we re-inventing the standard <stdio.h> streams interface (fread, fgetc, fungetc, ...) in Subversion? And I started wondering if we could hook in to the standard streams. The GNU C library (glibc) does provide hooks for doing this:

  http://www.gnu.org/software/libc/manual/html_mono/libc.html#Custom-Streams

But it's not standardized, and Windows doesn't seem to provide any such facility.

I don't think we need to do anything right now, but I wanted to note the suggestion.

- Julian

RE: r1344347 - stream buffering in config_file.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Julian Foad [mailto:julianfoad@btopenworld.com]
> Sent: donderdag 3 april 2014 20:15
> To: Bert Huijben
> Cc: Ivan Zhakov; Subversion Development
> Subject: Re: r1344347 - stream buffering in config_file.c
> 
> >> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
> 
> >> Log:
> >> Remove about 15% of the cpu overhead of svnserve and httpd while
> running
> >> the test suite by introducing a very simple parser buffer in the config
file
> >> parser code.
> [...]
> >>  * subversion/libsvn_subr/config_file.c
> >>    (parse_context_t): Add buffer and position variables. Use EOF as no
> >> ungetc var.
> [...]
> >>  Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
> >>
> ==========================================================
> ====================
> >>  +
> >>  +  /* Parser buffer for getc() to avoid call overhead into several
libraries
> >>  +     for every character */
> >>  +  char parser_buffer[SVN_STREAM_CHUNK_SIZE];
> [...]
> 
> This commit implements buffering of a generic stream, directly in the
config
> file parser. Would it not be better to implement buffering of a generic
stream
> as a generic module?

Perhaps...

But a patch like that didn't allow speeding up the test suite by 15% in one
afternoon. This was a very local patch with a very huge result. (And as it
was very local it is easy to remove it when somebody else optimizes it in a
different place). 

The stream api is optimized for using larger buffers while the config parser
is really specific in that it reads byte by byte. And the callback on
callback on callback wrapping (stream over svn_io over apr... none of them
inlineable by the compiler) was getting very expensive here, while there are
as far as I know no other users that have the same needs.
(Almost every other location uses per line parsing, which is already
optimized)

This was (at the time I applied the patch) by far the most expensive code in
both mod_dav_svn and svnserve when running the testsuite on Windows (on a
ramdrive), as we are reading the config files (e.g. fsfs.conf and the authz
files) over and over again.

I would guess this code should be easy to abstract... if/when there are more
parts of Subversion that need it, but I haven't found other performance
critical code paths that would need this.

And just abstracting it, just for abstracting will add more overhead... (The
current code is all in one C file and will be completely inlined by every
compiler... Which is impossible if we need 3 callback and/or shared library
calls for reading every byte... Even worse when that is taking out mutexes
such as the apr read code)

	Bert



Re: r1344347 - stream buffering in config_file.c

Posted by Julian Foad <ju...@btopenworld.com>.
>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev

>> Log:
>> Remove about 15% of the cpu overhead of svnserve and httpd while running
>> the test suite by introducing a very simple parser buffer in the config file
>> parser code.
[...]
>>  * subversion/libsvn_subr/config_file.c
>>    (parse_context_t): Add buffer and position variables. Use EOF as no 
>> ungetc var.
[...]
>>  Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
>> ==============================================================================
>>  +
>>  +  /* Parser buffer for getc() to avoid call overhead into several libraries
>>  +     for every character */
>>  +  char parser_buffer[SVN_STREAM_CHUNK_SIZE];
[...]

This commit implements buffering of a generic stream, directly in the config file parser. Would it not be better to implement buffering of a generic stream as a generic module?

- Julian


Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On 30 May 2012 20:52,  <rh...@apache.org> wrote:
> Author: rhuijben
> Date: Wed May 30 16:52:36 2012
> New Revision: 1344347
>
> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
> Log:
> Remove about 15% of the cpu overhead of svnserve and httpd while running the
> test suite by introducing a very simple parser buffer in the config file parser
> code.
>
> The nice clean stream translation code had an 7 times overhead on this very
> performance critical code path, while clients of svnserve and httpd were
> waiting. (Before this patch the this code path used 22% of the cpu time,
> now just 3%)
>
> * subversion/libsvn_subr/config_file.c
>   (parse_context_t): Add buffer and position variables. Use EOF as no ungetc var.
>   (parser_getc): Skip '\r' characters as we only need the '\n' character for our
>     config files. Use EOF as no ungetc var.
>   (parse_option,
>    parse_section_name): Rename pool argument to scratch_pool.
>   (svn_config__parse_file): Create scratch pool and store the file and parse
>     ctx in it. Update initialization and destroy scratch_pool after successfull
>     parsing.
>
> Modified:
>     subversion/trunk/subversion/libsvn_subr/config_file.c
>
> Modified: subversion/trunk/subversion/libsvn_subr/config_file.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/config_file.c?rev=1344347&r1=1344346&r2=1344347&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_subr/config_file.c (original)
> +++ subversion/trunk/subversion/libsvn_subr/config_file.c Wed May 30 16:52:36 2012
> @@ -60,15 +60,19 @@ typedef struct parse_context_t
>    /* The current line in the file */
>    int line;
>
> -  /* Cached ungotten character  - streams don't support ungetc()
> -     [emulate it] */
> +  /* Emulate an ungetc */
>    int ungotten_char;
> -  svn_boolean_t have_ungotten_char;
>
> -  /* Temporary strings, allocated from the temp pool */
> +  /* Temporary strings */
>    svn_stringbuf_t *section;
>    svn_stringbuf_t *option;
>    svn_stringbuf_t *value;
> +
> +  /* Parser buffer for getc() to avoid call overhead into several libraries
> +     for every character */
> +  char parser_buffer[SVN_STREAM_CHUNK_SIZE]; /* Larger than most config files */
Hi Bert,

Did you have specific reasons for using deprecated
SVN_STREAM_CHUNK_SIZE (100 kb) instead of SVN__STREAM_CHUNK_SIZE
(16kb) constant for read buffer? I'm asking because it seems this
significantly increase memory footprint for high loaded servers due
APR allocator behavior and fragmentation.

I would like change this buffer to 16kb instead of 100kb, if you don't
have strong objections.

-- 
Ivan Zhakov
CTO | VisualSVN | http://www.visualsvn.com

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Philip Martin <ph...@wandisco.com>.
Greg Stein <gs...@gmail.com> writes:

> On Thu, May 31, 2012 at 4:39 AM, Philip Martin
> <ph...@wandisco.com> wrote:
>> Ivan Zhakov <iv...@visualsvn.com> writes:
>>
>>> Config files are also used for authz settings and they can be even
>>> more than 100 MB in real world scenarios.
>>
>> Yes, particularly for setups that use SVNParentPath but not the new
>> AuthzSVNReposRelativeAccessFile.
>
> Woah. Wait a second here.
>
> My understanding is that these are read on *every* request (per
> Daniel). Are you suggesting that we are reading/parsing 100 Mb on
> every request?
>
> There is something wrong here. 100 Mb config files are flat out wrong.
> We should be doing better. And if we are *actually* parsing those
> per-request, then we've gone off the deep-end.

We cache the parsed data in the connection pool, see
mod_authz_svn.c:get_access_conf, so it's per-connection rather than
per-request.  That's still not brilliant.

I'm not sure whether the SVNParentPath case discards parsed data for
repositories that are not the one in the current request.

-- 
uberSVN: Apache Subversion Made Easy
http://www.uberSVN.com

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Justin Erenkrantz <ju...@erenkrantz.com>.
On Fri, Jun 1, 2012 at 9:58 AM, Philip Martin
<ph...@wandisco.com> wrote:
> SVNParentPath and AuthzSVNReposRelativeAccessFile make doing it at
> config-time a bit complicated.  Would the server scan the SVNParentPath
> directory to locate all the repositories?  A repository created later
> would require a reload?

For those two directives, it'd just need to walk the children
directories...so, that's feasible to do at config-time.  And, yes,
without a validity check at connection/request-time, we'd need to do a
graceful whenever the repositories or authz files change.

Worst case scenario is that we could load it up at config-time, if it
is invalid, we just fallback to doing it on a per-connection basis.
I'm not entirely sure whether we'd be able to atomically update the
shared config structure if we wanted to stash it in the config object
- though, I guess we could do that too, but it'd add a bit of
complexity that may or may not be worth it.

I don't know how much of a real-world penalty this parsing actually
has (Bert seems to think it is quite significant), but if we wanted to
minimize the I/O, that's the path I think we should take.  -- justin

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Philip Martin <ph...@wandisco.com>.
Justin Erenkrantz <ju...@erenkrantz.com> writes:

> On Thu, May 31, 2012 at 2:47 AM, Ivan Zhakov <iv...@visualsvn.com> wrote:
>> Currently Subversion reads authz file per-connection, not per-request.
>
> Even that's probably still wrong.  A better solution would be to parse
> it at config-time - but, that would mean we'd want to do some level of
> checking/invalidity on a per-connection basis...or, I would probably
> just add a directive which tells the module to skip the
> per-connection/request check and require an httpd config reload (which
> can be done gracefully) to re-load the SVN authz file.

SVNParentPath and AuthzSVNReposRelativeAccessFile make doing it at
config-time a bit complicated.  Would the server scan the SVNParentPath
directory to locate all the repositories?  A repository created later
would require a reload?

-- 
uberSVN: Apache Subversion Made Easy
http://www.uberSVN.com

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Justin Erenkrantz <ju...@erenkrantz.com>.
On Thu, May 31, 2012 at 2:47 AM, Ivan Zhakov <iv...@visualsvn.com> wrote:
> Currently Subversion reads authz file per-connection, not per-request.

Even that's probably still wrong.  A better solution would be to parse
it at config-time - but, that would mean we'd want to do some level of
checking/invalidity on a per-connection basis...or, I would probably
just add a directive which tells the module to skip the
per-connection/request check and require an httpd config reload (which
can be done gracefully) to re-load the SVN authz file.

My $.02.  -- justin

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Thu, May 31, 2012 at 1:27 PM, Greg Stein <gs...@gmail.com> wrote:
> On Thu, May 31, 2012 at 4:39 AM, Philip Martin
> <ph...@wandisco.com> wrote:
>> Ivan Zhakov <iv...@visualsvn.com> writes:
>>
>>> Config files are also used for authz settings and they can be even
>>> more than 100 MB in real world scenarios.
>>
>> Yes, particularly for setups that use SVNParentPath but not the new
>> AuthzSVNReposRelativeAccessFile.
>
> Woah. Wait a second here.
>
> My understanding is that these are read on *every* request (per
> Daniel). Are you suggesting that we are reading/parsing 100 Mb on
> every request?
>
> There is something wrong here. 100 Mb config files are flat out wrong.
> We should be doing better. And if we are *actually* parsing those
> per-request, then we've gone off the deep-end.
>
> ???
Currently Subversion reads authz file per-connection, not per-request.


-- 
Ivan Zhakov

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Greg Stein <gs...@gmail.com>.
On Thu, May 31, 2012 at 4:39 AM, Philip Martin
<ph...@wandisco.com> wrote:
> Ivan Zhakov <iv...@visualsvn.com> writes:
>
>> Config files are also used for authz settings and they can be even
>> more than 100 MB in real world scenarios.
>
> Yes, particularly for setups that use SVNParentPath but not the new
> AuthzSVNReposRelativeAccessFile.

Woah. Wait a second here.

My understanding is that these are read on *every* request (per
Daniel). Are you suggesting that we are reading/parsing 100 Mb on
every request?

There is something wrong here. 100 Mb config files are flat out wrong.
We should be doing better. And if we are *actually* parsing those
per-request, then we've gone off the deep-end.

???

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Philip Martin <ph...@wandisco.com>.
Ivan Zhakov <iv...@visualsvn.com> writes:

> Config files are also used for authz settings and they can be even
> more than 100 MB in real world scenarios.

Yes, particularly for setups that use SVNParentPath but not the new
AuthzSVNReposRelativeAccessFile.

-- 
uberSVN: Apache Subversion Made Easy
http://www.uberSVN.com

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Thu, May 31, 2012 at 11:49 AM, Greg Stein <gs...@gmail.com> wrote:
> On Thu, May 31, 2012 at 3:41 AM, Ivan Zhakov <iv...@visualsvn.com> wrote:
>> On Wed, May 30, 2012 at 8:52 PM,  <rh...@apache.org> wrote:
>>> Author: rhuijben
>>> Date: Wed May 30 16:52:36 2012
>>> New Revision: 1344347
>>>
>>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>>> Log:
>>> Remove about 15% of the cpu overhead of svnserve and httpd while running the
>>> test suite by introducing a very simple parser buffer in the config file parser
>>> code.
>>>
>>> The nice clean stream translation code had an 7 times overhead on this very
>>> performance critical code path, while clients of svnserve and httpd were
>>> waiting. (Before this patch the this code path used 22% of the cpu time,
>>> now just 3%)
>>
>> Great improvement, but it may be worth to factor out this code to
>> special stream like svn_stream_buffered().
>
> When I saw this change, my first thought was, "wtf? just read the
> whole damned file into memory". (I just haven't had time to look into
> this to provide some actual advice beyond amazement)
>
> Config files are NOT 100 megabytes. Read the damned things into memory
> in one giant piece. I/O is reduced, memory is available. Everybody
> wins.
>
Config files are also used for authz settings and they can be even
more than 100 MB in real world scenarios.


-- 
Ivan Zhakov

RE: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Bert Huijben <be...@qqmail.nl>.

> -----Original Message-----
> From: Greg Stein [mailto:gstein@gmail.com]
> Sent: donderdag 31 mei 2012 9:49
> To: Ivan Zhakov
> Cc: dev@subversion.apache.org; Bert Huijben
> Subject: Re: svn commit: r1344347 -
> /subversion/trunk/subversion/libsvn_subr/config_file.c
> 
> On Thu, May 31, 2012 at 3:41 AM, Ivan Zhakov <iv...@visualsvn.com>
> wrote:
> > On Wed, May 30, 2012 at 8:52 PM,  <rh...@apache.org> wrote:
> >> Author: rhuijben
> >> Date: Wed May 30 16:52:36 2012
> >> New Revision: 1344347
> >>
> >> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
> >> Log:
> >> Remove about 15% of the cpu overhead of svnserve and httpd while
> running the
> >> test suite by introducing a very simple parser buffer in the config
file
> parser
> >> code.
> >>
> >> The nice clean stream translation code had an 7 times overhead on this
> very
> >> performance critical code path, while clients of svnserve and httpd
were
> >> waiting. (Before this patch the this code path used 22% of the cpu
time,
> >> now just 3%)
> >
> > Great improvement, but it may be worth to factor out this code to
> > special stream like svn_stream_buffered().
> 
> When I saw this change, my first thought was, "wtf? just read the
> whole damned file into memory". (I just haven't had time to look into
> this to provide some actual advice beyond amazement)
> 
> Config files are NOT 100 megabytes. Read the damned things into memory
> in one giant piece. I/O is reduced, memory is available. Everybody
> wins.

I'm sure it could use further improvements. I don't know what really large
authz files some users have, but reading in 10 Kbyte chunks as it does now
gives a huge boost by itself.

The current code assumes the line endings have at least a '\n' (and ignores
'\r'), which would work for any typical user, but we used to support '\r' by
itself, because that is what our translated streams do.
I'm not sure if we should call this a regression as we probably never really
intended to support that. (And the Mac switched to '\n' years ago)


Most of the server side time was spend in reading the FSFS config files. I
didn't know we had that many user editable files in a typical repository and
all those commented lines explaining the flags were (and are) the
performance killer.


	Bert


Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Greg Stein <gs...@gmail.com>.
On Thu, May 31, 2012 at 3:41 AM, Ivan Zhakov <iv...@visualsvn.com> wrote:
> On Wed, May 30, 2012 at 8:52 PM,  <rh...@apache.org> wrote:
>> Author: rhuijben
>> Date: Wed May 30 16:52:36 2012
>> New Revision: 1344347
>>
>> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
>> Log:
>> Remove about 15% of the cpu overhead of svnserve and httpd while running the
>> test suite by introducing a very simple parser buffer in the config file parser
>> code.
>>
>> The nice clean stream translation code had an 7 times overhead on this very
>> performance critical code path, while clients of svnserve and httpd were
>> waiting. (Before this patch the this code path used 22% of the cpu time,
>> now just 3%)
>
> Great improvement, but it may be worth to factor out this code to
> special stream like svn_stream_buffered().

When I saw this change, my first thought was, "wtf? just read the
whole damned file into memory". (I just haven't had time to look into
this to provide some actual advice beyond amazement)

Config files are NOT 100 megabytes. Read the damned things into memory
in one giant piece. I/O is reduced, memory is available. Everybody
wins.

-g

Re: svn commit: r1344347 - /subversion/trunk/subversion/libsvn_subr/config_file.c

Posted by Ivan Zhakov <iv...@visualsvn.com>.
On Wed, May 30, 2012 at 8:52 PM,  <rh...@apache.org> wrote:
> Author: rhuijben
> Date: Wed May 30 16:52:36 2012
> New Revision: 1344347
>
> URL: http://svn.apache.org/viewvc?rev=1344347&view=rev
> Log:
> Remove about 15% of the cpu overhead of svnserve and httpd while running the
> test suite by introducing a very simple parser buffer in the config file parser
> code.
>
> The nice clean stream translation code had an 7 times overhead on this very
> performance critical code path, while clients of svnserve and httpd were
> waiting. (Before this patch the this code path used 22% of the cpu time,
> now just 3%)
>
Great improvement, but it may be worth to factor out this code to
special stream like svn_stream_buffered().


-- 
Ivan Zhakov