You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Daniel Shahaf <da...@elego.de> on 2012/01/06 19:06:21 UTC
Re: svn commit: r1224647 - in /subversion/trunk/subversion:
include/svn_string.h libsvn_fs_base/id.c libsvn_fs_fs/fs_fs.c
libsvn_fs_fs/id.c libsvn_fs_fs/tree.c libsvn_ra_neon/session.c
libsvn_ra_serf/serf.c libsvn_ra_serf/util.c libsvn_subr/svn_string.c
Stefan,
stefan2@apache.org wrote on Sun, Dec 25, 2011 at 21:40:37 -0000:
> Author: stefan2
> Date: Sun Dec 25 21:40:37 2011
> New Revision: 1224647
>
> URL: http://svn.apache.org/viewvc?rev=1224647&view=rev
> Log:
> Improve parsing speed of IDs and other structures by introducing
> a wrapper around apr_strtok(). Since the latter has abysmal
> performance if the number of separators is small, the new wrapper
> uses its own implementation for the frequent case that there is
> exactly one separator.
>
> Replace calls to apr_strtok with calls to the new function if there
> is the separator string may contain just one char (not always known
> for pass-through parameters).
>
...
> Modified: subversion/trunk/subversion/libsvn_ra_neon/session.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_ra_neon/session.c?rev=1224647&r1=1224646&r2=1224647&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_ra_neon/session.c (original)
> +++ subversion/trunk/subversion/libsvn_ra_neon/session.c Sun Dec 25 21:40:37 2011
> @@ -590,10 +590,10 @@ static svn_error_t *get_server_settings(
> #ifdef SVN_NEON_0_26
> if (http_auth_types)
> {
> - char *token, *last;
> + char *token;
> char *auth_types_list = apr_palloc(pool, strlen(http_auth_types) + 1);
> apr_collapse_spaces(auth_types_list, http_auth_types);
> - while ((token = apr_strtok(auth_types_list, ";", &last)) != NULL)
> + while ((token = svn_cstring_tokenize(";", &auth_types_list)) != NULL)
> {
> auth_types_list = NULL;
> if (svn_cstring_casecmp("basic", token) == 0)
> @@ -985,10 +985,10 @@ svn_ra_neon__open(svn_ra_session_t *sess
>
> if (authorities != NULL)
> {
> - char *files, *file, *last;
> + char *files, *file;
> files = apr_pstrdup(pool, authorities);
>
> - while ((file = apr_strtok(files, ";", &last)) != NULL)
> + while ((file = svn_cstring_tokenize(";", &files)) != NULL)
> {
> ne_ssl_certificate *ca_cert;
> files = NULL;
This code will segfault on the second iteration of the loop.
Philip fixed this instance in r1228310. Please fix the remaining
instances in the other callers to svn_cstring_tokenize().
Thanks,
Daniel
>
> Modified: subversion/trunk/subversion/libsvn_ra_serf/serf.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_ra_serf/serf.c?rev=1224647&r1=1224646&r2=1224647&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_ra_serf/serf.c (original)
> +++ subversion/trunk/subversion/libsvn_ra_serf/serf.c Sun Dec 25 21:40:37 2011
> @@ -103,10 +103,10 @@ load_http_auth_types(apr_pool_t *pool, s
>
> if (http_auth_types)
> {
> - char *token, *last;
> + char *token;
> char *auth_types_list = apr_palloc(pool, strlen(http_auth_types) + 1);
> apr_collapse_spaces(auth_types_list, http_auth_types);
> - while ((token = apr_strtok(auth_types_list, ";", &last)) != NULL)
> + while ((token = svn_cstring_tokenize(";", &auth_types_list)) != NULL)
> {
> auth_types_list = NULL;
> if (svn_cstring_casecmp("basic", token) == 0)
>
> Modified: subversion/trunk/subversion/libsvn_ra_serf/util.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_ra_serf/util.c?rev=1224647&r1=1224646&r2=1224647&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_ra_serf/util.c (original)
> +++ subversion/trunk/subversion/libsvn_ra_serf/util.c Sun Dec 25 21:40:37 2011
> @@ -383,10 +383,10 @@ static svn_error_t *
> load_authorities(svn_ra_serf__connection_t *conn, const char *authorities,
> apr_pool_t *pool)
> {
> - char *files, *file, *last;
> + char *files, *file;
> files = apr_pstrdup(pool, authorities);
>
> - while ((file = apr_strtok(files, ";", &last)) != NULL)
> + while ((file = svn_cstring_tokenize(";", &files)) != NULL)
> {
> serf_ssl_certificate_t *ca_cert;
> apr_status_t status = serf_ssl_load_cert_file(&ca_cert, file, pool);
>
> Modified: subversion/trunk/subversion/libsvn_subr/svn_string.c
> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/svn_string.c?rev=1224647&r1=1224646&r2=1224647&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_subr/svn_string.c (original)
> +++ subversion/trunk/subversion/libsvn_subr/svn_string.c Sun Dec 25 21:40:37 2011
> @@ -642,12 +642,11 @@ svn_cstring_split_append(apr_array_heade
> svn_boolean_t chop_whitespace,
> apr_pool_t *pool)
> {
> - char *last;
> char *pats;
> char *p;
>
> pats = apr_pstrdup(pool, input); /* strtok wants non-const data */
> - p = apr_strtok(pats, sep_chars, &last);
> + p = svn_cstring_tokenize(sep_chars, &pats);
>
> while (p)
> {
> @@ -667,7 +666,7 @@ svn_cstring_split_append(apr_array_heade
> if (p[0] != '\0')
> APR_ARRAY_PUSH(array, const char *) = p;
>
> - p = apr_strtok(NULL, sep_chars, &last);
> + p = svn_cstring_tokenize(sep_chars, &pats);
> }
>
> return;
> @@ -718,6 +717,43 @@ svn_cstring_match_list(const char *str,
> return FALSE;
> }
>
> +char *
> +svn_cstring_tokenize(const char *sep, char **str)
> +{
> + char *token;
> + const char * next;
> + char csep;
> +
> + /* let APR handle edge cases and multiple separators */
> + csep = *sep;
> + if (csep == '\0' || sep[2] != '\0')
> + return apr_strtok(NULL, sep, str);
> +
> + /* skip characters in sep (will terminate at '\0') */
> + token = *str;
> + while (*token == csep)
> + ++token;
> +
> + if (!*token) /* no more tokens */
> + return NULL;
> +
> + /* skip valid token characters to terminate token and
> + * prepare for the next call (will terminate at '\0)
> + */
> + next = strchr(token, csep);
> + if (next == NULL)
> + {
> + *str = token + strlen(token);
> + }
> + else
> + {
> + *(char *)next = '\0';
> + *str = (char *)next + 1;
> + }
> +
> + return token;
> +}
> +
> int svn_cstring_count_newlines(const char *msg)
> {
> int count = 0;
>
>
Re: svn commit: r1224647 - in /subversion/trunk/subversion: include/svn_string.h
libsvn_fs_base/id.c libsvn_fs_fs/fs_fs.c libsvn_fs_fs/id.c libsvn_fs_fs/tree.c
libsvn_ra_neon/session.c libsvn_ra_serf/serf.c libsvn_ra_serf/util.c libsvn_subr/svn_string.c
Posted by Stefan Fuhrmann <st...@alice-dsl.de>.
On 06.01.2012 19:06, Daniel Shahaf wrote:
> Stefan,
>
> stefan2@apache.org wrote on Sun, Dec 25, 2011 at 21:40:37 -0000:
>> Author: stefan2
>> Date: Sun Dec 25 21:40:37 2011
>> New Revision: 1224647
>>
>> URL: http://svn.apache.org/viewvc?rev=1224647&view=rev
>> Log:
>> Improve parsing speed of IDs and other structures by introducing
>> a wrapper around apr_strtok(). Since the latter has abysmal
>> performance if the number of separators is small, the new wrapper
>> uses its own implementation for the frequent case that there is
>> exactly one separator.
>>
>> Replace calls to apr_strtok with calls to the new function if there
>> is the separator string may contain just one char (not always known
>> for pass-through parameters).
>>
> ...
>> Modified: subversion/trunk/subversion/libsvn_ra_neon/session.c
>> URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_ra_neon/session.c?rev=1224647&r1=1224646&r2=1224647&view=diff
>> ==============================================================================
>> --- subversion/trunk/subversion/libsvn_ra_neon/session.c (original)
>> +++ subversion/trunk/subversion/libsvn_ra_neon/session.c Sun Dec 25 21:40:37 2011
>> @@ -590,10 +590,10 @@ static svn_error_t *get_server_settings(
>> #ifdef SVN_NEON_0_26
>> if (http_auth_types)
>> {
>> - char *token, *last;
>> + char *token;
>> char *auth_types_list = apr_palloc(pool, strlen(http_auth_types) + 1);
>> apr_collapse_spaces(auth_types_list, http_auth_types);
>> - while ((token = apr_strtok(auth_types_list, ";",&last)) != NULL)
>> + while ((token = svn_cstring_tokenize(";",&auth_types_list)) != NULL)
>> {
>> auth_types_list = NULL;
>> if (svn_cstring_casecmp("basic", token) == 0)
>> @@ -985,10 +985,10 @@ svn_ra_neon__open(svn_ra_session_t *sess
>>
>> if (authorities != NULL)
>> {
>> - char *files, *file, *last;
>> + char *files, *file;
>> files = apr_pstrdup(pool, authorities);
>>
>> - while ((file = apr_strtok(files, ";",&last)) != NULL)
>> + while ((file = svn_cstring_tokenize(";",&files)) != NULL)
>> {
>> ne_ssl_certificate *ca_cert;
>> files = NULL;
> This code will segfault on the second iteration of the loop.
>
> Philip fixed this instance in r1228310. Please fix the remaining
> instances in the other callers to svn_cstring_tokenize().
Done in r1228602.
-- Stefan^2.
Re: svn commit: r1224647 - in /subversion/trunk/subversion: include/svn_string.h libsvn_fs_base/id.c libsvn_fs_fs/fs_fs.c libsvn_fs_fs/id.c libsvn_fs_fs/tree.c libsvn_ra_neon/session.c libsvn_ra_serf/serf.c libsvn_ra_serf/util.c libsvn_subr/svn_string.c
Posted by Julian Foad <ju...@btopenworld.com>.
Travis wrote:
> stefan2@apache.org wrote on Sun, Dec 25, 2011 at 21:40:37 -0000:
>> + if (csep == '\0' || sep[2] != '\0')
>> + return apr_strtok(NULL, sep, str);
>
> Entirely possible that I'm not understanding, but shouldn't that be
>
> sep[1] != '\0'
Yes; that's already been fixed in r1224653.
- Julian
Re: svn commit: r1224647 - in /subversion/trunk/subversion: include/svn_string.h libsvn_fs_base/id.c libsvn_fs_fs/fs_fs.c libsvn_fs_fs/id.c libsvn_fs_fs/tree.c libsvn_ra_neon/session.c libsvn_ra_serf/serf.c libsvn_ra_serf/util.c libsvn_subr/svn_string.c
Posted by Travis <sv...@castle.fastmail.fm>.
stefan2@apache.org wrote on Sun, Dec 25, 2011 at 21:40:37 -0000:
> Author: stefan2
> Date: Sun Dec 25 21:40:37 2011
> New Revision: 1224647
>
> URL: http://svn.apache.org/viewvc?rev=1224647&view=rev
> Log:
> Improve parsing speed of IDs and other structures by introducing
> a wrapper around apr_strtok(). Since the latter has abysmal
> performance if the number of separators is small, the new wrapper
> uses its own implementation for the frequent case that there is
> exactly one separator.
>
> Replace calls to apr_strtok with calls to the new function if there
> is the separator string may contain just one char (not always known
> for pass-through parameters).
>
...
> +char *
> +svn_cstring_tokenize(const char *sep, char **str)
> +{
> + char *token;
> + const char * next;
> + char csep;
> +
> + /* let APR handle edge cases and multiple separators */
> + csep = *sep;
> + if (csep == '\0' || sep[2] != '\0')
> + return apr_strtok(NULL, sep, str);
> +
Entirely possible that I'm not understanding, but shouldn't that be
sep[1] != '\0'
if you're trying to pass on the case where sep is > 1 character to
apr_strtok()?
In the exactly-1-char case:
sep[0] = <the char of interest>
sep[1] = 0
sep[2] = <unknown value>
-Travis