You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2012/09/08 00:59:10 UTC
svn commit: r1382204 -
/subversion/trunk/subversion/libsvn_subr/utf_validate.c
Author: stefan2
Date: Fri Sep 7 22:59:09 2012
New Revision: 1382204
URL: http://svn.apache.org/viewvc?rev=1382204&view=rev
Log:
Make clear that this is, in fact, the *UTF*_validate file.
So, we are not dealing with true ASCII representations here
but 1-byte encoded UTF-8 codepoints.
Also, support platforms where plain char is unsigned by default
without giving up any of the efficiency on the others.
* subversion/libsvn_subr/utf_validate.c
(first_non_ascii_char,
first_non_ascii_char_cstring): rename to ...
(first_non_fsm_start_char,
first_non_fsm_start_char_cstring): ... this; support platforms
on which chars are not signed by default
(svn_utf__last_valid,
svn_utf__cstring_is_valid,
svn_utf__is_valid,
svn_utf__last_valid2): update callers
Modified:
subversion/trunk/subversion/libsvn_subr/utf_validate.c
Modified: subversion/trunk/subversion/libsvn_subr/utf_validate.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf_validate.c?rev=1382204&r1=1382203&r2=1382204&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf_validate.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf_validate.c Fri Sep 7 22:59:09 2012
@@ -250,11 +250,12 @@ static const char machine [9][14] = {
FSM_ERROR}, /* 0xf5-0xff */
};
-/* Scan MAX_LEN bytes in *DATA for non-ASCII chars. Return the position
- * of the first non-ASCII char or DATA + MAX_LEN if all were ASCII.
+/* Scan MAX_LEN bytes in *DATA for chars that are not in the octet
+ * category 0 (FSM_START). Return the position of the first such char
+ * or DATA + MAX_LEN if all were cat 0.
*/
static const char *
-first_non_ascii_char(const char *data, apr_size_t max_len)
+first_non_fsm_start_char(const char *data, apr_size_t max_len)
{
#if !SVN_UNALIGNED_ACCESS_IS_OK
@@ -269,8 +270,8 @@ first_non_ascii_char(const char *data, a
max_len -= len;
for (; len > 0; ++data, --len)
- if (*data < 0)
- return data;
+ if (*data < 0 || *data >= 0x80)
+ return data;
}
#endif
@@ -283,17 +284,18 @@ first_non_ascii_char(const char *data, a
/* The remaining odd bytes will be examined the naive way: */
for (; max_len > 0; ++data, --max_len)
- if (*data < 0)
+ if (*data < 0 || *data >= 0x80)
return data;
return data;
}
-/* Scan the C string in *DATA for non-ASCII chars. Return the position
- * of either the first non-ASCII char or the terminating NUL.
+/* Scan the C string in *DATA for chars that are not in the octet
+ * category 0 (FSM_START). Return the position of either the first
+ * such char or of the terminating NUL.
*/
static const char *
-first_non_ascii_char_cstring(const char *data)
+first_non_fsm_start_char_cstring(const char *data)
{
/* We need to make sure that BUF is properly aligned for chunky data
* access because we don't know the string's length. Unaligned chunk
@@ -301,7 +303,7 @@ first_non_ascii_char_cstring(const char
* segfault.
*/
for (; (apr_uintptr_t)data & (sizeof(apr_uintptr_t)-1); ++data)
- if (*data <= 0)
+ if (*data <= 0 || *data >= 0x80)
return data;
/* Scan the input one machine word at a time. */
@@ -320,7 +322,7 @@ first_non_ascii_char_cstring(const char
/* The remaining odd bytes will be examined the naive way: */
for (; ; ++data)
- if (*data <= 0)
+ if (*data <= 0 || *data >= 0x80)
return data;
return data;
@@ -329,7 +331,7 @@ first_non_ascii_char_cstring(const char
const char *
svn_utf__last_valid(const char *data, apr_size_t len)
{
- const char *start = first_non_ascii_char(data, len);
+ const char *start = first_non_fsm_start_char(data, len);
const char *end = data + len;
int state = FSM_START;
@@ -349,7 +351,7 @@ svn_boolean_t
svn_utf__cstring_is_valid(const char *data)
{
int state = FSM_START;
- data = first_non_ascii_char_cstring(data);
+ data = first_non_fsm_start_char_cstring(data);
while (*data)
{
@@ -365,7 +367,7 @@ svn_utf__is_valid(const char *data, apr_
{
const char *end = data + len;
int state = FSM_START;
- data = first_non_ascii_char(data, len);
+ data = first_non_fsm_start_char(data, len);
while (data < end)
{
@@ -379,7 +381,7 @@ svn_utf__is_valid(const char *data, apr_
const char *
svn_utf__last_valid2(const char *data, apr_size_t len)
{
- const char *start = first_non_ascii_char(data, len);
+ const char *start = first_non_fsm_start_char(data, len);
const char *end = data + len;
int state = FSM_START;
Re: svn commit: r1382204 - /subversion/trunk/subversion/libsvn_subr/utf_validate.c
Posted by Stefan Fuhrmann <st...@wandisco.com>.
On Sat, Sep 8, 2012 at 3:59 AM, Peter Samuelson <pe...@p12n.org> wrote:
>
> > for (; len > 0; ++data, --len)
> > - if (*data < 0)
> > - return data;
> > + if (*data < 0 || *data >= 0x80)
> > + return data;
>
> A reasonable compiler will collapse it anyway, but this is shorter and
> more direct:
>
> if (*data & 0x80)
>
You are right. I'm just using the same pattern
as for the "<=" variant.
-- Stefan^2.
--
*
Join us this October at Subversion Live 2012
<http://www.wandisco.com/svn-live-2012> for two days of best practice SVN
training, networking, live demos, committer meet and greet, and more!
Space is limited, so get signed up today
<http://www.wandisco.com/svn-live-2012>!
*
Re: svn commit: r1382204 - /subversion/trunk/subversion/libsvn_subr/utf_validate.c
Posted by Branko Čibej <br...@wandisco.com>.
On 08.09.2012 03:59, Peter Samuelson wrote:
>> for (; len > 0; ++data, --len)
>> - if (*data < 0)
>> - return data;
>> + if (*data < 0 || *data >= 0x80)
>> + return data;
> A reasonable compiler will collapse it anyway, but this is shorter and
> more direct:
>
> if (*data & 0x80)
Also less obvious to the casual reader. Let's just leave it to
reasonable compilers to optimize the code for us. I can live with
unreasonable compilers causing the CPU to do more grunt work. :)
-- Brane
--
Certified & Supported Apache Subversion Downloads:
http://www.wandisco.com/subversion/download
Re: svn commit: r1382204 -
/subversion/trunk/subversion/libsvn_subr/utf_validate.c
Posted by Peter Samuelson <pe...@p12n.org>.
> for (; len > 0; ++data, --len)
> - if (*data < 0)
> - return data;
> + if (*data < 0 || *data >= 0x80)
> + return data;
A reasonable compiler will collapse it anyway, but this is shorter and
more direct:
if (*data & 0x80)
Peter