You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ko...@apache.org on 2016/02/24 14:26:30 UTC
svn commit: r1732152 - in /subversion/trunk/subversion:
include/private/svn_utf_private.h libsvn_repos/dump.c
libsvn_subr/utf8proc.c svn/log-cmd.c svn/svn.c tests/libsvn_subr/utf-test.c
Author: kotkov
Date: Wed Feb 24 13:26:30 2016
New Revision: 1732152
URL: http://svn.apache.org/viewvc?rev=1732152&view=rev
Log:
Expose the ability to normalize and case fold UTF-8 strings in a new
private API, svn_utf__casefold().
That's better than having an additional flag for the svn_utf__normalize()
function. Internally, the case folding is still implemented by passing an
extra flag to the static helper, but now we have a distinction at the API
boundary.
* subversion/include/private/svn_utf_private.h
(svn_utf__normalize): Drop 'casefold' argument.
(svn_utf__casefold): Declare new function.
* subversion/libsvn_subr/utf8proc.c
(svn_utf__normalize): Adjust the call to normalize_cstring().
(svn_utf__casefold): New function. Normalize and fold the case of the
string by issuing an appropriate call to normalize_cstring().
* subversion/libsvn_repos/dump.c
(extract_mergeinfo_paths, verify_mergeinfo_normalization,
check_name_collision): Update callers of svn_utf__normalize().
* subversion/svn/log-cmd.c
(match): Call svn_utf__casefold().
* subversion/svn/svn.c
(sub_main): Call svn_utf__casefold().
* subversion/tests/libsvn_subr/utf-test.c
(test_utf_normalize): Split into separate tests for svn_utf__normalize()
and svn_utf__casefold().
(test_utf_casefold): New test.
(test_funcs): Add new test.
Modified:
subversion/trunk/subversion/include/private/svn_utf_private.h
subversion/trunk/subversion/libsvn_repos/dump.c
subversion/trunk/subversion/libsvn_subr/utf8proc.c
subversion/trunk/subversion/svn/log-cmd.c
subversion/trunk/subversion/svn/svn.c
subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Wed Feb 24 13:26:30 2016
@@ -139,9 +139,6 @@ svn_utf__normcmp(int *result,
* null-terminated; otherwise, consider the string only up to the
* given length.
*
- * If CASEFOLD is non-zero, perform Unicode case folding, e.g., for
- * case-insensitive string comparison.
- *
* Return the normalized string in *RESULT, which shares storage with
* BUF and is valid only until the next time BUF is modified.
*
@@ -151,9 +148,25 @@ svn_utf__normcmp(int *result,
svn_error_t*
svn_utf__normalize(const char **result,
const char *str, apr_size_t len,
- svn_boolean_t casefold,
svn_membuf_t *buf);
+/* Normalize the UTF-8 string STR to form C and remove case distinctions
+ * with Unicode's Default Caseless Matching algorithm. Use BUF as
+ * temporary storage. If LEN is SVN_UTF__UNKNOWN_LENGTH, assume STR
+ * is null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return the resulting string in *RESULT, which shares storage with
+ * BUF and is valid only until the next time BUF is modified.
+ *
+ * A returned error may indicate that STR contains invalid UTF-8 or
+ * invalid Unicode codepoints.
+ */
+svn_error_t *
+svn_utf__casefold(const char **result,
+ const char *str, apr_size_t len,
+ svn_membuf_t *buf);
+
/* Check if STRING is a valid, NFC-normalized UTF-8 string. Note that
* a FALSE return value may indicate that STRING is not valid UTF-8 at
* all.
Modified: subversion/trunk/subversion/libsvn_repos/dump.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/dump.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/dump.c (original)
+++ subversion/trunk/subversion/libsvn_repos/dump.c Wed Feb 24 13:26:30 2016
@@ -897,7 +897,7 @@ extract_mergeinfo_paths(void *baton, con
if (xb->normalize)
{
const char *normkey;
- SVN_ERR(svn_utf__normalize(&normkey, key, klen, FALSE, &xb->buffer));
+ SVN_ERR(svn_utf__normalize(&normkey, key, klen, &xb->buffer));
svn_hash_sets(xb->result,
apr_pstrdup(xb->buffer.pool, normkey),
normalized_unique);
@@ -951,7 +951,7 @@ verify_mergeinfo_normalization(void *bat
const char *normpath;
const char *found;
- SVN_ERR(svn_utf__normalize(&normpath, path, klen, FALSE, &vb->buffer));
+ SVN_ERR(svn_utf__normalize(&normpath, path, klen, &vb->buffer));
found = svn_hash_gets(vb->normalized_paths, normpath);
if (!found)
svn_hash_sets(vb->normalized_paths,
@@ -2233,7 +2233,7 @@ check_name_collision(void *baton, const
const char *name;
const char *found;
- SVN_ERR(svn_utf__normalize(&name, key, klen, FALSE, &cb->buffer));
+ SVN_ERR(svn_utf__normalize(&name, key, klen, &cb->buffer));
found = svn_hash_gets(cb->normalized, name);
if (!found)
@@ -2252,7 +2252,7 @@ check_name_collision(void *baton, const
SVN_ERR(svn_utf__normalize(
&normpath, svn_relpath_join(db->path, name, iterpool),
- SVN_UTF__UNKNOWN_LENGTH, FALSE, &cb->buffer));
+ SVN_UTF__UNKNOWN_LENGTH, &cb->buffer));
notify_warning(iterpool, eb->notify_func, eb->notify_baton,
svn_repos_notify_warning_name_collision,
_("Duplicate representation of path '%s'"), normpath);
Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf8proc.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Wed Feb 24 13:26:30 2016
@@ -204,11 +204,21 @@ svn_utf__normcmp(int *result,
svn_error_t*
svn_utf__normalize(const char **result,
const char *str, apr_size_t len,
- svn_boolean_t casefold,
svn_membuf_t *buf)
{
apr_size_t result_length;
- SVN_ERR(normalize_cstring(&result_length, str, len, casefold, buf));
+ SVN_ERR(normalize_cstring(&result_length, str, len, FALSE, buf));
+ *result = (const char*)(buf->data);
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_utf__casefold(const char **result,
+ const char *str, apr_size_t len,
+ svn_membuf_t *buf)
+{
+ apr_size_t result_length;
+ SVN_ERR(normalize_cstring(&result_length, str, len, TRUE, buf));
*result = (const char*)(buf->data);
return SVN_NO_ERROR;
}
Modified: subversion/trunk/subversion/svn/log-cmd.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svn/log-cmd.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/svn/log-cmd.c (original)
+++ subversion/trunk/subversion/svn/log-cmd.c Wed Feb 24 13:26:30 2016
@@ -119,7 +119,7 @@ match(const char *pattern, const char *s
{
svn_error_t *err;
- err = svn_utf__normalize(&str, str, strlen(str), TRUE, buf);
+ err = svn_utf__casefold(&str, str, strlen(str), buf);
if (err)
{
/* Can't match invalid data. */
Modified: subversion/trunk/subversion/svn/svn.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svn/svn.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/svn/svn.c (original)
+++ subversion/trunk/subversion/svn/svn.c Wed Feb 24 13:26:30 2016
@@ -2397,16 +2397,16 @@ sub_main(int *exit_code, int argc, const
break;
case opt_search:
SVN_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
- SVN_ERR(svn_utf__normalize(&utf8_opt_arg, utf8_opt_arg,
- strlen(utf8_opt_arg), TRUE, &buf));
+ SVN_ERR(svn_utf__casefold(&utf8_opt_arg, utf8_opt_arg,
+ strlen(utf8_opt_arg), &buf));
add_search_pattern_group(&opt_state,
apr_pstrdup(pool, utf8_opt_arg),
pool);
break;
case opt_search_and:
SVN_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
- SVN_ERR(svn_utf__normalize(&utf8_opt_arg, utf8_opt_arg,
- strlen(utf8_opt_arg), TRUE, &buf));
+ SVN_ERR(svn_utf__casefold(&utf8_opt_arg, utf8_opt_arg,
+ strlen(utf8_opt_arg), &buf));
add_search_pattern_to_latest_group(&opt_state,
apr_pstrdup(pool, utf8_opt_arg),
pool);
Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/utf-test.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Wed Feb 24 13:26:30 2016
@@ -839,6 +839,80 @@ test_utf_normalize(apr_pool_t *pool)
"\xe1\xbb\x9d" /* o with grave and hook */
"\xe1\xb9\x8b"; /* n with circumflex below */
+ /* Normalized: NFD */
+ static const char nfd[] =
+ "S\xcc\xa3\xcc\x87" /* S with dot above and below */
+ "u\xcc\x8a" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "v\xcc\x83" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "r\xcc\x8f" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */
+ "o\xcc\x9b\xcc\x80" /* o with grave and hook */
+ "n\xcc\xad"; /* n with circumflex below */
+
+ /* Mixed, denormalized */
+ static const char mixup[] =
+ "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ /* Invalid UTF-8 */
+ static const char invalid[] =
+ "\xe1\xb9\xa8" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "\xe1\xb8\x87" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "\xe1\xb8\x9d" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "\xc5\xa1" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "\xe6" /* Invalid byte */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ const char *result;
+ svn_membuf_t buf;
+
+ svn_membuf__create(&buf, 0, pool);
+ SVN_ERR(svn_utf__normalize(&result, nfc, strlen(nfc), &buf));
+ SVN_TEST_STRING_ASSERT(result, nfc);
+ SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), &buf));
+ SVN_TEST_STRING_ASSERT(result, nfc);
+ SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), &buf));
+ SVN_TEST_STRING_ASSERT(result, nfc);
+
+ SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
+ &buf),
+ SVN_ERR_UTF8PROC_ERROR);
+
+ return SVN_NO_ERROR;
+}
+
+
+static svn_error_t *
+test_utf_casefold(apr_pool_t *pool)
+{
+ /* Normalized: NFC */
+ static const char nfc[] =
+ "\xe1\xb9\xa8" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "\xe1\xb8\x87" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "\xe1\xb8\x9d" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "\xc5\xa1" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "\xe1\xbb\x9d" /* o with grave and hook */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
/* Normalized: NFC, case folded */
static const char nfc_casefold[] =
"\xe1\xb9\xa9" /* s with dot above and below */
@@ -895,20 +969,15 @@ test_utf_normalize(apr_pool_t *pool)
svn_membuf_t buf;
svn_membuf__create(&buf, 0, pool);
- SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), FALSE, &buf));
- SVN_TEST_STRING_ASSERT(result, nfc);
- SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), TRUE, &buf));
+ SVN_ERR(svn_utf__casefold(&result, nfc, strlen(nfc), &buf));
SVN_TEST_STRING_ASSERT(result, nfc_casefold);
- SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), FALSE, &buf));
- SVN_TEST_STRING_ASSERT(result, nfc);
- SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), TRUE, &buf));
+ SVN_ERR(svn_utf__casefold(&result, nfd, strlen(nfd), &buf));
+ SVN_TEST_STRING_ASSERT(result, nfc_casefold);
+ SVN_ERR(svn_utf__casefold(&result, mixup, strlen(mixup), &buf));
SVN_TEST_STRING_ASSERT(result, nfc_casefold);
- SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
- FALSE, &buf),
- SVN_ERR_UTF8PROC_ERROR);
- SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
- TRUE, &buf),
+ SVN_TEST_ASSERT_ERROR(svn_utf__casefold(&result, invalid, strlen(invalid),
+ &buf),
SVN_ERR_UTF8PROC_ERROR);
return SVN_NO_ERROR;
@@ -942,6 +1011,8 @@ static struct svn_test_descriptor_t test
"test svn_utf__utf{16,32}_to_utf8"),
SVN_TEST_PASS2(test_utf_normalize,
"test svn_utf__normalize"),
+ SVN_TEST_PASS2(test_utf_casefold,
+ "test svn_utf__casefold"),
SVN_TEST_NULL
};