You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by ko...@apache.org on 2016/02/24 14:26:30 UTC

svn commit: r1732152 - in /subversion/trunk/subversion: include/private/svn_utf_private.h libsvn_repos/dump.c libsvn_subr/utf8proc.c svn/log-cmd.c svn/svn.c tests/libsvn_subr/utf-test.c

Author: kotkov
Date: Wed Feb 24 13:26:30 2016
New Revision: 1732152

URL: http://svn.apache.org/viewvc?rev=1732152&view=rev
Log:
Expose the ability to normalize and case fold UTF-8 strings in a new
private API, svn_utf__casefold().

That's better than having an additional flag for the svn_utf__normalize()
function.  Internally, the case folding is still implemented by passing an
extra flag to the static helper, but now we have a distinction at the API
boundary.

* subversion/include/private/svn_utf_private.h
  (svn_utf__normalize): Drop 'casefold' argument.
  (svn_utf__casefold): Declare new function.

* subversion/libsvn_subr/utf8proc.c
  (svn_utf__normalize): Adjust the call to normalize_cstring().
  (svn_utf__casefold): New function.  Normalize and fold the case of the
   string by issuing an appropriate call to normalize_cstring().

* subversion/libsvn_repos/dump.c
  (extract_mergeinfo_paths, verify_mergeinfo_normalization,
   check_name_collision): Update callers of svn_utf__normalize().

* subversion/svn/log-cmd.c
  (match): Call svn_utf__casefold().

* subversion/svn/svn.c
  (sub_main): Call svn_utf__casefold().

* subversion/tests/libsvn_subr/utf-test.c
  (test_utf_normalize): Split into separate tests for svn_utf__normalize()
   and svn_utf__casefold().
  (test_utf_casefold): New test.
  (test_funcs): Add new test.

Modified:
    subversion/trunk/subversion/include/private/svn_utf_private.h
    subversion/trunk/subversion/libsvn_repos/dump.c
    subversion/trunk/subversion/libsvn_subr/utf8proc.c
    subversion/trunk/subversion/svn/log-cmd.c
    subversion/trunk/subversion/svn/svn.c
    subversion/trunk/subversion/tests/libsvn_subr/utf-test.c

Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/include/private/svn_utf_private.h?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h (original)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Wed Feb 24 13:26:30 2016
@@ -139,9 +139,6 @@ svn_utf__normcmp(int *result,
  * null-terminated; otherwise, consider the string only up to the
  * given length.
  *
- * If CASEFOLD is non-zero, perform Unicode case folding, e.g., for
- * case-insensitive string comparison.
- *
  * Return the normalized string in *RESULT, which shares storage with
  * BUF and is valid only until the next time BUF is modified.
  *
@@ -151,9 +148,25 @@ svn_utf__normcmp(int *result,
 svn_error_t*
 svn_utf__normalize(const char **result,
                    const char *str, apr_size_t len,
-                   svn_boolean_t casefold,
                    svn_membuf_t *buf);
 
+/* Normalize the UTF-8 string STR to form C and remove case distinctions
+ * with Unicode's Default Caseless Matching algorithm. Use BUF as a
+ * temporary storage. If LEN is SVN_UTF__UNKNOWN_LENGTH, assume STR
+ * is null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return the resulting string in *RESULT, which shares storage with
+ * BUF and is valid only until the next time BUF is modified.
+ *
+ * A returned error may indicate that STR contains invalid UTF-8 or
+ * invalid Unicode codepoints.
+ */
+svn_error_t *
+svn_utf__casefold(const char **result,
+                  const char *str, apr_size_t len,
+                  svn_membuf_t *buf);
+
 /* Check if STRING is a valid, NFC-normalized UTF-8 string.  Note that
  * a FALSE return value may indicate that STRING is not valid UTF-8 at
  * all.

Modified: subversion/trunk/subversion/libsvn_repos/dump.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/dump.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/dump.c (original)
+++ subversion/trunk/subversion/libsvn_repos/dump.c Wed Feb 24 13:26:30 2016
@@ -897,7 +897,7 @@ extract_mergeinfo_paths(void *baton, con
   if (xb->normalize)
     {
       const char *normkey;
-      SVN_ERR(svn_utf__normalize(&normkey, key, klen, FALSE, &xb->buffer));
+      SVN_ERR(svn_utf__normalize(&normkey, key, klen, &xb->buffer));
       svn_hash_sets(xb->result,
                     apr_pstrdup(xb->buffer.pool, normkey),
                     normalized_unique);
@@ -951,7 +951,7 @@ verify_mergeinfo_normalization(void *bat
   const char *normpath;
   const char *found;
 
-  SVN_ERR(svn_utf__normalize(&normpath, path, klen, FALSE, &vb->buffer));
+  SVN_ERR(svn_utf__normalize(&normpath, path, klen, &vb->buffer));
   found = svn_hash_gets(vb->normalized_paths, normpath);
   if (!found)
       svn_hash_sets(vb->normalized_paths,
@@ -2233,7 +2233,7 @@ check_name_collision(void *baton, const
   const char *name;
   const char *found;
 
-  SVN_ERR(svn_utf__normalize(&name, key, klen, FALSE, &cb->buffer));
+  SVN_ERR(svn_utf__normalize(&name, key, klen, &cb->buffer));
 
   found = svn_hash_gets(cb->normalized, name);
   if (!found)
@@ -2252,7 +2252,7 @@ check_name_collision(void *baton, const
 
       SVN_ERR(svn_utf__normalize(
                   &normpath, svn_relpath_join(db->path, name, iterpool),
-                  SVN_UTF__UNKNOWN_LENGTH, FALSE, &cb->buffer));
+                  SVN_UTF__UNKNOWN_LENGTH, &cb->buffer));
       notify_warning(iterpool, eb->notify_func, eb->notify_baton,
                      svn_repos_notify_warning_name_collision,
                      _("Duplicate representation of path '%s'"), normpath);

Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf8proc.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Wed Feb 24 13:26:30 2016
@@ -204,11 +204,21 @@ svn_utf__normcmp(int *result,
 svn_error_t*
 svn_utf__normalize(const char **result,
                    const char *str, apr_size_t len,
-                   svn_boolean_t casefold,
                    svn_membuf_t *buf)
 {
   apr_size_t result_length;
-  SVN_ERR(normalize_cstring(&result_length, str, len, casefold, buf));
+  SVN_ERR(normalize_cstring(&result_length, str, len, FALSE, buf));
+  *result = (const char*)(buf->data);
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+svn_utf__casefold(const char **result,
+                  const char *str, apr_size_t len,
+                  svn_membuf_t *buf)
+{
+  apr_size_t result_length;
+  SVN_ERR(normalize_cstring(&result_length, str, len, TRUE, buf));
   *result = (const char*)(buf->data);
   return SVN_NO_ERROR;
 }

Modified: subversion/trunk/subversion/svn/log-cmd.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svn/log-cmd.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/svn/log-cmd.c (original)
+++ subversion/trunk/subversion/svn/log-cmd.c Wed Feb 24 13:26:30 2016
@@ -119,7 +119,7 @@ match(const char *pattern, const char *s
 {
   svn_error_t *err;
 
-  err = svn_utf__normalize(&str, str, strlen(str), TRUE, buf);
+  err = svn_utf__casefold(&str, str, strlen(str), buf);
   if (err)
     {
       /* Can't match invalid data. */

Modified: subversion/trunk/subversion/svn/svn.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/svn/svn.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/svn/svn.c (original)
+++ subversion/trunk/subversion/svn/svn.c Wed Feb 24 13:26:30 2016
@@ -2397,16 +2397,16 @@ sub_main(int *exit_code, int argc, const
         break;
       case opt_search:
         SVN_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
-        SVN_ERR(svn_utf__normalize(&utf8_opt_arg, utf8_opt_arg,
-                                   strlen(utf8_opt_arg), TRUE, &buf));
+        SVN_ERR(svn_utf__casefold(&utf8_opt_arg, utf8_opt_arg,
+                                  strlen(utf8_opt_arg), &buf));
         add_search_pattern_group(&opt_state,
                                  apr_pstrdup(pool, utf8_opt_arg),
                                  pool);
         break;
       case opt_search_and:
         SVN_ERR(svn_utf_cstring_to_utf8(&utf8_opt_arg, opt_arg, pool));
-        SVN_ERR(svn_utf__normalize(&utf8_opt_arg, utf8_opt_arg,
-                                   strlen(utf8_opt_arg), TRUE, &buf));
+        SVN_ERR(svn_utf__casefold(&utf8_opt_arg, utf8_opt_arg,
+                                  strlen(utf8_opt_arg), &buf));
         add_search_pattern_to_latest_group(&opt_state,
                                            apr_pstrdup(pool, utf8_opt_arg),
                                            pool);

Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/utf-test.c?rev=1732152&r1=1732151&r2=1732152&view=diff
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c (original)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Wed Feb 24 13:26:30 2016
@@ -839,6 +839,80 @@ test_utf_normalize(apr_pool_t *pool)
     "\xe1\xbb\x9d"              /* o with grave and hook */
     "\xe1\xb9\x8b";             /* n with circumflex below */
 
+  /* Normalized: NFD */
+  static const char nfd[] =
+    "S\xcc\xa3\xcc\x87"         /* S with dot above and below */
+    "u\xcc\x8a"                 /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "v\xcc\x83"                 /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "r\xcc\x8f"                 /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "i\xcc\x88\xcc\x81"         /* i with diaeresis and acute */
+    "o\xcc\x9b\xcc\x80"         /* o with grave and hook */
+    "n\xcc\xad";                /* n with circumflex below */
+
+  /* Mixed, denormalized */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* Invalid UTF-8 */
+  static const char invalid[] =
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe6"                      /* Invalid byte */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  const char *result;
+  svn_membuf_t buf;
+
+  svn_membuf__create(&buf, 0, pool);
+  SVN_ERR(svn_utf__normalize(&result, nfc, strlen(nfc), &buf));
+  SVN_TEST_STRING_ASSERT(result, nfc);
+  SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), &buf));
+  SVN_TEST_STRING_ASSERT(result, nfc);
+  SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), &buf));
+  SVN_TEST_STRING_ASSERT(result, nfc);
+
+  SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
+                                           &buf),
+                        SVN_ERR_UTF8PROC_ERROR);
+
+  return SVN_NO_ERROR;
+}
+
+
+static svn_error_t *
+test_utf_casefold(apr_pool_t *pool)
+{
+  /* Normalized: NFC */
+  static const char nfc[] =
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe1\xbb\x9d"              /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
   /* Normalized: NFC, case folded */
   static const char nfc_casefold[] =
     "\xe1\xb9\xa9"              /* s with dot above and below */
@@ -895,20 +969,15 @@ test_utf_normalize(apr_pool_t *pool)
   svn_membuf_t buf;
 
   svn_membuf__create(&buf, 0, pool);
-  SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), FALSE, &buf));
-  SVN_TEST_STRING_ASSERT(result, nfc);
-  SVN_ERR(svn_utf__normalize(&result, nfd, strlen(nfd), TRUE, &buf));
+  SVN_ERR(svn_utf__casefold(&result, nfc, strlen(nfc), &buf));
   SVN_TEST_STRING_ASSERT(result, nfc_casefold);
-  SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), FALSE, &buf));
-  SVN_TEST_STRING_ASSERT(result, nfc);
-  SVN_ERR(svn_utf__normalize(&result, mixup, strlen(mixup), TRUE, &buf));
+  SVN_ERR(svn_utf__casefold(&result, nfd, strlen(nfd), &buf));
+  SVN_TEST_STRING_ASSERT(result, nfc_casefold);
+  SVN_ERR(svn_utf__casefold(&result, mixup, strlen(mixup), &buf));
   SVN_TEST_STRING_ASSERT(result, nfc_casefold);
 
-  SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
-                                           FALSE, &buf),
-                        SVN_ERR_UTF8PROC_ERROR);
-  SVN_TEST_ASSERT_ERROR(svn_utf__normalize(&result, invalid, strlen(invalid),
-                                           TRUE, &buf),
+  SVN_TEST_ASSERT_ERROR(svn_utf__casefold(&result, invalid, strlen(invalid),
+                                          &buf),
                         SVN_ERR_UTF8PROC_ERROR);
 
   return SVN_NO_ERROR;
@@ -942,6 +1011,8 @@ static struct svn_test_descriptor_t test
                    "test svn_utf__utf{16,32}_to_utf8"),
     SVN_TEST_PASS2(test_utf_normalize,
                    "test svn_utf__normalize"),
+    SVN_TEST_PASS2(test_utf_casefold,
+                   "test svn_utf__casefold"),
     SVN_TEST_NULL
   };