You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by br...@apache.org on 2012/11/09 15:04:43 UTC

svn commit: r1407461 - in /subversion/branches/wc-collate-path/subversion: include/private/svn_utf_private.h libsvn_subr/sqlite.c libsvn_subr/utf8proc.c tests/libsvn_subr/utf-test.c

Author: brane
Date: Fri Nov  9 14:04:42 2012
New Revision: 1407461

URL: http://svn.apache.org/viewvc?rev=1407461&view=rev
Log:
On the wc-collate-path branch: Move comparators to their proper module, etc.

* subversion/include/private/svn_utf_private.h: Include svn_string.h.
  (svn_utf__encode_ucs4_to_stringbuf,
  svn_utf__normcmp, svn_utf__glob): New prototypes.
* subversion/libsvn_subr/utf8proc.c: Include svn_string_private.h.
  (svn_utf__encode_ucs4_to_stringbuf): Implement.
  (decompose_normcmp_arg): New; helper for ...
  (svn_utf__normcmp): Refactor implementation from sqlite.c.
   Use stringbufs for decomposition buffers, which is a bit of an abuse,
   but better than managing a new type of self-aware pool-based buffer.
   Incidentally fix a bug where we stupidly assumed we could shortcut
   the empty-string case.
  (svn_utf__glob): Stub implementation.

* subversion/libsvn_subr/sqlite.c: Ignore more warnings from amalgamated SQLite.
  (svn_sqlite__db_t): Use stringbufs for SQLite extension buffers.
  (collate_ucs_nfd): Rewrite; use svn_utf__normcmp.

* subversion/tests/libsvn_subr/utf-test.c
  (normalized_compare, test_utf_collated_compare): Update.

Modified:
    subversion/branches/wc-collate-path/subversion/include/private/svn_utf_private.h
    subversion/branches/wc-collate-path/subversion/libsvn_subr/sqlite.c
    subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c
    subversion/branches/wc-collate-path/subversion/tests/libsvn_subr/utf-test.c

Modified: subversion/branches/wc-collate-path/subversion/include/private/svn_utf_private.h
URL: http://svn.apache.org/viewvc/subversion/branches/wc-collate-path/subversion/include/private/svn_utf_private.h?rev=1407461&r1=1407460&r2=1407461&view=diff
==============================================================================
--- subversion/branches/wc-collate-path/subversion/include/private/svn_utf_private.h (original)
+++ subversion/branches/wc-collate-path/subversion/include/private/svn_utf_private.h Fri Nov  9 14:04:42 2012
@@ -31,6 +31,7 @@
 #include <apr_pools.h>
 
 #include "svn_types.h"
+#include "svn_string.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -105,6 +106,40 @@ svn_utf__ucs4cmp(const apr_int32_t *bufa
                  const apr_int32_t *bufb, apr_size_t lenb);
 
 
+/* Encode a single UCS-4 code point to UTF-8, appending the result to BUF. */
+svn_error_t *
+svn_utf__encode_ucs4_to_stringbuf(apr_int32_t ucs4, svn_stringbuf_t *buf);
+
+
+/* Compare two UTF-8 strings, ignoring normalization, using
+ * buffers BUF1 and BUF2 for temporary storage.
+ * Return compare value in *RESULT.
+ */
+svn_error_t *
+svn_utf__normcmp(const void *str1, apr_size_t len1,
+                 const void *str2, apr_size_t len2,
+                 svn_stringbuf_t *buf1, svn_stringbuf_t *buf2,
+                 int *result);
+
+
+/* Pattern matching similar to the SQLite LIKE and GLOB
+ * operators. PATTERN, KEY and ESCAPE must all point to UTF-8
+ * strings. Furthermore, ESCAPE, if provided, must be a character from
+ * the ASCII subset.
+ *
+ * Use buffers BUF1 and BUF2 for temporary storage.
+ *
+ * If SQL_LIKE is true, interpret PATTERN as a pattern used by the SQL
+ * LIKE operator and notice ESCAPE. Otherwise it's a Unix fileglob
+ * pattern, and ESCAPE must be NULL.
+ *
+ * Set *MATCH to the result of the comparison.
+*/
+svn_error_t *
+svn_utf__glob(const void *pattern, const void *string, const void *escape,
+              svn_stringbuf_t *buf1, svn_stringbuf_t *buf2,
+              svn_boolean_t sql_like, svn_boolean_t *match);
+
 /* Return the version of the wrapped utf8proc library. */
 const char *
 svn_utf__utf8proc_version(void);

Modified: subversion/branches/wc-collate-path/subversion/libsvn_subr/sqlite.c
URL: http://svn.apache.org/viewvc/subversion/branches/wc-collate-path/subversion/libsvn_subr/sqlite.c?rev=1407461&r1=1407460&r2=1407461&view=diff
==============================================================================
--- subversion/branches/wc-collate-path/subversion/libsvn_subr/sqlite.c (original)
+++ subversion/branches/wc-collate-path/subversion/libsvn_subr/sqlite.c Fri Nov  9 14:04:42 2012
@@ -38,6 +38,7 @@
 #include "private/svn_skel.h"
 #include "private/svn_token.h"
 #include "private/svn_utf_private.h"
+#include "private/svn_string_private.h"
 
 #ifdef SQLITE3_DEBUG
 #include "private/svn_debug.h"
@@ -46,10 +47,23 @@
 #ifdef SVN_SQLITE_INLINE
 /* Include sqlite3 inline, making all symbols private. */
   #define SQLITE_API static
-  #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+  #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6 || __APPLE_CC__))
+    #if !__APPLE_CC__ || __GNUC_MINOR__ >= 6
+      #pragma GCC diagnostic push
+    #endif
+    #pragma GCC diagnostic ignored "-Wunreachable-code"
     #pragma GCC diagnostic ignored "-Wunused-function"
+    #pragma GCC diagnostic ignored "-Wcast-qual"
+    #pragma GCC diagnostic ignored "-Wunused"
+    #pragma GCC diagnostic ignored "-Wshadow"
+    #if __APPLE_CC__
+      #pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+    #endif
   #endif
   #include <sqlite3.c>
+  #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6))
+    #pragma GCC diagnostic pop
+  #endif
 #else
   #include <sqlite3.h>
 #endif
@@ -103,11 +117,9 @@ struct svn_sqlite__db_t
   svn_sqlite__stmt_t **prepared_stmts;
   apr_pool_t *state_pool;
 
-  /* Buffers for normalized unicode string comparison. */
-  apr_int32_t *nfd_buf1;
-  apr_size_t nfd_len1;
-  apr_int32_t *nfd_buf2;
-  apr_size_t nfd_len2;
+  /* Buffers for SQLite extensions. */
+  svn_stringbuf_t *sqlext_buf1;
+  svn_stringbuf_t *sqlext_buf2;
 };
 
 struct svn_sqlite__stmt_t
@@ -868,53 +880,20 @@ collate_ucs_nfd(void *baton,
                 int len2, const void *key2)
 {
   svn_sqlite__db_t *db = baton;
-  apr_size_t rlen1;
-  apr_size_t rlen2;
-  svn_error_t *err;
-
-  if (0 == len1 || 0 == len2)
-    return (len1 == len2 ? 0 : (len1 < len2 ? -1 : 1));
+  int result;
 
-  for (;;)
+  if (svn_utf__normcmp(key1, len1, key2, len2,
+                       db->sqlext_buf1, db->sqlext_buf2, &result))
     {
-      err = svn_utf__decompose_normalized(key1, len1,
-                                          db->nfd_buf1, db->nfd_len1,
-                                          &rlen1);
       /* There is really nothing we can do here if an error occurs
          during Unicode normalizetion, and attempting to recover could
          result in the wc.db index being corrupted. Presumably this
          can only happen if the index already contains invalid UTF-8
          strings, which should never happen in any case ... */
-      if (err)
-        SVN_ERR_MALFUNCTION_NO_RETURN();
-
-      if (rlen1 <= db->nfd_len1)
-        break;
-
-      /* Double the decomposition buffer size and retry */
-      db->nfd_len1 *= 2;
-      db->nfd_buf1 = apr_palloc(db->state_pool,
-                                db->nfd_len1 * sizeof(*db->nfd_buf1));
+      SVN_ERR_MALFUNCTION_NO_RETURN();
     }
 
-  /* And repeat with the second string */
-  for (;;)
-    {
-      err = svn_utf__decompose_normalized(key2, len2,
-                                          db->nfd_buf2, db->nfd_len2,
-                                          &rlen2);
-      if (err)
-        SVN_ERR_MALFUNCTION_NO_RETURN();
-
-      if (rlen2 <= db->nfd_len2)
-        break;
-
-      db->nfd_len2 *= 2;
-      db->nfd_buf2 = apr_palloc(db->state_pool,
-                                db->nfd_len2 * sizeof(*db->nfd_buf2));
-    }
-
-  return svn_utf__ucs4cmp(db->nfd_buf1, rlen1, db->nfd_buf2, rlen2);
+  return result;
 }
 
 
@@ -931,11 +910,8 @@ svn_sqlite__open(svn_sqlite__db_t **db, 
 
   SVN_ERR(internal_open(&(*db)->db3, path, mode, scratch_pool));
 
-  (*db)->nfd_len1 = (*db)->nfd_len2 = 2048;
-  (*db)->nfd_buf1 = apr_palloc(result_pool,
-                               (*db)->nfd_len1 * sizeof(*(*db)->nfd_buf1));
-  (*db)->nfd_buf2 = apr_palloc(result_pool,
-                               (*db)->nfd_len2 * sizeof(*(*db)->nfd_buf2));
+  (*db)->sqlext_buf1 = svn_stringbuf_create_ensure(4096, result_pool);
+  (*db)->sqlext_buf2 = svn_stringbuf_create_ensure(4096, result_pool);
   SQLITE_ERR(sqlite3_create_collation((*db)->db3,
                                       "svn-ucs-nfd", SQLITE_UTF8,
                                       *db, collate_ucs_nfd),

Modified: subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c
URL: http://svn.apache.org/viewvc/subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c?rev=1407461&r1=1407460&r2=1407461&view=diff
==============================================================================
--- subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc.c Fri Nov  9 14:04:42 2012
@@ -26,6 +26,7 @@
 #define UTF8PROC_INLINE
 #include "utf8proc/utf8proc.c"
 
+#include "private/svn_string_private.h"
 #include "private/svn_utf_private.h"
 #include "svn_private_config.h"
 #define UNUSED(x) ((void)(x))
@@ -63,8 +64,9 @@ svn_utf__decompose_normalized(const char
 }
 
 
-int svn_utf__ucs4cmp(const apr_int32_t *bufa, apr_size_t lena,
-                     const apr_int32_t *bufb, apr_size_t lenb)
+int
+svn_utf__ucs4cmp(const apr_int32_t *bufa, apr_size_t lena,
+                 const apr_int32_t *bufb, apr_size_t lenb)
 {
   const apr_size_t len = (lena < lenb ? lena : lenb);
   apr_size_t i;
@@ -77,3 +79,68 @@ int svn_utf__ucs4cmp(const apr_int32_t *
     }
   return (lena == lenb ? 0 : (lena < lenb ? -1 : 1));
 }
+
+
+svn_error_t *
+svn_utf__encode_ucs4_to_stringbuf(apr_int32_t ucs4, svn_stringbuf_t *buf)
+{
+  char utf8buf[8];     /* The longest UTF-8 sequence has 4 bytes */
+  const apr_size_t utf8len = utf8proc_encode_char(ucs4, (void *)utf8buf);
+
+  if (utf8len)
+    {
+      svn_stringbuf_appendbytes(buf, utf8buf, utf8len);
+      return SVN_NO_ERROR;
+    }
+
+  return svn_error_createf(SVN_ERR_UTF8PROC_ERROR, NULL,
+                           "Invalid Unicode character U+%04lX",
+                           (long)ucs4);
+}
+
+
+/* Decompose the given UTF-8 KEY of length KEYLEN.  This function
+   really horribly abuses stringbufs, because the result does not
+   conform to published stringbuf semantics. However, these results
+   should never be used outside the very carefully closed world of
+   SQLite extensions.
+ */
+static svn_error_t *
+decompose_normcmp_arg(const void *arg, apr_size_t arglen,
+                      svn_stringbuf_t *buf)
+{
+  for (;;)
+    {
+      apr_int32_t *const ucsbuf = (void *)buf->data;
+      const apr_size_t ucslen = buf->blocksize / sizeof(*ucsbuf);
+      SVN_ERR(svn_utf__decompose_normalized(arg, arglen, ucsbuf, ucslen,
+                                            &buf->len));
+      if (buf->len <= ucslen)
+        return SVN_NO_ERROR;
+
+      /* Increase the decomposition buffer size and retry */
+      svn_stringbuf__reserve(buf, buf->len * sizeof(*ucsbuf));
+    }
+}
+
+svn_error_t *
+svn_utf__normcmp(const void *str1, apr_size_t len1,
+                 const void *str2, apr_size_t len2,
+                 svn_stringbuf_t *buf1, svn_stringbuf_t *buf2,
+                 int *result)
+{
+  SVN_ERR(decompose_normcmp_arg(str1, len1, buf1));
+  SVN_ERR(decompose_normcmp_arg(str2, len2, buf2));
+  *result = svn_utf__ucs4cmp((void *)buf1->data, buf1->len,
+                             (void *)buf2->data, buf2->len);
+  return SVN_NO_ERROR;
+}
+
+
+svn_error_t *
+svn_utf__glob(const void *pattern, const void *string, const void *escape,
+              svn_stringbuf_t *buf1, svn_stringbuf_t *buf2,
+              svn_boolean_t sql_like, svn_boolean_t *match)
+{
+  return SVN_NO_ERROR;
+}

Modified: subversion/branches/wc-collate-path/subversion/tests/libsvn_subr/utf-test.c
URL: http://svn.apache.org/viewvc/subversion/branches/wc-collate-path/subversion/tests/libsvn_subr/utf-test.c?rev=1407461&r1=1407460&r2=1407461&view=diff
==============================================================================
--- subversion/branches/wc-collate-path/subversion/tests/libsvn_subr/utf-test.c (original)
+++ subversion/branches/wc-collate-path/subversion/tests/libsvn_subr/utf-test.c Fri Nov  9 14:04:42 2012
@@ -299,33 +299,33 @@ static svn_error_t *
 normalized_compare(const char *stra, int expected, const char *strb,
                    svn_boolean_t implicit_size,
                    const char *stratag, const char *strbtag,
-                   apr_int32_t *bufa, apr_size_t lena,
-                   apr_int32_t *bufb, apr_size_t lenb)
+                   svn_stringbuf_t *bufa, svn_stringbuf_t *bufb)
 {
-  apr_size_t rlena, rlenb;
   int result;
 
-  SVN_ERR(svn_utf__decompose_normalized(stra,
-                                        (implicit_size ? 0 : strlen(stra)),
-                                        bufa, lena, &rlena));
-  SVN_ERR(svn_utf__decompose_normalized(strb,
-                                        (implicit_size ? 0 : strlen(strb)),
-                                        bufb, lenb, &rlenb));
-  result = svn_utf__ucs4cmp(bufa, rlena, bufb, rlenb);
+  SVN_ERR(svn_utf__normcmp(stra, (implicit_size ? 0 : strlen(stra)),
+                           strb, (implicit_size ? 0 : strlen(strb)),
+                           bufa, bufb, &result));
 
   /* UCS-4 debugging dump of the decomposed strings
-  apr_size_t i;
-  printf("(%c)%7s %c %s\n", expected,
-         stratag, (!result ? '=' : (result < 0 ? '<' : '>')), strbtag);
-  for (i = 0; i < rlena || i < rlenb; ++i)
+  {
+    const apr_int32_t *const ucsbufa = (void*)bufa->data;
+    const apr_int32_t *const ucsbufb = (void*)bufb->data;
+    apr_size_t i;
+
+    printf("(%c)%7s %c %s\n", expected,
+           stratag, (!result ? '=' : (result < 0 ? '<' : '>')), strbtag);
+
+    for (i = 0; i < bufa->len || i < bufb->len; ++i)
     {
-      if (i < rlena && i < rlenb)
-        printf("    U+%04X   U+%04X\n", bufa[i], bufb[i]);
-      else if (i < rlena)
-        printf("    U+%04X\n", bufa[i]);
+      if (i < bufa->len && i < bufb->len)
+        printf("    U+%04X   U+%04X\n", ucsbufa[i], ucsbufb[i]);
+      else if (i < bufa->len)
+        printf("    U+%04X\n", ucsbufa[i]);
       else
-        printf("             U+%04X\n", bufb[i]);
+        printf("             U+%04X\n", ucsbufb[i]);
     }
+  }
   */
 
   if (('=' == expected && 0 != result)
@@ -343,7 +343,7 @@ normalized_compare(const char *stra, int
 }
 
 static svn_error_t *
-test_utf_decompose_normalized_ucs4cmp(apr_pool_t *pool)
+test_utf_collated_compare(apr_pool_t *pool)
 {
   /* Normalized: NFC */
   static const char nfc[] =
@@ -420,61 +420,59 @@ test_utf_decompose_normalized_ucs4cmp(ap
     "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
     "\xe1\xb9\x8b";             /* n with circumflex below */
 
-  const apr_size_t buflen = 200;
-  apr_int32_t *bufa = apr_palloc(pool, buflen * sizeof(apr_int32_t));
-  apr_int32_t *bufb = apr_palloc(pool, buflen * sizeof(apr_int32_t));
-
+  svn_stringbuf_t *bufa = svn_stringbuf_create_empty(pool);
+  svn_stringbuf_t *bufb = svn_stringbuf_create_empty(pool);
 
   /* Empty key */
   SVN_ERR(normalized_compare("", '=', "", TRUE, "empty", "empty",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("", '<', "a", TRUE, "empty", "nonempty",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("a", '>', "", TRUE, "nonempty", "empty",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
 
   /* Deterministic ordering */
   SVN_ERR(normalized_compare("a", '<', "b", TRUE, "a", "b",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("b", '<', "c", TRUE, "b", "c",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("a", '<', "c", TRUE, "a", "c",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
 
   SVN_ERR(normalized_compare("b", '>', "a", FALSE, "b", "a",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("c", '>', "b", FALSE, "c", "b",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare("c", '>', "a", FALSE, "c", "a",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
 
   /* Normalized equality */
   SVN_ERR(normalized_compare(nfc, '=', nfd, TRUE, "nfc", "nfd",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(nfd, '=', nfc, TRUE, "nfd", "nfc",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(nfc, '=', mixup, TRUE, "nfc", "mixup",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(nfd, '=', mixup, TRUE, "nfd", "mixup",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(mixup, '=', nfd, FALSE, "mixup", "nfd",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(mixup, '=', nfc, FALSE, "mixup", "nfc",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
 
   /* Key length */
   SVN_ERR(normalized_compare(nfc, '<', longer, FALSE, "nfc", "longer",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(longer, '>', nfc, FALSE, "longer","nfc",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(nfd, '>', shorter, TRUE, "nfd", "shorter",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(shorter, '<', nfd, TRUE, "shorter", "nfd",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(mixup, '<', lowcase, FALSE, "mixup", "lowcase",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
   SVN_ERR(normalized_compare(lowcase, '>', mixup, FALSE, "lowcase", "mixup",
-                             bufa, buflen, bufb, buflen));
+                             bufa, bufb));
 
   return SVN_NO_ERROR;
 }
@@ -494,7 +492,7 @@ struct svn_test_descriptor_t test_funcs[
                    "test svn_utf_cstring_to_utf8_ex2"),
     SVN_TEST_PASS2(test_utf_cstring_from_utf8_ex2,
                    "test svn_utf_cstring_from_utf8_ex2"),
-    SVN_TEST_PASS2(test_utf_decompose_normalized_ucs4cmp,
-                   "test normalized unicode comparison"),
+    SVN_TEST_PASS2(test_utf_collated_compare,
+                   "test svn_utf__normcmp"),
     SVN_TEST_NULL
   };