You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2017/10/24 12:40:48 UTC
svn commit: r1813173 [3/6] - in /subversion/branches/addremove: ./
build/generator/ subversion/bindings/javahl/native/
subversion/bindings/swig/include/ subversion/include/
subversion/include/private/ subversion/libsvn_client/
subversion/libsvn_ra_serf...
Modified: subversion/branches/addremove/subversion/libsvn_repos/load-fs-vtable.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_repos/load-fs-vtable.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_repos/load-fs-vtable.c (original)
+++ subversion/branches/addremove/subversion/libsvn_repos/load-fs-vtable.c Tue Oct 24 12:40:47 2017
@@ -55,6 +55,7 @@ struct parse_baton
svn_boolean_t use_history;
svn_boolean_t validate_props;
svn_boolean_t ignore_dates;
+ svn_boolean_t normalize_props;
svn_boolean_t use_pre_commit_hook;
svn_boolean_t use_post_commit_hook;
enum svn_repos_load_uuid uuid_action;
@@ -163,8 +164,12 @@ change_rev_prop(svn_repos_t *repos,
const char *name,
const svn_string_t *value,
svn_boolean_t validate_props,
+ svn_boolean_t normalize_props,
apr_pool_t *pool)
{
+ if (normalize_props)
+ SVN_ERR(svn_repos__normalize_prop(&value, NULL, name, value, pool, pool));
+
if (validate_props)
return svn_repos_fs_change_rev_prop4(repos, revision, NULL, name,
NULL, value, FALSE, FALSE,
@@ -1024,7 +1029,8 @@ close_revision(void *baton)
const svn_prop_t *prop = &APR_ARRAY_IDX(diff, i, svn_prop_t);
SVN_ERR(change_rev_prop(pb->repos, 0, prop->name, prop->value,
- pb->validate_props, rb->pool));
+ pb->validate_props, pb->normalize_props,
+ rb->pool));
}
}
@@ -1042,6 +1048,23 @@ close_revision(void *baton)
prop->value = NULL;
}
+ if (rb->pb->normalize_props)
+ {
+ apr_pool_t *iterpool;
+ int i;
+
+ iterpool = svn_pool_create(rb->pool);
+ for (i = 0; i < rb->revprops->nelts; i++)
+ {
+ svn_prop_t *prop = &APR_ARRAY_IDX(rb->revprops, i, svn_prop_t);
+
+ svn_pool_clear(iterpool);
+ SVN_ERR(svn_repos__normalize_prop(&prop->value, NULL, prop->name,
+ prop->value, rb->pool, iterpool));
+ }
+ svn_pool_destroy(iterpool);
+ }
+
/* Apply revision property changes. */
if (rb->pb->validate_props)
SVN_ERR(svn_repos_fs_change_txn_props(rb->txn, rb->revprops, rb->pool));
@@ -1158,7 +1181,7 @@ close_revision(void *baton)
svn_error_t *
-svn_repos_get_fs_build_parser5(const svn_repos_parse_fns3_t **callbacks,
+svn_repos_get_fs_build_parser6(const svn_repos_parse_fns3_t **callbacks,
void **parse_baton,
svn_repos_t *repos,
svn_revnum_t start_rev,
@@ -1170,6 +1193,7 @@ svn_repos_get_fs_build_parser5(const svn
svn_boolean_t use_pre_commit_hook,
svn_boolean_t use_post_commit_hook,
svn_boolean_t ignore_dates,
+ svn_boolean_t normalize_props,
svn_repos_notify_func_t notify_func,
void *notify_baton,
apr_pool_t *pool)
@@ -1218,6 +1242,7 @@ svn_repos_get_fs_build_parser5(const svn
pb->use_pre_commit_hook = use_pre_commit_hook;
pb->use_post_commit_hook = use_post_commit_hook;
pb->ignore_dates = ignore_dates;
+ pb->normalize_props = normalize_props;
*callbacks = parser;
*parse_baton = pb;
@@ -1226,7 +1251,7 @@ svn_repos_get_fs_build_parser5(const svn
svn_error_t *
-svn_repos_load_fs5(svn_repos_t *repos,
+svn_repos_load_fs6(svn_repos_t *repos,
svn_stream_t *dumpstream,
svn_revnum_t start_rev,
svn_revnum_t end_rev,
@@ -1235,6 +1260,7 @@ svn_repos_load_fs5(svn_repos_t *repos,
svn_boolean_t use_pre_commit_hook,
svn_boolean_t use_post_commit_hook,
svn_boolean_t validate_props,
+ svn_boolean_t normalize_props,
svn_boolean_t ignore_dates,
svn_repos_notify_func_t notify_func,
void *notify_baton,
@@ -1247,7 +1273,7 @@ svn_repos_load_fs5(svn_repos_t *repos,
/* This is really simple. */
- SVN_ERR(svn_repos_get_fs_build_parser5(&parser, &parse_baton,
+ SVN_ERR(svn_repos_get_fs_build_parser6(&parser, &parse_baton,
repos,
start_rev, end_rev,
TRUE, /* look for copyfrom revs */
@@ -1257,6 +1283,7 @@ svn_repos_load_fs5(svn_repos_t *repos,
use_pre_commit_hook,
use_post_commit_hook,
ignore_dates,
+ normalize_props,
notify_func,
notify_baton,
pool));
@@ -1345,7 +1372,8 @@ revprops_close_revision(void *baton)
const svn_prop_t *prop = &APR_ARRAY_IDX(diff, i, svn_prop_t);
SVN_ERR(change_rev_prop(pb->repos, rb->rev, prop->name, prop->value,
- pb->validate_props, rb->pool));
+ pb->validate_props, pb->normalize_props,
+ rb->pool));
}
if (pb->notify_func)
@@ -1386,6 +1414,11 @@ revprops_close_revision(void *baton)
*
* If IGNORE_DATES is set, ignore any revision datestamps found in
* DUMPSTREAM, keeping whatever timestamps the revisions currently have.
+ *
+ * If NORMALIZE_PROPS is set, attempt to normalize invalid Subversion
+ * revision and node properties (those in the svn: namespace) so that
+ * their values would follow the established rules for them. Currently,
+ * this means translating non-LF line endings in the property values to LF.
*/
static svn_error_t *
build_revprop_parser(const svn_repos_parse_fns3_t **callbacks,
@@ -1395,6 +1428,7 @@ build_revprop_parser(const svn_repos_par
svn_revnum_t end_rev,
svn_boolean_t validate_props,
svn_boolean_t ignore_dates,
+ svn_boolean_t normalize_props,
svn_repos_notify_func_t notify_func,
void *notify_baton,
apr_pool_t *result_pool)
@@ -1440,6 +1474,7 @@ build_revprop_parser(const svn_repos_par
pb->use_pre_commit_hook = FALSE;
pb->use_post_commit_hook = FALSE;
pb->ignore_dates = ignore_dates;
+ pb->normalize_props = normalize_props;
*callbacks = parser;
*parse_baton = pb;
@@ -1454,6 +1489,7 @@ svn_repos_load_fs_revprops(svn_repos_t *
svn_revnum_t end_rev,
svn_boolean_t validate_props,
svn_boolean_t ignore_dates,
+ svn_boolean_t normalize_props,
svn_repos_notify_func_t notify_func,
void *notify_baton,
svn_cancel_func_t cancel_func,
@@ -1470,6 +1506,7 @@ svn_repos_load_fs_revprops(svn_repos_t *
start_rev, end_rev,
validate_props,
ignore_dates,
+ normalize_props,
notify_func,
notify_baton,
scratch_pool));
Modified: subversion/branches/addremove/subversion/libsvn_subr/config_win.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/config_win.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/config_win.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/config_win.c Tue Oct 24 12:40:47 2017
@@ -273,4 +273,11 @@ svn_config__parse_registry(svn_config_t
return svn_err;
}
+#else /* !WIN32 */
+
+/* Silence OSX ranlib warnings about object files with no symbols. */
+#include <apr.h>
+extern const apr_uint32_t svn__fake__config_win;
+const apr_uint32_t svn__fake__config_win = 0xdeadbeef;
+
#endif /* WIN32 */
Modified: subversion/branches/addremove/subversion/libsvn_subr/deprecated.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/deprecated.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/deprecated.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/deprecated.c Tue Oct 24 12:40:47 2017
@@ -1592,3 +1592,12 @@ svn_base64_encode(svn_stream_t *output,
{
return svn_base64_encode2(output, TRUE, pool);
}
+
+/*** From string.c ***/
+char *
+svn_cstring_join(const apr_array_header_t *strings,
+ const char *separator,
+ apr_pool_t *pool)
+{
+ return svn_cstring_join2(strings, separator, TRUE, pool);
+}
Modified: subversion/branches/addremove/subversion/libsvn_subr/io.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/io.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/io.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/io.c Tue Oct 24 12:40:47 2017
@@ -3940,21 +3940,20 @@ svn_io_file_write_full(apr_file_t *file,
apr_size_t nbytes, apr_size_t *bytes_written,
apr_pool_t *pool)
{
- /* We cannot simply call apr_file_write_full on Win32 as it may fail
- for larger values of NBYTES. In that case, we have to emulate the
- "_full" part here. Thus, always call apr_file_write directly on
- Win32 as this minimizes overhead for small data buffers. */
#ifdef WIN32
#define MAXBUFSIZE 30*1024
apr_size_t bw = nbytes;
apr_size_t to_write = nbytes;
+ apr_status_t rv;
- /* try a simple "write everything at once" first */
- apr_status_t rv = apr_file_write(file, buf, &bw);
+ rv = apr_file_write_full(file, buf, nbytes, &bw);
buf = (char *)buf + bw;
to_write -= bw;
- /* if the OS cannot handle that, use smaller chunks */
+ /* Issue #1789: On Windows, writing may fail for large values of NBYTES.
+ If that is the case, keep track of how many bytes have been written
+ by the apr_file_write_full() call, and attempt to write the remaining
+ part in smaller chunks. */
if (rv == APR_FROM_OS_ERROR(ERROR_NOT_ENOUGH_MEMORY)
&& nbytes > MAXBUFSIZE)
{
Modified: subversion/branches/addremove/subversion/libsvn_subr/lz4/lz4.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/lz4/lz4.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/lz4/lz4.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/lz4/lz4.c Tue Oct 24 12:40:47 2017
@@ -94,6 +94,12 @@
#include "lz4internal.h"
/* see also "memory routines" below */
+/* Silence GCC's -Wmissing-prototypes warning. */
+int LZ4_compress_fast_force(const char*, char*, int, int, int);
+int LZ4_compress_forceExtDict (LZ4_stream_t*, const char*, char*, int);
+int LZ4_decompress_safe_forceExtDict(const char*, char*, int, int, const char*, int);
+int LZ4_uncompress (const char*, char*, int);
+int LZ4_uncompress_unknownOutputSize (const char*, char*, int, int);
/*-************************************
* Compiler Options
@@ -397,7 +403,7 @@ typedef enum { full = 0, partial = 1 } e
int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
-int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
/*-******************************
@@ -1423,7 +1429,7 @@ int LZ4_uncompress_unknownOutputSize (co
/* Obsolete Streaming functions */
-int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
static void LZ4_init(LZ4_stream_t* lz4ds, BYTE* base)
{
Modified: subversion/branches/addremove/subversion/libsvn_subr/mergeinfo.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/mergeinfo.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/mergeinfo.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/mergeinfo.c Tue Oct 24 12:40:47 2017
@@ -271,197 +271,186 @@ combine_with_lastrange(const svn_merge_r
APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
svn_merge_range_dup(new_range, result_pool);
}
+ else if (combine_ranges(&combined_range, lastrange, new_range,
+ consider_inheritance))
+ {
+ *lastrange = combined_range;
+ }
else if (!consider_inheritance)
{
/* We are not considering inheritance so we can merge intersecting
ranges of different inheritability. Of course if the ranges
don't intersect at all we simply push NEW_RANGE onto RANGELIST. */
- if (combine_ranges(&combined_range, lastrange, new_range, FALSE))
- {
- *lastrange = combined_range;
- }
- else
- {
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
svn_merge_range_dup(new_range, result_pool);
- }
}
else /* Considering inheritance */
{
- if (combine_ranges(&combined_range, lastrange, new_range, TRUE))
- {
- /* Even when considering inheritance two intersection ranges
- of the same inheritability can simply be combined. */
- *lastrange = combined_range;
- }
- else
- {
- /* If we are here then the ranges either don't intersect or do
- intersect but have differing inheritability. Check for the
- first case as that is easy to handle. */
- intersection_type_t intersection_type;
- svn_boolean_t sorted = FALSE;
+ /* If we are here then the ranges either don't intersect or do
+ intersect but have differing inheritability. Check for the
+ first case as that is easy to handle. */
+ intersection_type_t intersection_type;
+ svn_boolean_t sorted = FALSE;
- SVN_ERR(get_type_of_intersection(new_range, lastrange,
- &intersection_type));
+ SVN_ERR(get_type_of_intersection(new_range, lastrange,
+ &intersection_type));
- switch (intersection_type)
+ switch (intersection_type)
+ {
+ case svn__no_intersection:
+ /* NEW_RANGE and *LASTRANGE *really* don't intersect so
+ just push NEW_RANGE onto RANGELIST. */
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
+ svn_merge_range_dup(new_range, result_pool);
+ sorted = (svn_sort_compare_ranges(&lastrange,
+ &new_range) < 0);
+ break;
+
+ case svn__equal_intersection:
+ /* The ranges are equal so all we do is force the
+ inheritability of lastrange to true. */
+ lastrange->inheritable = TRUE;
+ sorted = TRUE;
+ break;
+
+ case svn__adjoining_intersection:
+ /* They adjoin but don't overlap so just push NEW_RANGE
+ onto RANGELIST. */
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
+ svn_merge_range_dup(new_range, result_pool);
+ sorted = (svn_sort_compare_ranges(&lastrange,
+ &new_range) < 0);
+ break;
+
+ case svn__overlapping_intersection:
+ /* The ranges overlap but neither is a proper subset of
+ the other. We'll end up pushing two new ranges onto
+ RANGELIST, the intersecting part and the part unique to
+ NEW_RANGE.*/
{
- case svn__no_intersection:
- /* NEW_RANGE and *LASTRANGE *really* don't intersect so
- just push NEW_RANGE onto RANGELIST. */
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
- svn_merge_range_dup(new_range, result_pool);
- sorted = (svn_sort_compare_ranges(&lastrange,
- &new_range) < 0);
- break;
-
- case svn__equal_intersection:
- /* They range are equal so all we do is force the
- inheritability of lastrange to true. */
- lastrange->inheritable = TRUE;
- sorted = TRUE;
- break;
-
- case svn__adjoining_intersection:
- /* They adjoin but don't overlap so just push NEW_RANGE
- onto RANGELIST. */
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) =
- svn_merge_range_dup(new_range, result_pool);
- sorted = (svn_sort_compare_ranges(&lastrange,
- &new_range) < 0);
- break;
-
- case svn__overlapping_intersection:
- /* They ranges overlap but neither is a proper subset of
- the other. We'll end up pusing two new ranges onto
- RANGELIST, the intersecting part and the part unique to
- NEW_RANGE.*/
- {
- svn_merge_range_t *r1 = svn_merge_range_dup(lastrange,
- result_pool);
- svn_merge_range_t *r2 = svn_merge_range_dup(new_range,
- result_pool);
-
- /* Pop off *LASTRANGE to make our manipulations
- easier. */
- apr_array_pop(rangelist);
-
- /* Ensure R1 is the older range. */
- if (r2->start < r1->start)
- {
- /* Swap R1 and R2. */
- *r2 = *r1;
- *r1 = *new_range;
- }
+ svn_merge_range_t *r1 = svn_merge_range_dup(lastrange,
+ result_pool);
+ svn_merge_range_t *r2 = svn_merge_range_dup(new_range,
+ result_pool);
+
+ /* Pop off *LASTRANGE to make our manipulations
+ easier. */
+ apr_array_pop(rangelist);
- /* Absorb the intersecting ranges into the
- inheritable range. */
- if (r1->inheritable)
- r2->start = r1->end;
- else
- r1->end = r2->start;
-
- /* Push everything back onto RANGELIST. */
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r1;
- sorted = (svn_sort_compare_ranges(&lastrange,
- &r1) < 0);
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r2;
- if (sorted)
- sorted = (svn_sort_compare_ranges(&r1, &r2) < 0);
- break;
+ /* Ensure R1 is the older range. */
+ if (r2->start < r1->start)
+ {
+ /* Swap R1 and R2. */
+ *r2 = *r1;
+ *r1 = *new_range;
}
- default: /* svn__proper_subset_intersection */
- {
- /* One range is a proper subset of the other. */
- svn_merge_range_t *r1 = svn_merge_range_dup(lastrange,
- result_pool);
- svn_merge_range_t *r2 = svn_merge_range_dup(new_range,
- result_pool);
- svn_merge_range_t *r3 = NULL;
+ /* Absorb the intersecting ranges into the
+ inheritable range. */
+ if (r1->inheritable)
+ r2->start = r1->end;
+ else
+ r1->end = r2->start;
+
+ /* Push everything back onto RANGELIST. */
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r1;
+ sorted = (svn_sort_compare_ranges(&lastrange,
+ &r1) < 0);
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r2;
+ if (sorted)
+ sorted = (svn_sort_compare_ranges(&r1, &r2) < 0);
+ break;
+ }
- /* Pop off *LASTRANGE to make our manipulations
- easier. */
- apr_array_pop(rangelist);
+ default: /* svn__proper_subset_intersection */
+ {
+ /* One range is a proper subset of the other. */
+ svn_merge_range_t *r1 = svn_merge_range_dup(lastrange,
+ result_pool);
+ svn_merge_range_t *r2 = svn_merge_range_dup(new_range,
+ result_pool);
+ svn_merge_range_t *r3 = NULL;
+
+ /* Pop off *LASTRANGE to make our manipulations
+ easier. */
+ apr_array_pop(rangelist);
- /* Ensure R1 is the superset. */
- if (r2->start < r1->start || r2->end > r1->end)
- {
- /* Swap R1 and R2. */
- *r2 = *r1;
- *r1 = *new_range;
- }
+ /* Ensure R1 is the superset. */
+ if (r2->start < r1->start || r2->end > r1->end)
+ {
+ /* Swap R1 and R2. */
+ *r2 = *r1;
+ *r1 = *new_range;
+ }
- if (r1->inheritable)
- {
- /* The simple case: The superset is inheritable, so
- just combine r1 and r2. */
- r1->start = MIN(r1->start, r2->start);
- r1->end = MAX(r1->end, r2->end);
- r2 = NULL;
- }
- else if (r1->start == r2->start)
- {
- svn_revnum_t tmp_revnum;
+ if (r1->inheritable)
+ {
+ /* The simple case: The superset is inheritable, so
+ just combine r1 and r2. */
+ r1->start = MIN(r1->start, r2->start);
+ r1->end = MAX(r1->end, r2->end);
+ r2 = NULL;
+ }
+ else if (r1->start == r2->start)
+ {
+ svn_revnum_t tmp_revnum;
- /* *LASTRANGE and NEW_RANGE share an end point. */
- tmp_revnum = r1->end;
- r1->end = r2->end;
- r2->inheritable = r1->inheritable;
- r1->inheritable = TRUE;
- r2->start = r1->end;
- r2->end = tmp_revnum;
- }
- else if (r1->end == r2->end)
- {
- /* *LASTRANGE and NEW_RANGE share an end point. */
- r1->end = r2->start;
- r2->inheritable = TRUE;
- }
- else
- {
- /* NEW_RANGE and *LASTRANGE share neither start
- nor end points. */
- r3 = apr_pcalloc(result_pool, sizeof(*r3));
- r3->start = r2->end;
- r3->end = r1->end;
- r3->inheritable = r1->inheritable;
- r2->inheritable = TRUE;
- r1->end = r2->start;
- }
+ /* *LASTRANGE and NEW_RANGE share a start point. */
+ tmp_revnum = r1->end;
+ r1->end = r2->end;
+ r2->inheritable = r1->inheritable;
+ r1->inheritable = TRUE;
+ r2->start = r1->end;
+ r2->end = tmp_revnum;
+ }
+ else if (r1->end == r2->end)
+ {
+ /* *LASTRANGE and NEW_RANGE share an end point. */
+ r1->end = r2->start;
+ r2->inheritable = TRUE;
+ }
+ else
+ {
+ /* NEW_RANGE and *LASTRANGE share neither start
+ nor end points. */
+ r3 = apr_pcalloc(result_pool, sizeof(*r3));
+ r3->start = r2->end;
+ r3->end = r1->end;
+ r3->inheritable = r1->inheritable;
+ r2->inheritable = TRUE;
+ r1->end = r2->start;
+ }
- /* Push everything back onto RANGELIST. */
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r1;
- sorted = (svn_sort_compare_ranges(&lastrange, &r1) < 0);
- if (r2)
- {
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r2;
- if (sorted)
- sorted = (svn_sort_compare_ranges(&r1, &r2) < 0);
- }
- if (r3)
+ /* Push everything back onto RANGELIST. */
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r1;
+ sorted = (svn_sort_compare_ranges(&lastrange, &r1) < 0);
+ if (r2)
+ {
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r2;
+ if (sorted)
+ sorted = (svn_sort_compare_ranges(&r1, &r2) < 0);
+ }
+ if (r3)
+ {
+ APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r3;
+ if (sorted)
{
- APR_ARRAY_PUSH(rangelist, svn_merge_range_t *) = r3;
- if (sorted)
- {
- if (r2)
- sorted = (svn_sort_compare_ranges(&r2,
- &r3) < 0);
- else
- sorted = (svn_sort_compare_ranges(&r1,
- &r3) < 0);
- }
+ if (r2)
+ sorted = (svn_sort_compare_ranges(&r2,
+ &r3) < 0);
+ else
+ sorted = (svn_sort_compare_ranges(&r1,
+ &r3) < 0);
}
- break;
}
+ break;
}
-
- /* Some of the above cases might have put *RANGELIST out of
- order, so re-sort.*/
- if (!sorted)
- svn_sort__array(rangelist, svn_sort_compare_ranges);
}
+
+ /* Some of the above cases might have put *RANGELIST out of
+ order, so re-sort.*/
+ if (!sorted)
+ svn_sort__array(rangelist, svn_sort_compare_ranges);
}
return SVN_NO_ERROR;
Modified: subversion/branches/addremove/subversion/libsvn_subr/spillbuf.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/spillbuf.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/spillbuf.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/spillbuf.c Tue Oct 24 12:40:47 2017
@@ -265,10 +265,10 @@ svn_spillbuf__write(svn_spillbuf_t *buf,
/* Adjust the start offset for reading from the spill file.
- ### FIXME: Instead, we should simply discard the memory
- buffers; but currently some tests expect to read data in
- the same chunk sizes as were written, so we'll leave this
- change for later.*/
+ This way, the first `buf->memory_size` bytes of data will
+ be read from the existing in-memory buffers, which makes
+ more sense than discarding the buffers and re-reading
+ data from the file. */
buf->spill_start = buf->memory_size;
}
}
Modified: subversion/branches/addremove/subversion/libsvn_subr/sqlite3wrapper.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/sqlite3wrapper.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/sqlite3wrapper.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/sqlite3wrapper.c Tue Oct 24 12:40:47 2017
@@ -58,4 +58,12 @@
const sqlite3_api_routines *const svn_sqlite3__api_funcs = &sqlite3Apis;
int (*const svn_sqlite3__api_initialize)(void) = sqlite3_initialize;
int (*const svn_sqlite3__api_config)(int, ...) = sqlite3_config;
-#endif
+
+#else /* !SVN_SQLITE_INLINE */
+
+/* Silence OSX ranlib warnings about object files with no symbols. */
+#include <apr.h>
+extern const apr_uint32_t svn__fake__sqlite3wrapper;
+const apr_uint32_t svn__fake__sqlite3wrapper = 0xdeadbeef;
+
+#endif /* SVN_SQLITE_INLINE */
Modified: subversion/branches/addremove/subversion/libsvn_subr/string.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/string.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/string.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/string.c Tue Oct 24 12:40:47 2017
@@ -1021,9 +1021,10 @@ int svn_cstring_count_newlines(const cha
}
char *
-svn_cstring_join(const apr_array_header_t *strings,
- const char *separator,
- apr_pool_t *pool)
+svn_cstring_join2(const apr_array_header_t *strings,
+ const char *separator,
+ svn_boolean_t trailing_separator,
+ apr_pool_t *pool)
{
svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
size_t sep_len = strlen(separator);
@@ -1032,9 +1033,14 @@ svn_cstring_join(const apr_array_header_
for (i = 0; i < strings->nelts; i++)
{
const char *string = APR_ARRAY_IDX(strings, i, const char *);
+ if (i > 0)
+ svn_stringbuf_appendbytes(new_str, separator, sep_len);
svn_stringbuf_appendbytes(new_str, string, strlen(string));
- svn_stringbuf_appendbytes(new_str, separator, sep_len);
}
+
+ if (strings->nelts > 0 && trailing_separator)
+ svn_stringbuf_appendbytes(new_str, separator, sep_len);
+
return new_str->data;
}
Propchange: subversion/branches/addremove/subversion/libsvn_subr/utf8proc/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Tue Oct 24 12:40:47 2017
@@ -0,0 +1,101 @@
+/subversion/branches/1.10-cache-improvements/subversion/libsvn_subr/utf8proc:1669168-1694487
+/subversion/branches/1.5.x-r30215/subversion/libsvn_subr/utf8proc:870312
+/subversion/branches/1.7.x-fs-verify/subversion/libsvn_subr/utf8proc:1146708,1161180
+/subversion/branches/1.9-cache-improvements/subversion/libsvn_subr/utf8proc:1678948-1679863
+/subversion/branches/1.9.x/subversion/libsvn_subr/utf8proc:1735680
+/subversion/branches/10Gb/subversion/libsvn_subr/utf8proc:1388102,1388163-1388190,1388195,1388202,1388205,1388211,1388276,1388362,1388375,1388394,1388636,1388639-1388640,1388643-1388644,1388654,1388720,1388789,1388795,1388801,1388805,1388807,1388810,1388816,1389044,1389276,1389289,1389662,1389867,1390017,1390209,1390216,1390407,1390409,1390414,1390419,1390955
+/subversion/branches/atomic-revprop/subversion/libsvn_subr/utf8proc:965046-1000689
+/subversion/branches/authzperf/subversion/libsvn_subr/utf8proc:1613053-1776831
+/subversion/branches/auto-props-sdc/subversion/libsvn_subr/utf8proc:1384106-1401643
+/subversion/branches/bdb-reverse-deltas/subversion/libsvn_subr/utf8proc:872050-872529
+/subversion/branches/cache-server/subversion/libsvn_subr/utf8proc:1458643-1476567
+/subversion/branches/diff-callbacks3/subversion/libsvn_subr/utf8proc:870059-870761
+/subversion/branches/diff-optimizations/subversion/libsvn_subr/utf8proc:1031270-1037352
+/subversion/branches/diff-optimizations-bytes/subversion/libsvn_subr/utf8proc:1037353-1067789
+/subversion/branches/dont-save-plaintext-passwords-by-default/subversion/libsvn_subr/utf8proc:870728-871118
+/subversion/branches/double-delete/subversion/libsvn_subr/utf8proc:870511-872970
+/subversion/branches/dump-load-cross-check/subversion/libsvn_subr/utf8proc:1654853-1657295
+/subversion/branches/ev2-export/subversion/libsvn_subr/utf8proc:1325914,1332738,1413107
+/subversion/branches/explore-wc/subversion/libsvn_subr/utf8proc:875486,875493,875497,875507,875511,875514,875559,875580-875581,875584,875587,875611,875627,875647,875667-875668,875711-875712,875733-875734,875736,875744-875748,875751,875758,875782,875795-875796,875830,875836,875838,875842,875852,875855,875864,875870,875873,875880,875885-875888,875890,875897-875898,875905,875907-875909,875935,875943-875944,875946,875979,875982-875983,875985-875986,875990,875997
+/subversion/branches/file-externals/subversion/libsvn_subr/utf8proc:871779-873302
+/subversion/branches/fs-rep-sharing/subversion/libsvn_subr/utf8proc:869036-873803
+/subversion/branches/fsfs-format7/subversion/libsvn_subr/utf8proc:1426304,1430673,1433848,1438408,1438982,1441129,1442051,1442068,1442504,1442910,1443171,1443803,1444690,1444693,1444695,1445040,1445080,1446103,1451129,1453590,1454307,1460579,1461851,1461865,1462837,1462904,1463120,1467362,1467382,1469487,1471208,1477166,1478055,1481447,1489817,1489949,1490673-1490674,1491784,1493042,1498029,1498103,1498155,1500054,1507729-1507731,1507735-1507736
+/subversion/branches/fsfs-improvements/subversion/libsvn_subr/utf8proc:1499981-1547039
+/subversion/branches/fsfs-lock-many/subversion/libsvn_subr/utf8proc:1571740-1577217
+/subversion/branches/fsfs-pack/subversion/libsvn_subr/utf8proc:873717-874575
+/subversion/branches/fsx/subversion/libsvn_subr/utf8proc:1507845-1509914
+/subversion/branches/fsx-1.10/subversion/libsvn_subr/utf8proc:1658219-1694500
+/subversion/branches/fsx-id/subversion/libsvn_subr/utf8proc:1645603-1649011
+/subversion/branches/gnome-keyring/subversion/libsvn_subr/utf8proc:870558-871410
+/subversion/branches/gpg-agent-password-store/subversion/libsvn_subr/utf8proc:1005036-1150766
+/subversion/branches/gtest_addition/subversion/libsvn_subr/utf8proc:1452117-1502138
+/subversion/branches/http-protocol-v2/subversion/libsvn_subr/utf8proc:874395-876041
+/subversion/branches/in-memory-cache/subversion/libsvn_subr/utf8proc:869829-871452
+/subversion/branches/in-repo-authz/subversion/libsvn_subr/utf8proc:1414342-1424779
+/subversion/branches/inheritable-props/subversion/libsvn_subr/utf8proc:1297080-1395089
+/subversion/branches/integrate-cache-item-serialization/subversion/libsvn_subr/utf8proc:1068724-1068739
+/subversion/branches/integrate-cache-membuffer/subversion/libsvn_subr/utf8proc:998649-998852
+/subversion/branches/integrate-compression-level/subversion/libsvn_subr/utf8proc:1068651-1072287
+/subversion/branches/integrate-io-improvements/subversion/libsvn_subr/utf8proc:1068684-1072297
+/subversion/branches/integrate-is-cachable/subversion/libsvn_subr/utf8proc:1072568-1074082
+/subversion/branches/integrate-partial-getter/subversion/libsvn_subr/utf8proc:1072558-1076552
+/subversion/branches/integrate-readline-speedup/subversion/libsvn_subr/utf8proc:1072553-1072555
+/subversion/branches/integrate-stream-api-extensions/subversion/libsvn_subr/utf8proc:1068695-1072516
+/subversion/branches/integrate-string-improvements/subversion/libsvn_subr/utf8proc:1068251-1190617
+/subversion/branches/integrate-txdelta-caching/subversion/libsvn_subr/utf8proc:1072541-1078213
+/subversion/branches/issue-2779-dev/subversion/libsvn_subr/utf8proc:965496-984198
+/subversion/branches/issue-2843-dev/subversion/libsvn_subr/utf8proc:871432-874179
+/subversion/branches/issue-3000/subversion/libsvn_subr/utf8proc:871713,871716-871719,871721-871726,871728,871734
+/subversion/branches/issue-3067-deleted-subtrees/subversion/libsvn_subr/utf8proc:873375-874084
+/subversion/branches/issue-3148-dev/subversion/libsvn_subr/utf8proc:875193-875204
+/subversion/branches/issue-3220-dev/subversion/libsvn_subr/utf8proc:872210-872226
+/subversion/branches/issue-3242-dev/subversion/libsvn_subr/utf8proc:879653-896436
+/subversion/branches/issue-3334-dirs/subversion/libsvn_subr/utf8proc:875156-875867
+/subversion/branches/issue-3975/subversion/libsvn_subr/utf8proc:1152931-1160746
+/subversion/branches/issue-4116-dev/subversion/libsvn_subr/utf8proc:1424719-1425040
+/subversion/branches/issue-4194-dev/subversion/libsvn_subr/utf8proc:1410507-1414880
+/subversion/branches/javahl-ra/subversion/libsvn_subr/utf8proc:991978-1494640
+/subversion/branches/kwallet/subversion/libsvn_subr/utf8proc:870785-871314
+/subversion/branches/log-addressing/subversion/libsvn_subr/utf8proc:1509279-1546844
+/subversion/branches/log-g-performance/subversion/libsvn_subr/utf8proc:870941-871032
+/subversion/branches/merge-skips-obstructions/subversion/libsvn_subr/utf8proc:874525-874615
+/subversion/branches/move-tracking-2/subversion/libsvn_subr/utf8proc:1606692-1714632
+/subversion/branches/multi-layer-moves/subversion/libsvn_subr/utf8proc:1239019-1300930
+/subversion/branches/nfc-nfd-aware-client/subversion/libsvn_subr/utf8proc:870276,870376
+/subversion/branches/node_pool/subversion/libsvn_subr/utf8proc:1304828-1305388
+/subversion/branches/patch-exec/subversion/libsvn_subr/utf8proc:1692717-1705390
+/subversion/branches/performance/subversion/libsvn_subr/utf8proc:979193,980118,981087,981090,981189,981194,981287,981684,981827,982043,982355,983398,983406,983430,983474,983488,983490,983760,983764,983766,983770,984927,984973,984984,985014,985037,985046,985472,985477,985482,985487-985488,985493,985497,985500,985514,985601,985603,985606,985669,985673,985695,985697,986453,986465,986485,986491-986492,986517,986521,986605,986608,986817,986832,987865,987868-987869,987872,987886-987888,987893,988319,988898,990330,990533,990535-990537,990541,990568,990572,990574-990575,990600,990759,992899,992904,992911,993127,993141,994956,995478,995507,995603,998012,998858,999098,1001413,1001417,1004291,1022668,1022670,1022676,1022715,1022719,1025660,1025672,1027193,1027203,1027206,1027214,1027227,1028077,1028092,1028094,1028104,1028107,1028111,1028354,1029038,1029042-1029043,1029054-1029055,1029062-1029063,1029078,1029080,1029090,1029092-1029093,1029111,1029151,1029158,1029229-1029230,1029232,1029335-1029336,1029339-1029340,1029342,1029344,1030763,1030827,1031203,1031235,1032285,1032333,1033040,1033057,1033294,1035869,1035882,1039511,1043705,1053735,1056015,1066452,1067683,1067697-1078365
+/subversion/branches/pin-externals/subversion/libsvn_subr/utf8proc:1643757-1659392
+/subversion/branches/py-tests-as-modules/subversion/libsvn_subr/utf8proc:956579-1033052
+/subversion/branches/ra-svn-tuning/subversion/libsvn_subr/utf8proc:1658201-1694489
+/subversion/branches/ra_serf-digest-authn/subversion/libsvn_subr/utf8proc:875693-876404
+/subversion/branches/reintegrate-improvements/subversion/libsvn_subr/utf8proc:873853-874164
+/subversion/branches/remote-only-status/subversion/libsvn_subr/utf8proc:1581845-1586090
+/subversion/branches/resolve-incoming-add/subversion/libsvn_subr/utf8proc:1762797-1764284
+/subversion/branches/revprop-cache/subversion/libsvn_subr/utf8proc:1298521-1326293
+/subversion/branches/revprop-caching-ng/subversion/libsvn_subr/utf8proc:1620597,1620599
+/subversion/branches/revprop-packing/subversion/libsvn_subr/utf8proc:1143907,1143971,1143997,1144017,1144499,1144568,1146145
+/subversion/branches/subtree-mergeinfo/subversion/libsvn_subr/utf8proc:876734-878766
+/subversion/branches/svn-auth-x509/subversion/libsvn_subr/utf8proc:1603509-1655900
+/subversion/branches/svn-info-detail/subversion/libsvn_subr/utf8proc:1660035-1662618
+/subversion/branches/svn-mergeinfo-enhancements/subversion/libsvn_subr/utf8proc:870119-870195,870197-870288
+/subversion/branches/svn-mergeinfo-normalizer/subversion/libsvn_subr/utf8proc:1642232-1695991
+/subversion/branches/svn-patch-improvements/subversion/libsvn_subr/utf8proc:918519-934609
+/subversion/branches/svn_mutex/subversion/libsvn_subr/utf8proc:1141683-1182099
+/subversion/branches/svnpatch-diff/subversion/libsvn_subr/utf8proc:865738-876477
+/subversion/branches/svnraisetc/subversion/libsvn_subr/utf8proc:874709-875149
+/subversion/branches/svnserve-logging/subversion/libsvn_subr/utf8proc:869828-870893
+/subversion/branches/tc-issue-3334/subversion/libsvn_subr/utf8proc:874697-874773
+/subversion/branches/tc-merge-notify/subversion/libsvn_subr/utf8proc:874017-874062
+/subversion/branches/tc-resolve/subversion/libsvn_subr/utf8proc:874191-874239
+/subversion/branches/tc_url_rev/subversion/libsvn_subr/utf8proc:874351-874483
+/subversion/branches/tree-conflicts/subversion/libsvn_subr/utf8proc:868291-873154
+/subversion/branches/tree-conflicts-notify/subversion/libsvn_subr/utf8proc:873926-874008
+/subversion/branches/tristate-chunked-request/subversion/libsvn_subr/utf8proc:1502394-1502681
+/subversion/branches/tweak-build-take-two/subversion/libsvn_subr/utf8proc:1424288-1425049,1425051-1425613
+/subversion/branches/uris-as-urls/subversion/libsvn_subr/utf8proc:1060426-1064427
+/subversion/branches/verify-at-commit/subversion/libsvn_subr/utf8proc:1462039-1462408
+/subversion/branches/verify-keep-going/subversion/libsvn_subr/utf8proc:1439280-1546110
+/subversion/branches/wc-collate-path/subversion/libsvn_subr/utf8proc:1402685-1480384
+/subversion/trunk/subversion/libsvn_subr/utf8proc:1802696-1813172
+/subversion/upstream/utf8proc:1405750-1809082
Modified: subversion/branches/addremove/subversion/libsvn_subr/utf8proc.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/utf8proc.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/utf8proc.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/utf8proc.c Tue Oct 24 12:40:47 2017
@@ -56,6 +56,12 @@ const char *
svn_utf__utf8proc_runtime_version(void)
{
/* Unused static function warning removal hack. */
+ SVN_UNUSED(utf8proc_grapheme_break);
+ SVN_UNUSED(utf8proc_tolower);
+ SVN_UNUSED(utf8proc_toupper);
+ SVN_UNUSED(utf8proc_totitle);
+ SVN_UNUSED(utf8proc_charwidth);
+ SVN_UNUSED(utf8proc_category_string);
SVN_UNUSED(utf8proc_NFD);
SVN_UNUSED(utf8proc_NFC);
SVN_UNUSED(utf8proc_NFKD);
@@ -77,7 +83,7 @@ svn_utf__utf8proc_runtime_version(void)
* that STRING contains invalid UTF-8 or was so long that an overflow
* occurred.
*/
-static ssize_t
+static apr_ssize_t
unicode_decomposition(int transform_flags,
const char *string, apr_size_t length,
svn_membuf_t *buffer)
@@ -88,8 +94,8 @@ unicode_decomposition(int transform_flag
for (;;)
{
apr_int32_t *const ucs4buf = buffer->data;
- const ssize_t ucs4len = buffer->size / sizeof(*ucs4buf);
- const ssize_t result =
+ const apr_ssize_t ucs4len = buffer->size / sizeof(*ucs4buf);
+ const apr_ssize_t result =
utf8proc_decompose((const void*) string, length, ucs4buf, ucs4len,
UTF8PROC_DECOMPOSE | UTF8PROC_STABLE
| transform_flags | nullterm);
@@ -116,7 +122,7 @@ decompose_normalized(apr_size_t *result_
const char *string, apr_size_t length,
svn_membuf_t *buffer)
{
- ssize_t result = unicode_decomposition(0, string, length, buffer);
+ apr_ssize_t result = unicode_decomposition(0, string, length, buffer);
if (result < 0)
return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL,
gettext(utf8proc_errmsg(result)));
@@ -145,7 +151,7 @@ normalize_cstring(apr_size_t *result_len
svn_membuf_t *buffer)
{
int flags = 0;
- ssize_t result;
+ apr_ssize_t result;
if (casefold)
flags |= UTF8PROC_CASEFOLD;
@@ -240,6 +246,36 @@ svn_utf__xfrm(const char **result,
return SVN_NO_ERROR;
}
+svn_boolean_t
+svn_utf__fuzzy_glob_match(const char *str,
+ const apr_array_header_t *patterns,
+ svn_membuf_t *buf)
+{
+ const char *normalized;
+ svn_error_t *err;
+ int i;
+
+ /* Try to normalize case and accents in STR.
+ *
+ * If that should fail for some reason, consider STR a mismatch. */
+ err = svn_utf__xfrm(&normalized, str, strlen(str), TRUE, TRUE, buf);
+ if (err)
+ {
+ svn_error_clear(err);
+ return FALSE;
+ }
+
+ /* Now see whether it matches any/all of the patterns. */
+ for (i = 0; i < patterns->nelts; ++i)
+ {
+ const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
+ if (apr_fnmatch(pattern, normalized, 0) == APR_SUCCESS)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
/* Decode a single UCS-4 code point to UTF-8, appending the result to BUFFER.
* Assume BUFFER is already filled to *LENGTH and return the new size there.
* This function does *not* nul-terminate the stringbuf!
@@ -254,7 +290,7 @@ encode_ucs4(svn_membuf_t *buffer, apr_in
if (buffer->size - *length < 4)
svn_membuf__resize(buffer, buffer->size + 4);
- utf8len = utf8proc_encode_char(ucs4chr, ((uint8_t*)buffer->data + *length));
+ utf8len = utf8proc_encode_char(ucs4chr, ((apr_byte_t*)buffer->data + *length));
if (!utf8len)
return svn_error_createf(SVN_ERR_UTF8PROC_ERROR, NULL,
_("Invalid Unicode character U+%04lX"),
@@ -317,7 +353,7 @@ svn_utf__glob(svn_boolean_t *match,
{
const int nullterm = (escape_len == SVN_UTF__UNKNOWN_LENGTH
? UTF8PROC_NULLTERM : 0);
- ssize_t result =
+ apr_ssize_t result =
utf8proc_decompose((const void*) escape, escape_len, &ucs4esc, 1,
UTF8PROC_DECOMPOSE | UTF8PROC_STABLE | nullterm);
if (result < 0)
@@ -415,8 +451,8 @@ svn_utf__fuzzy_escape(const char *src, a
svn_stringbuf_t *result;
svn_membuf_t buffer;
- ssize_t decomp_length;
- ssize_t len;
+ apr_ssize_t decomp_length;
+ apr_ssize_t len;
/* Decompose to a non-reversible compatibility format. */
svn_membuf__create(&buffer, length * sizeof(apr_int32_t), pool);
@@ -445,7 +481,7 @@ svn_utf__fuzzy_escape(const char *src, a
while (done < length)
{
- len = utf8proc_iterate((uint8_t*)src + done, length - done, &uc);
+ len = utf8proc_iterate((apr_byte_t*)src + done, length - done, &uc);
if (len < 0)
break;
done += len;
@@ -473,7 +509,7 @@ svn_utf__fuzzy_escape(const char *src, a
/* Determine the length of the UTF-8 sequence */
const char *const p = src + done;
- len = utf8proc_utf8class[(uint8_t)*p];
+ len = utf8proc_utf8class[(apr_byte_t)*p];
/* Check if the multi-byte sequence is valid UTF-8. */
if (len > 1 && len <= (apr_ssize_t)(length - done))
Modified: subversion/branches/addremove/subversion/libsvn_subr/utf8proc/utf8proc.c
URL: http://svn.apache.org/viewvc/subversion/branches/addremove/subversion/libsvn_subr/utf8proc/utf8proc.c?rev=1813173&r1=1813172&r2=1813173&view=diff
==============================================================================
--- subversion/branches/addremove/subversion/libsvn_subr/utf8proc/utf8proc.c (original)
+++ subversion/branches/addremove/subversion/libsvn_subr/utf8proc/utf8proc.c Tue Oct 24 12:40:47 2017
@@ -1,6 +1,6 @@
-#include "svn_private_config.h"
-#if SVN_INTERNAL_UTF8PROC
+/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
/*
+ * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -45,8 +45,7 @@
#include "utf8proc_data.c"
-UTF8PROC_DATA
-const int8_t utf8proc_utf8class[256] = {
+UTF8PROC_DLLEXPORT const utf8proc_int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -84,134 +83,144 @@ const int8_t utf8proc_utf8class[256] = {
#define UTF8PROC_HANGUL_S_START 0xAC00
#define UTF8PROC_HANGUL_S_END 0xD7A4
-
-#define UTF8PROC_BOUNDCLASS_START 0
-#define UTF8PROC_BOUNDCLASS_OTHER 1
-#define UTF8PROC_BOUNDCLASS_CR 2
-#define UTF8PROC_BOUNDCLASS_LF 3
-#define UTF8PROC_BOUNDCLASS_CONTROL 4
-#define UTF8PROC_BOUNDCLASS_EXTEND 5
-#define UTF8PROC_BOUNDCLASS_L 6
-#define UTF8PROC_BOUNDCLASS_V 7
-#define UTF8PROC_BOUNDCLASS_T 8
-#define UTF8PROC_BOUNDCLASS_LV 9
-#define UTF8PROC_BOUNDCLASS_LVT 10
-
-
-UTF8PROC_API
-const char *utf8proc_version(void) {
- return "1.1.5";
+/* Should follow semantic-versioning rules (semver.org) based on API
+ compatibility. (Note that the shared-library version number will
+ be different, being based on ABI compatibility.): */
+#define STRINGIZEx(x) #x
+#define STRINGIZE(x) STRINGIZEx(x)
+UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
+ return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
}
-/*
- * This macro tells translators that string X should be translated,
- * but does not look up the translation at run time. This is standard
- * GNU gettext notation for annotating compile-time constant strings.
- */
-#ifndef N_
-#define N_(x) x
-#endif
-
-UTF8PROC_API
-const char *utf8proc_errmsg(ssize_t errcode) {
+UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
switch (errcode) {
case UTF8PROC_ERROR_NOMEM:
- return N_("Memory for processing UTF-8 data could not be allocated.");
+ return "Memory for processing UTF-8 data could not be allocated.";
case UTF8PROC_ERROR_OVERFLOW:
- return N_("UTF-8 string is too long to be processed.");
+ return "UTF-8 string is too long to be processed.";
case UTF8PROC_ERROR_INVALIDUTF8:
- return N_("Invalid UTF-8 string");
+ return "Invalid UTF-8 string";
case UTF8PROC_ERROR_NOTASSIGNED:
- return N_("Unassigned Unicode code point found in UTF-8 string.");
+ return "Unassigned Unicode code point found in UTF-8 string.";
case UTF8PROC_ERROR_INVALIDOPTS:
- return N_("Invalid options for UTF-8 processing chosen.");
+ return "Invalid options for UTF-8 processing chosen.";
default:
- return N_("An unknown error occured while processing UTF-8 data.");
+ return "An unknown error occurred while processing UTF-8 data.";
}
}
-UTF8PROC_API
-ssize_t utf8proc_iterate(
- const uint8_t *str, ssize_t strlen, int32_t *dst
+#define utf_cont(ch) (((ch) & 0xc0) == 0x80)
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) {
- int length;
- int i;
- int32_t uc = -1;
+ utf8proc_uint32_t uc;
+ const utf8proc_uint8_t *end;
+
*dst = -1;
if (!strlen) return 0;
- length = utf8proc_utf8class[str[0]];
- if (!length) return UTF8PROC_ERROR_INVALIDUTF8;
- if (strlen >= 0 && length > strlen) return UTF8PROC_ERROR_INVALIDUTF8;
- for (i=1; i<length; i++) {
- if ((str[i] & 0xC0) != 0x80) return UTF8PROC_ERROR_INVALIDUTF8;
- }
- switch (length) {
- case 1:
- uc = str[0];
- break;
- case 2:
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
- if (uc < 0x80) uc = -1;
- break;
- case 3:
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
- + (str[2] & 0x3F);
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
- break;
- case 4:
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
- + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
- if (uc < 0x10000 || uc >= 0x110000) uc = -1;
- break;
- }
- if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
- return UTF8PROC_ERROR_INVALIDUTF8;
- *dst = uc;
- return length;
+ end = str + ((strlen < 0) ? 4 : strlen);
+ uc = *str++;
+ if (uc < 0x80) {
+ *dst = uc;
+ return 1;
+ }
+ /* Must be between 0xc2 and 0xf4 inclusive to be valid */
+ if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
+ if (uc < 0xe0) { /* 2-byte sequence */
+ /* Must have valid continuation character */
+ if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
+ *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
+ return 2;
+ }
+ if (uc < 0xf0) { /* 3-byte sequence */
+ if ((str + 1 >= end) || !utf_cont(*str) || !utf_cont(str[1]))
+ return UTF8PROC_ERROR_INVALIDUTF8;
+ /* Check for surrogate chars */
+ if (uc == 0xed && *str > 0x9f)
+ return UTF8PROC_ERROR_INVALIDUTF8;
+ uc = ((uc & 0xf)<<12) | ((*str & 0x3f)<<6) | (str[1] & 0x3f);
+ if (uc < 0x800)
+ return UTF8PROC_ERROR_INVALIDUTF8;
+ *dst = uc;
+ return 3;
+ }
+ /* 4-byte sequence
+ Must have 3 valid continuation characters */
+ if ((str + 2 >= end) || !utf_cont(*str) || !utf_cont(str[1]) || !utf_cont(str[2]))
+ return UTF8PROC_ERROR_INVALIDUTF8;
+ /* Make sure in correct range (0x10000 - 0x10ffff) */
+ if (uc == 0xf0) {
+ if (*str < 0x90) return UTF8PROC_ERROR_INVALIDUTF8;
+ } else if (uc == 0xf4) {
+ if (*str > 0x8f) return UTF8PROC_ERROR_INVALIDUTF8;
+ }
+ *dst = ((uc & 7)<<18) | ((*str & 0x3f)<<12) | ((str[1] & 0x3f)<<6) | (str[2] & 0x3f);
+ return 4;
}
-UTF8PROC_API
-bool utf8proc_codepoint_valid(int32_t uc) {
- if (uc < 0 || uc >= 0x110000 ||
- ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) return false;
- else return true;
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t uc) {
+ return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
}
-UTF8PROC_API
-ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
if (uc < 0x00) {
return 0;
} else if (uc < 0x80) {
- dst[0] = (uint8_t)uc;
+ dst[0] = (utf8proc_uint8_t) uc;
return 1;
} else if (uc < 0x800) {
- dst[0] = 0xC0 + (uint8_t)(uc >> 6);
- dst[1] = 0x80 + (uc & 0x3F);
+ dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
+ dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 2;
- } else if (uc == 0xFFFF) {
- dst[0] = 0xFF;
- return 1;
- } else if (uc == 0xFFFE) {
- dst[0] = 0xFE;
- return 1;
+ /* Note: we allow encoding 0xd800-0xdfff here, so as not to change
+ the API, however, these are actually invalid in UTF-8 */
} else if (uc < 0x10000) {
- dst[0] = 0xE0 + (uint8_t)(uc >> 12);
- dst[1] = 0x80 + ((uc >> 6) & 0x3F);
- dst[2] = 0x80 + (uc & 0x3F);
+ dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
+ dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 3;
} else if (uc < 0x110000) {
- dst[0] = 0xF0 + (uint8_t)(uc >> 18);
- dst[1] = 0x80 + ((uc >> 12) & 0x3F);
- dst[2] = 0x80 + ((uc >> 6) & 0x3F);
- dst[3] = 0x80 + (uc & 0x3F);
+ dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
+ dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
+ dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 4;
} else return 0;
}
-UTF8PROC_API
-const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
+/* internal "unsafe" version that does not check whether uc is in range */
+static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
+ if (uc < 0x00) {
+ return 0;
+ } else if (uc < 0x80) {
+ dst[0] = (utf8proc_uint8_t)uc;
+ return 1;
+ } else if (uc < 0x800) {
+ dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
+ dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
+ return 2;
+ } else if (uc == 0xFFFF) {
+ dst[0] = (utf8proc_uint8_t)0xFF;
+ return 1;
+ } else if (uc == 0xFFFE) {
+ dst[0] = (utf8proc_uint8_t)0xFE;
+ return 1;
+ } else if (uc < 0x10000) {
+ dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
+ dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
+ return 3;
+ } else if (uc < 0x110000) {
+ dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
+ dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
+ dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
+ dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
+ return 4;
+ } else return 0;
+}
+
+/* internal "unsafe" version that does not check whether uc is in range */
+static const utf8proc_property_t *unsafe_get_property(utf8proc_int32_t uc) {
/* ASSERT: uc >= 0 && uc < 0x110000 */
return utf8proc_properties + (
utf8proc_stage2table[
@@ -220,23 +229,184 @@ const utf8proc_property_t *utf8proc_get_
);
}
+UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int32_t uc) {
+ return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
+}
+
+/* return whether there is a grapheme break between boundclasses lbc and tbc
+ (according to the definition of extended grapheme clusters)
+
+ Rule numbering refers to TR29 Version 29 (Unicode 9.0.0):
+ http://www.unicode.org/reports/tr29/tr29-29.html
+
+ CAVEATS:
+ Please note that evaluation of GB10 (grapheme breaks between emoji zwj sequences)
+ and GB 12/13 (regional indicator code points) require knowledge of previous characters
+ and are thus not handled by this function. This may result in an incorrect break before
+ an E_Modifier class codepoint and an incorrectly missing break between two
+ REGIONAL_INDICATOR class code points if such support does not exist in the caller.
+
+ See the special support in grapheme_break_extended, for required bookkeeping by the caller.
+*/
+static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
+ return
+ (lbc == UTF8PROC_BOUNDCLASS_START) ? true : /* GB1 */
+ (lbc == UTF8PROC_BOUNDCLASS_CR && /* GB3 */
+ tbc == UTF8PROC_BOUNDCLASS_LF) ? false : /* --- */
+ (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : /* GB4 */
+ (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : /* GB5 */
+ (lbc == UTF8PROC_BOUNDCLASS_L && /* GB6 */
+ (tbc == UTF8PROC_BOUNDCLASS_L || /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_V || /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_LV || /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false : /* --- */
+ ((lbc == UTF8PROC_BOUNDCLASS_LV || /* GB7 */
+ lbc == UTF8PROC_BOUNDCLASS_V) && /* --- */
+ (tbc == UTF8PROC_BOUNDCLASS_V || /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_T)) ? false : /* --- */
+ ((lbc == UTF8PROC_BOUNDCLASS_LVT || /* GB8 */
+ lbc == UTF8PROC_BOUNDCLASS_T) && /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_T) ? false : /* --- */
+ (tbc == UTF8PROC_BOUNDCLASS_EXTEND || /* GB9 */
+ tbc == UTF8PROC_BOUNDCLASS_ZWJ || /* --- */
+ tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || /* GB9a */
+ lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : /* GB9b */
+ ((lbc == UTF8PROC_BOUNDCLASS_E_BASE || /* GB10 (requires additional handling below) */
+ lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && /* ---- */
+ tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : /* ---- */
+ (lbc == UTF8PROC_BOUNDCLASS_ZWJ && /* GB11 */
+ (tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || /* ---- */
+ tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : /* ---- */
+ (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && /* GB12/13 (requires additional handling below) */
+ tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : /* ---- */
+ true; /* GB999 */
+}
+
+static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
+{
+ utf8proc_bool break_permitted;
+ int lbc_override = lbc;
+ if (state && *state != UTF8PROC_BOUNDCLASS_START)
+ lbc_override = *state;
+ break_permitted = grapheme_break_simple(lbc_override, tbc);
+ if (state) {
+ /* Special support for GB 12/13 made possible by GB999. After two RI
+ class codepoints we want to force a break. Do this by resetting the
+ second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
+ after that character according to GB999 (unless of course such a break is
+ forbidden by a different rule such as GB9). */
+ if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
+ *state = UTF8PROC_BOUNDCLASS_OTHER;
+ /* Special support for GB10. Fold any EXTEND codepoints into the previous
+ boundclass if we're dealing with an emoji base boundclass. */
+ else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE ||
+ *state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&
+ tbc == UTF8PROC_BOUNDCLASS_EXTEND)
+ *state = UTF8PROC_BOUNDCLASS_E_BASE;
+ else
+ *state = tbc;
+ }
+ return break_permitted;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
+ utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
+
+ return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
+ utf8proc_get_property(c2)->boundclass,
+ state);
+}
+
+
+UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(
+ utf8proc_int32_t c1, utf8proc_int32_t c2) {
+ return utf8proc_grapheme_break_stateful(c1, c2, NULL);
+}
+
+static utf8proc_int32_t seqindex_decode_entry(const utf8proc_uint16_t **entry)
+{
+ utf8proc_int32_t entry_cp = **entry;
+ if ((entry_cp & 0xF800) == 0xD800) {
+ *entry = *entry + 1;
+ entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
+ entry_cp += 0x10000;
+ }
+ return entry_cp;
+}
+
+static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
+{
+ const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
+ return seqindex_decode_entry(&entry);
+}
+
+static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
+ utf8proc_ssize_t written = 0;
+ const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
+ int len = seqindex >> 13;
+ if (len >= 7) {
+ len = *entry;
+ entry++;
+ }
+ for (; len >= 0; entry++, len--) {
+ utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry);
+
+ written += utf8proc_decompose_char(entry_cp, dst+written,
+ (bufsize > written) ? (bufsize - written) : 0, options,
+ last_boundclass);
+ if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
+ }
+ return written;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
+{
+ utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
+ return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
+{
+ utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
+ return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
+{
+ utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
+ return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+}
+
+/* return a character width analogous to wcwidth (except portable and
+ hopefully less buggy than most system wcwidth functions). */
+UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
+ return utf8proc_get_property(c)->charwidth;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
+ return utf8proc_get_property(c)->category;
+}
+
+UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
+ static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
+ return s[utf8proc_category(c)];
+}
+
#define utf8proc_decompose_lump(replacement_uc) \
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
options & ~UTF8PROC_LUMP, last_boundclass)
-UTF8PROC_API
-ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize,
- int options, int *last_boundclass) {
- /* ASSERT: uc >= 0 && uc < 0x110000 */
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
const utf8proc_property_t *property;
utf8proc_propval_t category;
- int32_t hangul_sindex;
- property = utf8proc_get_property(uc);
+ utf8proc_int32_t hangul_sindex;
+ if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
+ property = unsafe_get_property(uc);
category = property->category;
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
- int32_t hangul_tindex;
+ utf8proc_int32_t hangul_tindex;
if (bufsize >= 1) {
dst[0] = UTF8PROC_HANGUL_LBASE +
hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
@@ -287,79 +457,20 @@ ssize_t utf8proc_decompose_char(int32_t
category == UTF8PROC_CATEGORY_ME) return 0;
}
if (options & UTF8PROC_CASEFOLD) {
- if (property->casefold_mapping) {
- const int32_t *casefold_entry;
- ssize_t written = 0;
- for (casefold_entry = property->casefold_mapping;
- *casefold_entry >= 0; casefold_entry++) {
- written += utf8proc_decompose_char(*casefold_entry, dst+written,
- (bufsize > written) ? (bufsize - written) : 0, options,
- last_boundclass);
- if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
- }
- return written;
+ if (property->casefold_seqindex != UINT16_MAX) {
+ return seqindex_write_char_decomposed(property->casefold_seqindex, dst, bufsize, options, last_boundclass);
}
}
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
- if (property->decomp_mapping &&
+ if (property->decomp_seqindex != UINT16_MAX &&
(!property->decomp_type || (options & UTF8PROC_COMPAT))) {
- const int32_t *decomp_entry;
- ssize_t written = 0;
- for (decomp_entry = property->decomp_mapping;
- *decomp_entry >= 0; decomp_entry++) {
- written += utf8proc_decompose_char(*decomp_entry, dst+written,
- (bufsize > written) ? (bufsize - written) : 0, options,
- last_boundclass);
- if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
- }
- return written;
+ return seqindex_write_char_decomposed(property->decomp_seqindex, dst, bufsize, options, last_boundclass);
}
}
if (options & UTF8PROC_CHARBOUND) {
- bool boundary;
- int tbc, lbc;
- tbc =
- (uc == 0x000D) ? UTF8PROC_BOUNDCLASS_CR :
- (uc == 0x000A) ? UTF8PROC_BOUNDCLASS_LF :
- ((category == UTF8PROC_CATEGORY_ZL ||
- category == UTF8PROC_CATEGORY_ZP ||
- category == UTF8PROC_CATEGORY_CC ||
- category == UTF8PROC_CATEGORY_CF) &&
- !(uc == 0x200C || uc == 0x200D)) ? UTF8PROC_BOUNDCLASS_CONTROL :
- property->extend ? UTF8PROC_BOUNDCLASS_EXTEND :
- ((uc >= UTF8PROC_HANGUL_L_START && uc < UTF8PROC_HANGUL_L_END) ||
- uc == UTF8PROC_HANGUL_L_FILLER) ? UTF8PROC_BOUNDCLASS_L :
- (uc >= UTF8PROC_HANGUL_V_START && uc < UTF8PROC_HANGUL_V_END) ?
- UTF8PROC_BOUNDCLASS_V :
- (uc >= UTF8PROC_HANGUL_T_START && uc < UTF8PROC_HANGUL_T_END) ?
- UTF8PROC_BOUNDCLASS_T :
- (uc >= UTF8PROC_HANGUL_S_START && uc < UTF8PROC_HANGUL_S_END) ? (
- ((uc-UTF8PROC_HANGUL_SBASE) % UTF8PROC_HANGUL_TCOUNT == 0) ?
- UTF8PROC_BOUNDCLASS_LV : UTF8PROC_BOUNDCLASS_LVT
- ) :
- UTF8PROC_BOUNDCLASS_OTHER;
- lbc = *last_boundclass;
- boundary =
- (tbc == UTF8PROC_BOUNDCLASS_EXTEND) ? false :
- (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
- (lbc == UTF8PROC_BOUNDCLASS_CR &&
- tbc == UTF8PROC_BOUNDCLASS_LF) ? false :
- (lbc == UTF8PROC_BOUNDCLASS_CONTROL) ? true :
- (tbc == UTF8PROC_BOUNDCLASS_CONTROL) ? true :
- (lbc == UTF8PROC_BOUNDCLASS_L &&
- (tbc == UTF8PROC_BOUNDCLASS_L ||
- tbc == UTF8PROC_BOUNDCLASS_V ||
- tbc == UTF8PROC_BOUNDCLASS_LV ||
- tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false :
- ((lbc == UTF8PROC_BOUNDCLASS_LV ||
- lbc == UTF8PROC_BOUNDCLASS_V) &&
- (tbc == UTF8PROC_BOUNDCLASS_V ||
- tbc == UTF8PROC_BOUNDCLASS_T)) ? false :
- ((lbc == UTF8PROC_BOUNDCLASS_LVT ||
- lbc == UTF8PROC_BOUNDCLASS_T) &&
- tbc == UTF8PROC_BOUNDCLASS_T) ? false :
- true;
- *last_boundclass = tbc;
+ utf8proc_bool boundary;
+ int tbc = property->boundclass;
+ boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
if (boundary) {
if (bufsize >= 1) dst[0] = 0xFFFF;
if (bufsize >= 2) dst[1] = uc;
@@ -370,27 +481,34 @@ ssize_t utf8proc_decompose_char(int32_t
return 1;
}
-UTF8PROC_API
-ssize_t utf8proc_decompose(
- const uint8_t *str, ssize_t strlen,
- int32_t *buffer, ssize_t bufsize, int options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
+) {
+ return utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
+ utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
+ utf8proc_custom_func custom_func, void *custom_data
) {
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
- ssize_t wpos = 0;
+ utf8proc_ssize_t wpos = 0;
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
if ((options & UTF8PROC_STRIPMARK) &&
!(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
{
- int32_t uc;
- ssize_t rpos = 0;
- ssize_t decomp_result;
+ utf8proc_int32_t uc;
+ utf8proc_ssize_t rpos = 0;
+ utf8proc_ssize_t decomp_result;
int boundclass = UTF8PROC_BOUNDCLASS_START;
while (1) {
if (options & UTF8PROC_NULLTERM) {
rpos += utf8proc_iterate(str + rpos, -1, &uc);
- /* checking of return value is not neccessary,
+ /* checking of return value is not necessary,
as 'uc' is < 0 in case of error */
if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
if (rpos < 0) return UTF8PROC_ERROR_OVERFLOW;
@@ -400,6 +518,9 @@ ssize_t utf8proc_decompose(
rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc);
if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
}
+ if (custom_func != NULL) {
+ uc = custom_func(uc, custom_data); /* user-specified custom mapping */
+ }
decomp_result = utf8proc_decompose_char(
uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options,
&boundclass
@@ -407,19 +528,20 @@ ssize_t utf8proc_decompose(
if (decomp_result < 0) return decomp_result;
wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */
- if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
+ if (wpos < 0 ||
+ wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
return UTF8PROC_ERROR_OVERFLOW;
}
}
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
- ssize_t pos = 0;
+ utf8proc_ssize_t pos = 0;
while (pos < wpos-1) {
- int32_t uc1, uc2;
+ utf8proc_int32_t uc1, uc2;
const utf8proc_property_t *property1, *property2;
uc1 = buffer[pos];
uc2 = buffer[pos+1];
- property1 = utf8proc_get_property(uc1);
- property2 = utf8proc_get_property(uc2);
+ property1 = unsafe_get_property(uc1);
+ property2 = unsafe_get_property(uc2);
if (property1->combining_class > property2->combining_class &&
property2->combining_class > 0) {
buffer[pos] = uc2;
@@ -433,14 +555,12 @@ ssize_t utf8proc_decompose(
return wpos;
}
-UTF8PROC_API
-ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
- /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
- ASSERT: 'buffer' has one spare byte of free space at the end! */
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
- ssize_t rpos;
- ssize_t wpos = 0;
- int32_t uc;
+ utf8proc_ssize_t rpos;
+ utf8proc_ssize_t wpos = 0;
+ utf8proc_int32_t uc;
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
@@ -469,23 +589,23 @@ ssize_t utf8proc_reencode(int32_t *buffe
length = wpos;
}
if (options & UTF8PROC_COMPOSE) {
- int32_t *starter = NULL;
- int32_t current_char;
+ utf8proc_int32_t *starter = NULL;
+ utf8proc_int32_t current_char;
const utf8proc_property_t *starter_property = NULL, *current_property;
utf8proc_propval_t max_combining_class = -1;
- ssize_t rpos;
- ssize_t wpos = 0;
- int32_t composition;
+ utf8proc_ssize_t rpos;
+ utf8proc_ssize_t wpos = 0;
+ utf8proc_int32_t composition;
for (rpos = 0; rpos < length; rpos++) {
current_char = buffer[rpos];
- current_property = utf8proc_get_property(current_char);
+ current_property = unsafe_get_property(current_char);
if (starter && current_property->combining_class > max_combining_class) {
/* combination perhaps possible */
- int32_t hangul_lindex;
- int32_t hangul_sindex;
+ utf8proc_int32_t hangul_lindex;
+ utf8proc_int32_t hangul_sindex;
hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
- int32_t hangul_vindex;
+ utf8proc_int32_t hangul_vindex;
hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
*starter = UTF8PROC_HANGUL_SBASE +
@@ -498,7 +618,7 @@ ssize_t utf8proc_reencode(int32_t *buffe
hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
(hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
- int32_t hangul_tindex;
+ utf8proc_int32_t hangul_tindex;
hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
*starter += hangul_tindex;
@@ -507,19 +627,26 @@ ssize_t utf8proc_reencode(int32_t *buffe
}
}
if (!starter_property) {
- starter_property = utf8proc_get_property(*starter);
+ starter_property = unsafe_get_property(*starter);
}
- if (starter_property->comb1st_index >= 0 &&
- current_property->comb2nd_index >= 0) {
- composition = utf8proc_combinations[
- starter_property->comb1st_index +
- current_property->comb2nd_index
- ];
- if (composition >= 0 && (!(options & UTF8PROC_STABLE) ||
- !(utf8proc_get_property(composition)->comp_exclusion))) {
- *starter = composition;
- starter_property = NULL;
- continue;
+ if (starter_property->comb_index < 0x8000 &&
+ current_property->comb_index != UINT16_MAX &&
+ current_property->comb_index >= 0x8000) {
+ int sidx = starter_property->comb_index;
+ int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx];
+ if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) {
+ idx += sidx + 2;
+ if (current_property->comb_index & 0x4000) {
+ composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
+ } else
+ composition = utf8proc_combinations[idx];
+
+ if (composition > 0 && (!(options & UTF8PROC_STABLE) ||
+ !(unsafe_get_property(composition)->comp_exclusion))) {
+ *starter = composition;
+ starter_property = NULL;
+ continue;
+ }
}
}
}
@@ -537,30 +664,51 @@ ssize_t utf8proc_reencode(int32_t *buffe
}
length = wpos;
}
+ return length;
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
+ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
+ ASSERT: 'buffer' has one spare byte of free space at the end! */
+ length = utf8proc_normalize_utf32(buffer, length, options);
+ if (length < 0) return length;
{
- ssize_t rpos, wpos = 0;
- int32_t uc;
- for (rpos = 0; rpos < length; rpos++) {
- uc = buffer[rpos];
- wpos += utf8proc_encode_char(uc, ((uint8_t *)buffer) + wpos);
+ utf8proc_ssize_t rpos, wpos = 0;
+ utf8proc_int32_t uc;
+ if (options & UTF8PROC_CHARBOUND) {
+ for (rpos = 0; rpos < length; rpos++) {
+ uc = buffer[rpos];
+ wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
+ }
+ } else {
+ for (rpos = 0; rpos < length; rpos++) {
+ uc = buffer[rpos];
+ wpos += utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
+ }
}
- ((uint8_t *)buffer)[wpos] = 0;
+ ((utf8proc_uint8_t *)buffer)[wpos] = 0;
return wpos;
}
}
-UTF8PROC_API
-ssize_t utf8proc_map(
- const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
) {
- int32_t *buffer;
- ssize_t result;
+ return utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
+}
+
+UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
+ const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
+ utf8proc_custom_func custom_func, void *custom_data
+) {
+ utf8proc_int32_t *buffer;
+ utf8proc_ssize_t result;
*dstptr = NULL;
- result = utf8proc_decompose(str, strlen, NULL, 0, options);
+ result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
if (result < 0) return result;
- buffer = malloc(result * sizeof(int32_t) + 1);
+ buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
if (!buffer) return UTF8PROC_ERROR_NOMEM;
- result = utf8proc_decompose(str, strlen, buffer, result, options);
+ result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
if (result < 0) {
free(buffer);
return result;
@@ -571,44 +719,38 @@ ssize_t utf8proc_map(
return result;
}
{
- int32_t *newptr;
- newptr = realloc(buffer, (size_t)result+1);
+ utf8proc_int32_t *newptr;
+ newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
if (newptr) buffer = newptr;
}
- *dstptr = (uint8_t *)buffer;
+ *dstptr = (utf8proc_uint8_t *)buffer;
return result;
}
-UTF8PROC_API
-uint8_t *utf8proc_NFD(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE);
return retval;
}
-UTF8PROC_API
-uint8_t *utf8proc_NFC(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE);
return retval;
}
-UTF8PROC_API
-uint8_t *utf8proc_NFKD(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
return retval;
}
-UTF8PROC_API
-uint8_t *utf8proc_NFKC(const uint8_t *str) {
- uint8_t *retval;
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) {
+ utf8proc_uint8_t *retval;
utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
return retval;
}
-
-#endif /* SVN_INTERNAL_UTF8PROC */