You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by be...@apache.org on 2014/01/09 01:08:16 UTC
[61/96] [abbrv] support static build
http://git-wip-us.apache.org/repos/asf/couchdb/blob/79f67b6a/src/apps/couch_collate/platform/osx/icu/unicode/unorm.h
----------------------------------------------------------------------
diff --git a/src/apps/couch_collate/platform/osx/icu/unicode/unorm.h b/src/apps/couch_collate/platform/osx/icu/unicode/unorm.h
new file mode 100644
index 0000000..c22b808
--- /dev/null
+++ b/src/apps/couch_collate/platform/osx/icu/unicode/unorm.h
@@ -0,0 +1,576 @@
+/*
+*******************************************************************************
+* Copyright (c) 1996-2007, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+* File unorm.h
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date Name Description
+* 02/01/01 synwee Added normalization quickcheck enum and method.
+*/
+#ifndef UNORM_H
+#define UNORM_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Normalization
+ *
+ * <h2>Unicode normalization API</h2>
+ *
+ * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>unorm_normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode. For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ * 00C1 LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ * 0041 LATIN CAPITAL LETTER A
+ * 0301 COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent". When you are searching or
+ * comparing text, you must ensure that these two sequences are treated
+ * equivalently. In addition, you must handle characters with more than one
+ * accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ * 0066 LATIN SMALL LETTER F
+ * 0066 LATIN SMALL LETTER F
+ * 0069 LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ * FB03 LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters. When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * <code>unorm_normalize</code> helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It allows to work on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends to exchange texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+/**
+ * Constants for normalization modes.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** No decomposition/composition. @stable ICU 2.0 */
+ UNORM_NONE = 1,
+ /** Canonical decomposition. @stable ICU 2.0 */
+ UNORM_NFD = 2,
+ /** Compatibility decomposition. @stable ICU 2.0 */
+ UNORM_NFKD = 3,
+ /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFC = 4,
+ /** Default normalization. @stable ICU 2.0 */
+ UNORM_DEFAULT = UNORM_NFC,
+ /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
+ UNORM_NFKC =5,
+ /** "Fast C or D" form. @stable ICU 2.0 */
+ UNORM_FCD = 6,
+
+ /** One more than the highest normalization mode constant. @stable ICU 2.0 */
+ UNORM_MODE_COUNT
+} UNormalizationMode;
+
+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @stable ICU 2.6
+ */
+enum {
+ /**
+ * Options bit set value to select Unicode 3.2 normalization
+ * (except NormalizationCorrections).
+ * At most one Unicode version can be selected at a time.
+ * @stable ICU 2.6
+ */
+ UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @stable ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according the specified normalization mode
+ * and options.
+ * The source and result buffers must not be the same, nor overlap.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE,
+ * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ * The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_normalize(const UChar *source, int32_t sourceLength,
+ UNormalizationMode mode, int32_t options,
+ UChar *result, int32_t resultLength,
+ UErrorCode *status);
+#endif
+/**
+ * Result values for unorm_quickCheck().
+ * For details see Unicode Technical Report 15.
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * Indicates that string is not in the normalized format
+ */
+ UNORM_NO,
+ /**
+ * Indicates that string is in the normalized format
+ */
+ UNORM_YES,
+ /**
+ * Indicates that string cannot be determined if it is in the normalized
+ * format without further thorough checks.
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Performing quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode which normalization form to test for
+ * @param status a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @stable ICU 2.0
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+ UNormalizationMode mode,
+ UErrorCode *status);
+
+/**
+ * Performing quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a TRUE/FALSE result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE
+ * if the normalization was necessary.
+ *
+ * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+ const UChar *right, int32_t rightLength,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and ustring.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+/**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif
http://git-wip-us.apache.org/repos/asf/couchdb/blob/79f67b6a/src/apps/couch_collate/platform/osx/icu/unicode/unum.h
----------------------------------------------------------------------
diff --git a/src/apps/couch_collate/platform/osx/icu/unicode/unum.h b/src/apps/couch_collate/platform/osx/icu/unicode/unum.h
new file mode 100644
index 0000000..c5df2bb
--- /dev/null
+++ b/src/apps/couch_collate/platform/osx/icu/unicode/unum.h
@@ -0,0 +1,869 @@
+/*
+*******************************************************************************
+* Copyright (C) 1997-2008, International Business Machines Corporation and others.
+* All Rights Reserved.
+* Modification History:
+*
+* Date Name Description
+* 06/24/99 helena Integrated Alan's NF enhancements and Java2 bug fixes
+*******************************************************************************
+*/
+
+#ifndef _UNUM
+#define _UNUM
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uloc.h"
+#include "unicode/umisc.h"
+#include "unicode/parseerr.h"
+/**
+ * \file
+ * \brief C API: NumberFormat
+ *
+ * <h2> Number Format C API </h2>
+ *
+ * Number Format C API Provides functions for
+ * formatting and parsing a number. Also provides methods for
+ * determining which locales have number formats, and what their names
+ * are.
+ * <P>
+ * UNumberFormat helps you to format and parse numbers for any locale.
+ * Your code can be completely independent of the locale conventions
+ * for decimal points, thousands-separators, or even the particular
+ * decimal digits used, or whether the number format is even decimal.
+ * There are different number format styles like decimal, currency,
+ * percent and spellout.
+ * <P>
+ * To format a number for the current Locale, use one of the static
+ * factory methods:
+ * <pre>
+ * \code
+ * UChar myString[20];
+ * double myNumber = 7.0;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ * unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
+ * printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*)
+ * \endcode
+ * </pre>
+ * If you are formatting multiple numbers, it is more efficient to get
+ * the format and use it multiple times so that the system doesn't
+ * have to fetch the information about the local language and country
+ * conventions multiple times.
+ * <pre>
+ * \code
+ * uint32_t i, resultlength, reslenneeded;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UFieldPosition pos;
+ * uint32_t a[] = { 123, 3333, -1234567 };
+ * const uint32_t a_len = sizeof(a) / sizeof(a[0]);
+ * UNumberFormat* nf;
+ * UChar* result = NULL;
+ *
+ * nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ * for (i = 0; i < a_len; i++) {
+ * resultlength=0;
+ * reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status);
+ * result = NULL;
+ * if(status==U_BUFFER_OVERFLOW_ERROR){
+ * status=U_ZERO_ERROR;
+ * resultlength=reslenneeded+1;
+ * result=(UChar*)malloc(sizeof(UChar) * resultlength);
+ * unum_format(nf, a[i], result, resultlength, &pos, &status);
+ * }
+ * printf( " Example 2: %s\n", austrdup(result));
+ * free(result);
+ * }
+ * \endcode
+ * </pre>
+ * To format a number for a different Locale, specify it in the
+ * call to unum_open().
+ * <pre>
+ * \code
+ * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &success)
+ * \endcode
+ * </pre>
+ * You can use a NumberFormat API unum_parse() to parse.
+ * <pre>
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * int32_t pos=0;
+ * int32_t num;
+ * num = unum_parse(nf, str, u_strlen(str), &pos, &status);
+ * \endcode
+ * </pre>
+ * Use UNUM_DECIMAL to get the normal number format for that country.
+ * There are other static options available. Use UNUM_CURRENCY
+ * to get the currency number format for that country. Use UNUM_PERCENT
+ * to get a format for displaying percentages. With this format, a
+ * fraction from 0.53 is displayed as 53%.
+ * <P>
+ * Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat
+ * formatter. The pattern must conform to the syntax defined for those
+ * formatters.
+ * <P>
+ * You can also control the display of numbers with such function as
+ * unum_getAttribues() and unum_setAtributes(), which let you set the
+ * miminum fraction digits, grouping, etc.
+ * @see UNumberFormatAttributes for more details
+ * <P>
+ * You can also use forms of the parse and format methods with
+ * ParsePosition and UFieldPosition to allow you to:
+ * <ul type=round>
+ * <li>(a) progressively parse through pieces of a string.
+ * <li>(b) align the decimal point and other areas.
+ * </ul>
+ * <p>
+ * It is also possible to change or set the symbols used for a particular
+ * locale like the currency symbol, the grouping seperator , monetary seperator
+ * etc by making use of functions unum_setSymbols() and unum_getSymbols().
+ */
+
+/** A number formatter.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef void* UNumberFormat;
+
+/** The possible number format styles.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatStyle {
+ /**
+ * Decimal format defined by pattern
+ * @stable ICU 3.0
+ */
+ UNUM_PATTERN_DECIMAL=0,
+ /** Decimal format */
+ UNUM_DECIMAL=1,
+ /** Currency format */
+ UNUM_CURRENCY,
+ /** Percent format */
+ UNUM_PERCENT,
+ /** Scientific format */
+ UNUM_SCIENTIFIC,
+ /** Spellout rule-based format */
+ UNUM_SPELLOUT,
+ /**
+ * Ordinal rule-based format
+ * @stable ICU 3.0
+ */
+ UNUM_ORDINAL,
+ /**
+ * Duration rule-based format
+ * @stable ICU 3.0
+ */
+ UNUM_DURATION,
+ /**
+ * Rule-based format defined by pattern
+ * @stable ICU 3.0
+ */
+ UNUM_PATTERN_RULEBASED,
+ /** Default format */
+ UNUM_DEFAULT = UNUM_DECIMAL,
+ /** (Alias for UNUM_PATTERN_DECIMAL) */
+ UNUM_IGNORE = UNUM_PATTERN_DECIMAL
+} UNumberFormatStyle;
+
+/** The possible number format rounding modes.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatRoundingMode {
+ UNUM_ROUND_CEILING,
+ UNUM_ROUND_FLOOR,
+ UNUM_ROUND_DOWN,
+ UNUM_ROUND_UP,
+ /**
+ * Half-even rounding, misspelled name
+ * @deprecated, ICU 3.8
+ */
+ UNUM_FOUND_HALFEVEN,
+ UNUM_ROUND_HALFDOWN,
+ UNUM_ROUND_HALFUP,
+ /**
+ * Half-even rounding
+ * @stable, ICU 3.8
+ */
+ UNUM_ROUND_HALFEVEN = UNUM_FOUND_HALFEVEN
+} UNumberFormatRoundingMode;
+
+/** The possible number format pad positions.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatPadPosition {
+ UNUM_PAD_BEFORE_PREFIX,
+ UNUM_PAD_AFTER_PREFIX,
+ UNUM_PAD_BEFORE_SUFFIX,
+ UNUM_PAD_AFTER_SUFFIX
+} UNumberFormatPadPosition;
+
+/**
+ * Create and return a new UNumberFormat for formatting and parsing
+ * numbers. A UNumberFormat may be used to format numbers by calling
+ * {@link #unum_format }, and to parse numbers by calling {@link #unum_parse }.
+ * The caller must call {@link #unum_close } when done to release resources
+ * used by this object.
+ * @param style The type of number format to open: one of
+ * UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC, UNUM_SPELLOUT,
+ * UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT.
+ * If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the
+ * number format is opened using the given pattern, which must conform
+ * to the syntax described in DecimalFormat or RuleBasedNumberFormat,
+ * respectively.
+ * @param pattern A pattern specifying the format to use.
+ * This parameter is ignored unless the style is
+ * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
+ * @param patternLength The number of characters in the pattern, or -1
+ * if null-terminated. This parameter is ignored unless the style is
+ * UNUM_PATTERN.
+ * @param locale A locale identifier to use to determine formatting
+ * and parsing conventions, or NULL to use the default locale.
+ * @param parseErr A pointer to a UParseError struct to receive the
+ * details of any parsing errors, or NULL if no parsing error details
+ * are desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return A pointer to a newly created UNumberFormat, or NULL if an
+ * error occurred.
+ * @see unum_close
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE UNumberFormat* U_EXPORT2
+unum_open( UNumberFormatStyle style,
+ const UChar* pattern,
+ int32_t patternLength,
+ const char* locale,
+ UParseError* parseErr,
+ UErrorCode* status);
+
+
+/**
+* Close a UNumberFormat.
+* Once closed, a UNumberFormat may no longer be used.
+* @param fmt The formatter to close.
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_close(UNumberFormat* fmt);
+
+/**
+ * Open a copy of a UNumberFormat.
+ * This function performs a deep copy.
+ * @param fmt The format to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UNumberFormat identical to fmt.
+ * @stable ICU 2.0
+ */
+U_STABLE UNumberFormat* U_EXPORT2
+unum_clone(const UNumberFormat *fmt,
+ UErrorCode *status);
+
+/**
+* Format an integer using a UNumberFormat.
+* The integer will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, position->field
+* is read. On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists. This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_format( const UNumberFormat* fmt,
+ int32_t number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos,
+ UErrorCode* status);
+
+/**
+* Format an int64 using a UNumberFormat.
+* The int64 will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, position->field
+* is read. On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists. This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_formatInt64(const UNumberFormat *fmt,
+ int64_t number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos,
+ UErrorCode* status);
+
+/**
+* Format a double using a UNumberFormat.
+* The double will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, position->field
+* is read. On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists. This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_formatDouble( const UNumberFormat* fmt,
+ double number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos, /* 0 if ignore */
+ UErrorCode* status);
+
+/**
+ * Format a double currency amount using a UNumberFormat.
+ * The double will be formatted according to the UNumberFormat's locale.
+ * @param fmt the formatter to use
+ * @param number the number to format
+ * @param currency the 3-letter null-terminated ISO 4217 currency code
+ * @param result a pointer to the buffer to receive the formatted number
+ * @param resultLength the maximum number of UChars to write to result
+ * @param pos a pointer to a UFieldPosition. On input,
+ * position->field is read. On output, position->beginIndex and
+ * position->endIndex indicate the beginning and ending indices of
+ * field number position->field, if such a field exists. This
+ * parameter may be NULL, in which case it is ignored.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_formatDouble
+ * @see unum_parseDoubleCurrency
+ * @see UFieldPosition
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+unum_formatDoubleCurrency(const UNumberFormat* fmt,
+ double number,
+ UChar* currency,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition* pos, /* ignored if 0 */
+ UErrorCode* status);
+
+/**
+* Parse a string into an integer using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_parse( const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+/**
+* Parse a string into an int64 using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parse
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.8
+*/
+U_STABLE int64_t U_EXPORT2
+unum_parseInt64(const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+/**
+* Parse a string into a double using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed double
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_STABLE double U_EXPORT2
+unum_parseDouble( const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+/**
+ * Parse a string into a double and a currency using a UNumberFormat.
+ * The string will be parsed according to the UNumberFormat's locale.
+ * @param fmt the formatter to use
+ * @param text the text to parse
+ * @param textLength the length of text, or -1 if null-terminated
+ * @param parsePos a pointer to an offset index into text at which to
+ * begin parsing. On output, *parsePos will point after the last
+ * parsed character. This parameter may be 0, in which case parsing
+ * begins at offset 0.
+ * @param currency a pointer to the buffer to receive the parsed null-
+ * terminated currency. This buffer must have a capacity of at least
+ * 4 UChars.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the parsed double
+ * @see unum_parseDouble
+ * @see unum_formatDoubleCurrency
+ * @stable ICU 3.0
+ */
+U_STABLE double U_EXPORT2
+unum_parseDoubleCurrency(const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t* parsePos, /* 0 = start */
+ UChar* currency,
+ UErrorCode* status);
+
+/**
+ * Set the pattern used by a UNumberFormat. This can only be used
+ * on a DecimalFormat, other formats return U_ILLEGAL_ARGUMENT_ERROR
+ * in the status.
+ * @param format The formatter to set.
+ * @param localized TRUE if the pattern is localized, FALSE otherwise.
+ * @param pattern The new pattern
+ * @param patternLength The length of pattern, or -1 if null-terminated.
+ * @param parseError A pointer to UParseError to recieve information
+ * about errors occurred during parsing, or NULL if no parse error
+ * information is desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @see unum_toPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+unum_applyPattern( UNumberFormat *format,
+ UBool localized,
+ const UChar *pattern,
+ int32_t patternLength,
+ UParseError *parseError,
+ UErrorCode *status
+ );
+
+/**
+* Get a locale for which decimal formatting patterns are available.
+* A UNumberFormat in a locale returned by this function will perform the correct
+* formatting and parsing for the locale. The results of this call are not
+* valid for rule-based number formats.
+* @param index The index of the desired locale.
+* @return A locale for which number formatting patterns are available, or 0 if none.
+* @see unum_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2
+unum_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have decimal formatting patterns available. The
+* results of this call are not valid for rule-based number formats.
+* This function is useful for determining the loop ending condition for
+* calls to {@link #unum_getAvailable }.
+* @return The number of locales for which decimal formatting patterns are available.
+* @see unum_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_countAvailable(void);
+
+/** The possible UNumberFormat numeric attributes @stable ICU 2.0 */
+typedef enum UNumberFormatAttribute {
+ /** Parse integers only */
+ UNUM_PARSE_INT_ONLY,
+ /** Use grouping separator */
+ UNUM_GROUPING_USED,
+ /** Always show decimal point */
+ UNUM_DECIMAL_ALWAYS_SHOWN,
+ /** Maximum integer digits */
+ UNUM_MAX_INTEGER_DIGITS,
+ /** Minimum integer digits */
+ UNUM_MIN_INTEGER_DIGITS,
+ /** Integer digits */
+ UNUM_INTEGER_DIGITS,
+ /** Maximum fraction digits */
+ UNUM_MAX_FRACTION_DIGITS,
+ /** Minimum fraction digits */
+ UNUM_MIN_FRACTION_DIGITS,
+ /** Fraction digits */
+ UNUM_FRACTION_DIGITS,
+ /** Multiplier */
+ UNUM_MULTIPLIER,
+ /** Grouping size */
+ UNUM_GROUPING_SIZE,
+ /** Rounding Mode */
+ UNUM_ROUNDING_MODE,
+ /** Rounding increment */
+ UNUM_ROUNDING_INCREMENT,
+ /** The width to which the output of <code>format()</code> is padded. */
+ UNUM_FORMAT_WIDTH,
+ /** The position at which padding will take place. */
+ UNUM_PADDING_POSITION,
+ /** Secondary grouping size */
+ UNUM_SECONDARY_GROUPING_SIZE,
+ /** Use significant digits
+ * @stable ICU 3.0 */
+ UNUM_SIGNIFICANT_DIGITS_USED,
+ /** Minimum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MIN_SIGNIFICANT_DIGITS,
+ /** Maximum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MAX_SIGNIFICANT_DIGITS,
+ /** Lenient parse mode used by rule-based formats.
+ * @stable ICU 3.0
+ */
+ UNUM_LENIENT_PARSE
+} UNumberFormatAttribute;
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE.
+* @return The value of attr.
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_getAttribute(const UNumberFormat* fmt,
+ UNumberFormatAttribute attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce. If the
+* formatter does not understand the attribute, the call is ignored. Rule-based formatters only understand
+* the lenient-parse attribute.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE,
+* or UNUM_LENIENT_PARSE.
+* @param newValue The new value of attr.
+* @see unum_getAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_setAttribute( UNumberFormat* fmt,
+ UNumberFormatAttribute attr,
+ int32_t newValue);
+
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, -1 is returned.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT.
+* @return The value of attr.
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE double U_EXPORT2
+unum_getDoubleAttribute(const UNumberFormat* fmt,
+ UNumberFormatAttribute attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, this call is ignored.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT.
+* @param newValue The new value of attr.
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_setDoubleAttribute( UNumberFormat* fmt,
+ UNumberFormatAttribute attr,
+ double newValue);
+
+/** The possible UNumberFormat text attributes @stable ICU 2.0*/
+typedef enum UNumberFormatTextAttribute {
+ /** Positive prefix */
+ UNUM_POSITIVE_PREFIX,
+ /** Positive suffix */
+ UNUM_POSITIVE_SUFFIX,
+ /** Negative prefix */
+ UNUM_NEGATIVE_PREFIX,
+ /** Negative suffix */
+ UNUM_NEGATIVE_SUFFIX,
+ /** The character used to pad to the format width. */
+ UNUM_PADDING_CHARACTER,
+ /** The ISO currency code */
+ UNUM_CURRENCY_CODE,
+ /**
+ * The default rule set. This is only available with rule-based formatters.
+ * @stable ICU 3.0
+ */
+ UNUM_DEFAULT_RULESET,
+ /**
+ * The public rule sets. This is only available with rule-based formatters.
+ * This is a read-only attribute. The public rulesets are returned as a
+ * single string, with each ruleset name delimited by ';' (semicolon).
+ * @stable ICU 3.0
+ */
+ UNUM_PUBLIC_RULESETS
+} UNumberFormatTextAttribute;
+
+/**
+* Get a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers. If the formatter
+* does not understand the attributre, U_UNSUPPORTED_ERROR is returned as the status.
+* Rule-based formatters only understand UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS.
+* @param fmt The formatter to query.
+* @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* UNUM_DEFAULT_RULESET, or UNUM_PUBLIC_RULESETS.
+* @param result A pointer to a buffer to receive the attribute.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_setTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_getTextAttribute( const UNumberFormat* fmt,
+ UNumberFormatTextAttribute tag,
+ UChar* result,
+ int32_t resultLength,
+ UErrorCode* status);
+
+/**
+* Set a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers. Rule-based formatters
+* only understand UNUM_DEFAULT_RULESET.
+* @param fmt The formatter to set.
+* @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* or UNUM_DEFAULT_RULESET.
+* @param newValue The new value of attr.
+* @param newValueLength The length of newValue, or -1 if null-terminated.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see unum_getTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_setTextAttribute( UNumberFormat* fmt,
+ UNumberFormatTextAttribute tag,
+ const UChar* newValue,
+ int32_t newValueLength,
+ UErrorCode *status);
+
+/**
+ * Extract the pattern from a UNumberFormat. The pattern will follow
+ * the DecimalFormat pattern syntax.
+ * @param fmt The formatter to query.
+ * @param isPatternLocalized TRUE if the pattern should be localized,
+ * FALSE otherwise. This is ignored if the formatter is a rule-based
+ * formatter.
+ * @param result A pointer to a buffer to receive the pattern.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_applyPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+unum_toPattern( const UNumberFormat* fmt,
+ UBool isPatternLocalized,
+ UChar* result,
+ int32_t resultLength,
+ UErrorCode* status);
+
+
+/**
+ * Constants for specifying a number format symbol.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatSymbol {
+ /** The decimal separator */
+ UNUM_DECIMAL_SEPARATOR_SYMBOL = 0,
+ /** The grouping separator */
+ UNUM_GROUPING_SEPARATOR_SYMBOL = 1,
+ /** The pattern separator */
+ UNUM_PATTERN_SEPARATOR_SYMBOL = 2,
+ /** The percent sign */
+ UNUM_PERCENT_SYMBOL = 3,
+ /** Zero*/
+ UNUM_ZERO_DIGIT_SYMBOL = 4,
+ /** Character representing a digit in the pattern */
+ UNUM_DIGIT_SYMBOL = 5,
+ /** The minus sign */
+ UNUM_MINUS_SIGN_SYMBOL = 6,
+ /** The plus sign */
+ UNUM_PLUS_SIGN_SYMBOL = 7,
+ /** The currency symbol */
+ UNUM_CURRENCY_SYMBOL = 8,
+ /** The international currency symbol */
+ UNUM_INTL_CURRENCY_SYMBOL = 9,
+ /** The monetary separator */
+ UNUM_MONETARY_SEPARATOR_SYMBOL = 10,
+ /** The exponential symbol */
+ UNUM_EXPONENTIAL_SYMBOL = 11,
+ /** Per mill symbol */
+ UNUM_PERMILL_SYMBOL = 12,
+ /** Escape padding character */
+ UNUM_PAD_ESCAPE_SYMBOL = 13,
+ /** Infinity symbol */
+ UNUM_INFINITY_SYMBOL = 14,
+ /** Nan symbol */
+ UNUM_NAN_SYMBOL = 15,
+ /** Significant digit symbol
+ * @stable ICU 3.0 */
+ UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16,
+ /** The monetary grouping separator
+ * @stable ICU 3.6
+ */
+ UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17,
+ /** count symbol constants */
+ UNUM_FORMAT_SYMBOL_COUNT = 18
+} UNumberFormatSymbol;
+
+/**
+* Get a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign. This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to query.
+* @param symbol The UNumberFormatSymbol constant for the symbol to get
+* @param buffer The string buffer that will receive the symbol string;
+* if it is NULL, then only the length of the symbol is returned
+* @param size The size of the string buffer
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The length of the symbol; the buffer is not modified if
+* <code>length>=size</code>
+* @see unum_setSymbol
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_getSymbol(const UNumberFormat *fmt,
+ UNumberFormatSymbol symbol,
+ UChar *buffer,
+ int32_t size,
+ UErrorCode *status);
+
+/**
+* Set a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign. This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to set.
+* @param symbol The UNumberFormatSymbol constant for the symbol to set
+* @param value The string to set the symbol to
+* @param length The length of the string, or -1 for a zero-terminated string
+* @param status A pointer to an UErrorCode to receive any errors.
+* @see unum_getSymbol
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_setSymbol(UNumberFormat *fmt,
+ UNumberFormatSymbol symbol,
+ const UChar *value,
+ int32_t length,
+ UErrorCode *status);
+
+
+/**
+ * Get the locale for this number format object.
+ * You can choose between valid and actual locale.
+ * @param fmt The formatter to get the locale from
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+unum_getLocaleByType(const UNumberFormat *fmt,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
http://git-wip-us.apache.org/repos/asf/couchdb/blob/79f67b6a/src/apps/couch_collate/platform/osx/icu/unicode/uobject.h
----------------------------------------------------------------------
diff --git a/src/apps/couch_collate/platform/osx/icu/unicode/uobject.h b/src/apps/couch_collate/platform/osx/icu/unicode/uobject.h
new file mode 100644
index 0000000..3d8b96e
--- /dev/null
+++ b/src/apps/couch_collate/platform/osx/icu/unicode/uobject.h
@@ -0,0 +1,308 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/** U_OVERRIDE_CXX_ALLOCATION - Define this to override operator new and
+ * delete in UMemory. Enabled by default for ICU.
+ *
+ * Enabling forces all allocation of ICU object types to use ICU's
+ * memory allocation. On Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library, meaning that
+ * the app and ICU will be using different heaps.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/** U_HAVE_PLACEMENT_NEW - Define this to define the placement new and
+ * delete in UMemory for STL.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+
+/** U_HAVE_DEBUG_LOCATION_NEW - Define this to define the MFC debug
+ * version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifndef U_HAVE_DEBUG_LOCATION_NEW
+#define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p);
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p);
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See new().
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) { return ptr; }
+
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See delete().
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line);
+ /**
+ * This method provides a matching delete for the MFC debug new
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line);
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions like for ICU's "poor man's RTTI".
+ * It does not contain default implementations of virtual methods
+ * like getDynamicClassID to allow derived classes such as Format
+ * to declare these as pure virtual.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Destructor.
+ *
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const = 0;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor
+ // commented out because UObject is abstract (see getDynamicClassID)
+ // inline UObject() {}
+
+ // copy constructor
+ // commented out because UObject is abstract (see getDynamicClassID)
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO Sometime in the future. Implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators
+ virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+ inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support co-variant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+
+// Future implementation for RTTI that support subtyping. [alan]
+//
+// public:
+// /**
+// * @internal
+// */
+// static UClassID getStaticClassID();
+//
+// /**
+// * @internal
+// */
+// UBool instanceOf(UClassID type) const;
+};
+
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files. The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+// /**
+// * This macro adds ICU RTTI to an ICU concrete class implementation.
+// * This macro should be invoked in *.cpp files. The corresponding
+// * header should declare getDynamicClassID and getStaticClassID.
+// *
+// * @param myClass The name of the class that needs RTTI defined.
+// * @param myParent The name of the myClass's parent.
+// * @internal
+// */
+/*#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass, myParent) \
+ UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass, myParent) \
+ UClassID myClass::getDynamicClassID() const { \
+ return myClass::getStaticClassID(); \
+ }
+*/
+
+
+U_NAMESPACE_END
+
+#endif
http://git-wip-us.apache.org/repos/asf/couchdb/blob/79f67b6a/src/apps/couch_collate/platform/osx/icu/unicode/uobslete.h
----------------------------------------------------------------------
diff --git a/src/apps/couch_collate/platform/osx/icu/unicode/uobslete.h b/src/apps/couch_collate/platform/osx/icu/unicode/uobslete.h
new file mode 100644
index 0000000..91be243
--- /dev/null
+++ b/src/apps/couch_collate/platform/osx/icu/unicode/uobslete.h
@@ -0,0 +1,32 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name:
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+* Contains data for commenting out APIs.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UOBSLETE_H
+#define UOBSLETE_H
+
+#ifdef U_HIDE_OBSOLETE_API
+
+# if U_DISABLE_RENAMING
+# else
+# endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_OBSOLETE_API */
+#endif /* UOBSLETE_H */
+
http://git-wip-us.apache.org/repos/asf/couchdb/blob/79f67b6a/src/apps/couch_collate/platform/osx/icu/unicode/urbtok.h
----------------------------------------------------------------------
diff --git a/src/apps/couch_collate/platform/osx/icu/unicode/urbtok.h b/src/apps/couch_collate/platform/osx/icu/unicode/urbtok.h
new file mode 100644
index 0000000..936ddde
--- /dev/null
+++ b/src/apps/couch_collate/platform/osx/icu/unicode/urbtok.h
@@ -0,0 +1,126 @@
+/*
+******************************************************************************
+* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef URBTOK_H
+#define URBTOK_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/utext.h"
+#include "unicode/ubrk.h"
+#include "unicode/parseerr.h"
+
+
+typedef struct RuleBasedTokenRange {
+ signed long location;
+ signed long length;
+} RuleBasedTokenRange;
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openRules(const UChar *rules,
+ int32_t rulesLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * @param rules A set of rules specifying the text breaking conventions. The binary rules
+ * must be at least 32-bit aligned. Note: This version makes a copy of the
+ * rules, so after calling this function the caller can close or release
+ * the rules that were passed to this function. The copy created by this
+ * call will be freed when ubrk_close() is called on the UBreakIterator*.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openBinaryRules(const uint8_t *rules,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * @param rules A set of rules specifying the text breaking conventions. The binary rules
+ * must be at least 32-bit aligned. Note: This version does NOT make a copy
+ * of the rules, so after calling this function the caller must not close or
+ * release the rules passed to this function until after they are finished
+ * with this UBreakIterator* (and any others created using the same rules)
+ * and have called ubrk_close() to close the UBreakIterator* (and any others
+ * using the same rules).
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openBinaryRulesNoCopy(const uint8_t *rules,
+ UErrorCode *status);
+
+/**
+ * Get the (native-endian) binary break rules for this tokenizer.
+ * @param bi The tokenizer to use.
+ * @param buffer The output buffer for the rules. You can pass 0 to get the required size.
+ * @param buffSize The size of the output buffer.
+ * @param status A UErrorCode to receive any errors.
+ * @return The actual size of the binary rules, whether they fit the buffer or not.
+ * @internal
+ */
+U_INTERNAL uint32_t U_EXPORT2
+urbtok_getBinaryRules(UBreakIterator *bi,
+ uint8_t *buffer,
+ uint32_t buffSize,
+ UErrorCode *status);
+
+/**
+ * Tokenize text using a rule-based tokenizer.
+ * @param bi The tokenizer to use.
+ * @param maxTokens The maximum number of tokens to return.
+ * @param outTokens An array of RuleBasedTokenRange to fill in with the tokens.
+ * @param outTokenFlags An (optional) array of uint32_t to fill in with token flags.
+ * @return The number of tokens returned, 0 if done.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+urbtok_tokenize(UBreakIterator *bi,
+ int32_t maxTokens,
+ RuleBasedTokenRange *outTokens,
+ unsigned long *outTokenFlags);
+
+/**
+ * Swap the endianness of a set of binary break rules.
+ * @param rules A set of rules which need swapping.
+ * @param buffer The output buffer for the swapped rules, which must be the same
+ * size as the input rules buffer.
+ * @param inIsBigEndian UBool indicating whether the input is big-endian
+ * @param outIsBigEndian UBool indicating whether the output should be big-endian
+ * @param status A UErrorCode to receive any errors.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+urbtok_swapBinaryRules(const uint8_t *rules,
+ uint8_t *buffer,
+ UBool inIsBigEndian,
+ UBool outIsBigEndian,
+ UErrorCode *status);
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif