You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2007/02/03 17:58:56 UTC
svn commit: r503251 [12/13] -
/incubator/uima/uimacpp/trunk/src/framework/uima/
Added: incubator/uima/uimacpp/trunk/src/framework/uima/unistrref.hpp
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/src/framework/uima/unistrref.hpp?view=auto&rev=503251
==============================================================================
--- incubator/uima/uimacpp/trunk/src/framework/uima/unistrref.hpp (added)
+++ incubator/uima/uimacpp/trunk/src/framework/uima/unistrref.hpp Sat Feb 3 08:58:54 2007
@@ -0,0 +1,2279 @@
+#ifndef UIMA_UNICODESTRINGREF_HPP
+#define UIMA_UNICODESTRINGREF_HPP
+/** \file unistrref.hpp .
+-----------------------------------------------------------------------------
+
+
+
+ string interface of uima::UnicodeStringRef
+
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+
+-----------------------------------------------------------------------------
+
+
+ \brief Shallow string object consisting of a pair of string pointer and a length
+
+-----------------------------------------------------------------------------
+*/
+
+#include "uima/pragmas.hpp" //must be included first to disable warnings
+
+#include <vector>
+#include <string>
+#include <iostream>
+
+#include "uima/types.h"
+#include "uima/assertmsg.h"
+#include "uima/ccsid.hpp"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "uima/strtools.hpp"
+
+/* ----------------------------------------------------------------------- */
+/* Interface dependencies */
+/* ----------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------- */
+/* Types / Classes */
+/* ----------------------------------------------------------------------- */
+
+namespace uima {
+
+ /**
+ * The class <TT>UnicodeStringRef</TT> provides support for non zero-terminated
+ * strings that are presented as pointers to Unicode character arrays
+ * with an associated length.
+ * As this type of string is supposed to be used only as string reference into
+ * read-only buffers, the string pointer is constant.
+ * The member functions are named to mirror the icu::UnicodeString interface,
+ * but only const member functions are provided.
+ * This class is a quick, light-weight, shallow string
+ * (internally it consists only of a pointer and a length)
+ * which can be copied by value without performance penalty.
+ * It allows references into other string buffers to be treated like real
+ * string objects.
+ * Since it does not own its string memory, care must be taken to make sure
+ * the lifetime of a UnicodeStringRef object does not exceed the lifetime
+ * of the Unicode character buffer it references.
+ */
+ class UIMA_LINK_IMPORTSPEC UnicodeStringRef {
+ public:
+ /**
+ * Default Constructor
+ */
+ UnicodeStringRef( void );
+
+ /**
+ * Constructor from icu::UnicodeString
+ */
+ UnicodeStringRef( const icu::UnicodeString & crUniString );
+
+ /**
+ * Constructor from zero terminated string
+ */
+ explicit UnicodeStringRef( UChar const * cpacString );
+
+ /**
+ * Constructor from string and length
+ */
+ UnicodeStringRef( UChar const * cpacString, int32_t uiLength );
+
+ /**
+ * Constructor from two pointers (begin/end).
+ * Note: end points to the first char <em>behind</em> the string.
+ * @deprecated Replace with UnicodeStringRef(paucStringBegin,paucStringEnd-paucStringBegin).
+ */
+ UnicodeStringRef( UChar const * paucStringBegin, UChar const * paucStringEnd );
+
+ ///Accessor for the number of bytes occupied by this string
+ int32_t getSizeInBytes( void ) const;
+
+ ///CONST Accessor for the string content (NOT ZERO DELIMITED!).
+ UChar const * getBuffer( void ) const;
+
+ ///Assignment operator
+ UnicodeStringRef & operator=( UnicodeStringRef const & crclRHS );
+
+ ///Equality operator
+ int operator==( const UnicodeStringRef & crclRHS ) const;
+ ///Inequality operator
+ int operator!=( const UnicodeStringRef & crclRHS ) const;
+ ///less operator
+ bool operator< ( UnicodeStringRef const & text ) const;
+ ///less equal operator
+ bool operator<=( UnicodeStringRef const & text ) const;
+ ///greater operator
+ bool operator> ( UnicodeStringRef const & text ) const;
+ ///greater equal operator
+ bool operator>=( UnicodeStringRef const & text ) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeStringRef to
+ * the characters in <TT>text</TT>.
+ * @param text The UnicodeStringRef to compare to this one.
+ * @return The result of bitwise character comparison: 0 if <TT>text</TT>
+ * contains the same characters as this, -1 if the characters in <TT>text</TT>
+ * are bitwise less than the characters in this, +1 if they are bitwise greater.
+ * NOTE(review): icu::UnicodeString::compare documents the opposite orientation
+ * (-1 when this is less than <TT>text</TT>); confirm against the implementation.
+ * @stable
+ */
+ inline int8_t compare(const UnicodeStringRef& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeStringRef to
+ * the characters in <TT>text</TT>.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if <TT>text</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>text</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>text</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(const icu::UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcText</TT>
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters of text to compare.
+ * @param srcText the text to be compared
+ * @return The result of bitwise character comparison: 0 if <TT>text</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>text</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>text</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLength the number of characters in <TT>srcText</TT> to compare
+ * @return The result of bitwise character comparison: 0 if <TT>srcText</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>srcText</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>srcText</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeStringRef with the first
+ * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
+ * @param srcChars The characters to compare to this UnicodeStringRef.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>srcChars</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(UChar const *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the first
+ * <TT>length</TT> characters in <TT>srcChars</TT>.
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>srcChars</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ UChar const *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into <TT>srcChars</TT> to start comparison
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if <TT>srcChars</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>srcChars</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>srcChars</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>limit</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
+ * @return The result of bitwise character comparison: 0 if <TT>srcText</TT>
+ * contains the same characters as this, -1 if the characters in
+ * <TT>srcText</TT> are bitwise less than the characters in this, +1 if the
+ * characters in <TT>srcText</TT> are bitwise greater than the characters
+ * in this.
+ * @stable
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(const UnicodeStringRef& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(UChar const *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ UChar const *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(const UnicodeStringRef& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(UChar const *srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ UChar const *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this starts with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool startsWith(const UnicodeStringRef& text) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this starts with the characters in <TT>srcText</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool startsWith(const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool startsWith(UChar const *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcChars</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool startsWith(UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this ends with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool endsWith(const UnicodeStringRef& text) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcText</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool endsWith(const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool endsWith(UChar const *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcChars</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable
+ */
+ inline bool endsWith(UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(const UnicodeStringRef& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(const UnicodeStringRef& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(const UnicodeStringRef& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcText</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar const *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar const *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ int32_t indexOf(UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code unit <TT>c</TT>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the code unit <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code unit <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(const UnicodeStringRef& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(const UnicodeStringRef& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(const UnicodeStringRef& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcText</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar const *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar const *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable
+ */
+ int32_t lastIndexOf(UChar const *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code unit <TT>c</TT>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the code unit <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code unit <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * @param offset a valid offset into the text
+ * @returns the code unit at offset <tt>offset</tt>
+ * @stable
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * @param offset a valid offset into the text
+ * @returns the code unit at offset <tt>offset</tt>
+ * @stable
+ */
+ inline UChar operator [] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset <tt>offset</tt>.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @returns the code point of text at <tt>offset</tt>
+ * @stable
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to the second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ */
+ inline int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the first code unit after the pair).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ */
+ inline int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Examples:
+ * <code>
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeStringRef s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ *
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ * </code>
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction without conversion */
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
+ * beginning at <tt>dstStart</tt>.
+ * If the string aliases to <code>dst</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + length</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + (limit - start)</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dst.
+ * This is a convenience function that
+ * checks if there is enough space in dst,
+ * extracts the entire string if possible,
+ * and NUL-terminates dst if possible.
+ *
+ * If the string fits into dst but cannot be NUL-terminated
+ * (length()==dstCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dst
+ * (length()>dstCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to <code>dst</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dst Destination string buffer.
+ * @param dstCapacity Number of UChars available at dst.
+ * @param errorCode ICU error code.
+ * @return length()
+ */
+ int32_t
+ extract(UChar *dst, int32_t dstCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
+ * <tt>dst</tt>.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param dst UnicodeString into which to copy characters.
+ * @note Nothing is returned (the function is <tt>void</tt>); the result is delivered in <TT>dst</TT>.
+ * @stable
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& dst) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>dst</tt>.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param dst UnicodeString into which to copy characters.
+ * @note Nothing is returned (the function is <tt>void</tt>); the result is delivered in <TT>dst</TT>.
+ * @stable
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& dst) const;
+
+ /* Substring extraction with conversion */
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * NOTE: It is assumed that the target is big enough to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage = 0) const;
+
+ /**
+ * Convert the UnicodeStringRef into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param target destination string buffer, can be NULL if targetCapacity==0
+ * @param targetCapacity the number of chars available at target
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than targetCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable
+ */
+ int32_t extract(char *target, int32_t targetCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</TT>, <tt>start + startLength</TT>) into a std::string object
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target string for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage.
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * @return the output string length, not including the terminating NUL
+ * @stable
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ std::string & target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy all the characters in the string into an std::string object
+ * in a specified codepage. Equivalent to
+ * extract(0, length(), target, codepage)
+ *
+ * @param target the target string for extraction
+ * @param codepage the desired codepage for the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable
+ */
+ inline int32_t extract(std::string & target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy all the characters in the string into an std::string object
+ * in UTF-8. Slightly more efficient than asUTF8() as it avoids
+ * one copy.
+ *
+ * @param target the target string for extraction
+ * @return the output string length, not including the terminating NUL
+ */
+ int32_t extractUTF8(std::string & target) const;
+
+ /**
+ * Convert to a UTF8 string
+ * @return a std::string
+ */
+ inline std::string asUTF8(void) const;
+
+ /**
+ * Release contents of string container allocated by extract methods
+ * Useful when caller and callee use different heaps,
+ * e.g. when debug code uses a release library.
+ * Is static so can be called on the <TT>UnicodeStringRef</TT> class directly.
+ */
+ static void release(std::string & target);
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeStringRef object.
+ * The length is the number of characters in the text.
+ * @returns the length of the UnicodeStringRef object
+ * @stable
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of UChar code units to check
+ * @return the number of code points in the specified code units
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=0x7fffffff) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return TRUE if this string contains 0 characters, FALSE otherwise.
+ */
+ inline bool isEmpty(void) const;
+
+ /**
+ * Set the text in the UnicodeStringRef object to the characters in
+ * <TT>srcText</TT>.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable
+ */
+ inline UnicodeStringRef& setTo(const UnicodeStringRef& srcText);
+
+ /**
+ * Set the text in the UnicodeStringRef object to the characters in
+ * <TT>srcText</TT>.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable
+ */
+ inline UnicodeStringRef& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeStringRef object to the characters
+ * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable
+ */
+ inline UnicodeStringRef& setTo(const UChar *srcChars,
+ int32_t srcLength);
+ /**
+ * Print a single byte version to outStream.
+ * The encoding is UTF-8 if outStream is directed to disk;
+ * if outStream is cout or cerr the encoding is a Console-CCSID
+ * that will allow most characters to be readable in a shell/command window.
+ */
+ void toSingleByteStream(std::ostream & outStream) const;
+
+
+
+ private:
+ /* --- functions -------------------------------------------------------- */
+
+ // Forwarding overload used by the public compare() overloads and the
+ // relational operators. Its inline definition later in this header fetches
+ // srcText.getBuffer() and hands all five arguments on to the
+ // UChar const* overload of doCompare().
+ inline int8_t
+ doCompare( int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ // Out-of-line worker that the UnicodeStringRef overload and the UChar-based
+ // compare() overloads forward to; its body is not part of this header section.
+ // NOTE(review): operator< / operator> in this header test the result against
+ // exactly -1 and 1, so this is expected to return only -1, 0 or 1 -- confirm
+ // against the implementation.
+ int8_t
+ doCompare( int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+ // Forwarding overload used by the compareCodePointOrder() family. Its inline
+ // definition later in this header fetches srcText.getBuffer() and passes
+ // everything on to the UChar const* overload.
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+ // Out-of-line worker behind the compareCodePointOrder() overloads; the
+ // public wrappers pass their (start, length, srcChars, srcStart, srcLength)
+ // arguments to it unchanged. Body is not part of this header section.
+ int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+ // Overload taking the source as a UnicodeStringRef; the caseCompare()
+ // overloads with a UnicodeStringRef argument call it and pass their
+ // options value through unchanged.
+ // NOTE(review): presumably forwards to the UChar const* overload like the
+ // other do*() pairs -- confirm against its inline definition.
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeStringRef& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ // Out-of-line worker called by caseCompare(const UChar*, ...); receives the
+ // caller's options value unchanged. Body is not part of this header section.
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+ // Out-of-line helper; body is not part of this header section.
+ // NOTE(review): presumably the worker behind the public
+ // indexOf(UChar c, start, length) overloads -- confirm against the callers.
+ int32_t doIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+ // Out-of-line helper; body is not part of this header section.
+ // NOTE(review): presumably the worker behind the public
+ // lastIndexOf(UChar c, start, length) overloads -- confirm against the callers.
+ int32_t doLastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ // Helper with the same parameter list as the public
+ // extract(start, length, UChar *dst, dstStart), minus the default argument.
+ // NOTE(review): presumably does the actual copy for extract()/extractBetween()
+ // -- confirm against its inline definition.
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart) const;
+ // Helper with the same parameter list as the public
+ // extract(start, length, UnicodeString& dst).
+ // NOTE(review): presumably does the actual copy into dst -- confirm against
+ // its inline definition.
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& dst) const;
+
+ // Clamp start and length in place (both are passed by reference) so that
+ // the range they describe does not run past the end of the string:
+ // start is limited to iv_uiLength and length to iv_uiLength - start.
+ inline void
+ pinIndices(int32_t& start,
+ int32_t& length) const;
+ // constants
+ // Private, class-scope enumerator with the value 0xffff.
+ // NOTE(review): no use of it is visible in this part of the header.
+ enum {
+ kInvalidUChar=0xffff // invalid UChar index
+ };
+ /* --- variables -------------------------------------------------------- */
+ // Pointer to the first UChar of the referenced text. The constructors and
+ // operator= only store/copy this pointer; NULL for a default-constructed
+ // (empty) reference.
+ UChar const * iv_pUChars;
+ // Number of UChar code units referenced; returned by length().
+ int32_t iv_uiLength;
+ }
+ ; // class UnicodeStringRef
+
+ ///Output stream support for UnicodeStringRef (Note: inside namespace)
+ UIMA_LINK_IMPORTSPEC std::ostream &
+ operator << (
+ std::ostream & outStream,
+ const uima::UnicodeStringRef & crUStrRef
+ );
+} // namespace uima
+
+
+/* ----------------------------------------------------------------------- */
+/* Implementation UnicodeStringRef */
+/* ----------------------------------------------------------------------- */
+
+namespace uima {
+
+ // Default constructor: an empty reference (NULL buffer pointer, length 0).
+ inline
+ UnicodeStringRef::UnicodeStringRef( void )
+   : iv_pUChars(NULL)
+   , iv_uiLength(0)
+ {}
+
+ // Construct a reference onto the contents of an ICU UnicodeString.
+ // Only the buffer pointer and the length reported by crUniString are
+ // stored; no characters are copied.
+ inline UnicodeStringRef::UnicodeStringRef(const icu::UnicodeString & crUniString)
+   : iv_pUChars(crUniString.getBuffer())
+   , iv_uiLength(crUniString.length())
+ {}
+
+ // Construct a reference onto a NUL-terminated UChar string.
+ // The length is measured with u_strlen(); a NULL pointer gives length 0.
+ inline
+ UnicodeStringRef::UnicodeStringRef(UChar const * cpacString)
+   : iv_pUChars(cpacString)
+   , iv_uiLength((cpacString != NULL) ? u_strlen(cpacString) : 0)
+ {
+   // sanity check: a buffer exists, or the reference is NULL with length 0
+   assert( EXISTS(iv_pUChars)
+           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) );
+ }
+
+ // Construct a reference onto uiLength code units starting at cpacString.
+ // Pointer and length are stored as given; nothing is copied or measured.
+ inline
+ UnicodeStringRef::UnicodeStringRef(UChar const * cpacString,
+                                    int32_t uiLength)
+   : iv_pUChars(cpacString)
+   , iv_uiLength(uiLength)
+ {
+   // sanity check: a buffer exists, or the reference is NULL with length 0
+   // (assumes EXISTS() tests for a non-NULL pointer -- defined elsewhere)
+   assert( EXISTS(iv_pUChars)
+           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) );
+ }
+
+ // Construct a reference onto the half-open pointer range
+ // [paucStringBegin, paucStringEnd); the length is the pointer difference.
+ inline
+ UnicodeStringRef::UnicodeStringRef(UChar const * paucStringBegin,
+                                    UChar const * paucStringEnd)
+   : iv_pUChars(paucStringBegin)
+   , iv_uiLength(static_cast<int32_t>(paucStringEnd - paucStringBegin))
+ {
+   // both ends must be supplied and must not be reversed
+   assert(EXISTS(paucStringBegin));
+   assert(EXISTS(paucStringEnd));
+   assert(paucStringEnd >= paucStringBegin);
+   // same sanity check as in the other constructors
+   assert( EXISTS(iv_pUChars)
+           || ((iv_pUChars == NULL) && (iv_uiLength == 0)) );
+ }
+
+ // Number of UChar code units covered by this reference.
+ inline int32_t
+ UnicodeStringRef::length( void ) const {
+   const int32_t codeUnitCount = iv_uiLength;
+   return codeUnitCount;
+ }
+
+ // Size of the referenced text in bytes: length() times sizeof(UChar).
+ inline int32_t
+ UnicodeStringRef::getSizeInBytes( void ) const {
+   return static_cast<int32_t>(iv_uiLength * sizeof(UChar));
+ }
+
+ // Return the code unit at position uiIndex.
+ // uiIndex must satisfy 0 <= uiIndex < length(); this is only checked by
+ // assertions. The index type is signed, so the lower bound is asserted as
+ // well -- otherwise a negative index would silently read in front of the
+ // referenced buffer.
+ inline UChar
+ UnicodeStringRef::operator[]( int32_t uiIndex ) const {
+   assert(uiIndex >= 0);
+   assert(uiIndex < iv_uiLength);
+   assert(EXISTS(iv_pUChars));
+   return iv_pUChars[uiIndex]; //lint !e613: Possible use of null pointer 'UnicodeStringRef<wchar_t>::iv_pUChars' in left argument to operator '['
+ }
+
+ // Equality: true (non-zero) when both references have the same length and
+ // identical code units.
+ // @param crclRHS the reference to compare with
+ // @return non-zero if equal, 0 otherwise
+ inline int
+ UnicodeStringRef::operator==( const UnicodeStringRef & crclRHS ) const {
+   if (iv_uiLength != crclRHS.iv_uiLength) {
+     return false;
+   }
+   // The length is explicit, so the text may contain U+0000. u_strncmp() stops
+   // at the first NUL and would report strings that differ after it as equal;
+   // u_memcmp() always compares exactly iv_uiLength code units (and touches
+   // neither pointer when the count is 0).
+   return u_memcmp(iv_pUChars, crclRHS.iv_pUChars, iv_uiLength) == 0;
+ }
+
+ // Inequality: the logical negation of operator==.
+ inline int
+ UnicodeStringRef::operator!=( const UnicodeStringRef & crclRHS ) const {
+   const int isEqual = ((*this) == crclRHS);
+   return !isEqual;
+ }
+
+ // Assignment: take over the length and buffer pointer of crclRHS.
+ // Only the two members are copied, not the characters they refer to.
+ inline UnicodeStringRef &
+ UnicodeStringRef::operator=( UnicodeStringRef const & crclRHS ) {
+   iv_uiLength = crclRHS.iv_uiLength;
+   iv_pUChars  = crclRHS.iv_pUChars;
+   return *this;
+ }
+
+//========================================
+// Read-only alias methods
+//========================================
+ // Clamp a (start, length) pair in place so that the range
+ // [start, start + length) lies completely inside this string:
+ //   start  is forced into [0, iv_uiLength]
+ //   length is forced into [0, iv_uiLength - start]
+ // Negative values are pinned to 0 (the same pinning icu::UnicodeString
+ // applies), so a caller-supplied negative offset can no longer lead to an
+ // access in front of the referenced buffer.
+ // @param start  in/out: offset of the first code unit of the range
+ // @param length in/out: number of code units in the range
+ inline void
+ UnicodeStringRef::pinIndices(int32_t& start,
+                              int32_t& length) const {
+   // pin start first: the limit for length depends on it
+   if (start < 0) {
+     start = 0;
+   } else if (start > iv_uiLength) {
+     start = iv_uiLength;
+   }
+   if (length < 0) {
+     length = 0;
+   } else if (length > (iv_uiLength - start)) {
+     length = (iv_uiLength - start);
+   }
+ }
+
+ // Greater-than: true only when doCompare() on the two complete strings
+ // reports exactly 1.
+ inline bool
+ UnicodeStringRef::operator> (const UnicodeStringRef& text) const {
+   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
+   return order == 1;
+ }
+
+ // Less-than: true only when doCompare() on the two complete strings
+ // reports exactly -1.
+ inline bool
+ UnicodeStringRef::operator< (const UnicodeStringRef& text) const {
+   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
+   return order == -1;
+ }
+
+ // Greater-or-equal: true unless doCompare() on the two complete strings
+ // reports -1.
+ inline bool
+ UnicodeStringRef::operator>= (const UnicodeStringRef& text) const {
+   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
+   return order != -1;
+ }
+
+ // Less-or-equal: true unless doCompare() on the two complete strings
+ // reports 1.
+ inline bool
+ UnicodeStringRef::operator<= (const UnicodeStringRef& text) const {
+   const int8_t order = doCompare(0, iv_uiLength, text, 0, text.iv_uiLength);
+   return order != 1;
+ }
+
+ // Compare all of this string with all of text; returns the result of
+ // doCompare() unchanged.
+ inline int8_t
+ UnicodeStringRef::compare(const UnicodeStringRef& text) const {
+   const int32_t thisLength = iv_uiLength;
+   const int32_t textLength = text.iv_uiLength;
+   return doCompare(0, thisLength, text, 0, textLength);
+ }
+
+ // Compare (start, length) of this string with the whole of srcText
+ // (offset 0, srcText's full length) via doCompare().
+ inline int8_t
+ UnicodeStringRef::compare(int32_t start,
+                           int32_t length,
+                           const UnicodeStringRef& srcText) const {
+   const int32_t wholeSrcLength = srcText.iv_uiLength;
+   return doCompare(start, length, srcText, 0, wholeSrcLength);
+ }
+
+ // Compare all of this string with the first srcLength code units of
+ // srcChars via doCompare().
+ inline int8_t
+ UnicodeStringRef::compare(const UChar *srcChars,
+                           int32_t srcLength) const {
+   const int32_t thisLength = iv_uiLength;
+   return doCompare(0, thisLength, srcChars, 0, srcLength);
+ }
+
+
+ // Compare all of this string with the complete contents of an ICU
+ // UnicodeString (its buffer and length) via doCompare().
+ inline int8_t
+ UnicodeStringRef::compare(icu::UnicodeString const &src ) const {
+   const UChar * const pSrcBuffer = src.getBuffer();
+   const int32_t srcLen = src.length();
+   return doCompare(0, iv_uiLength, pSrcBuffer, 0, srcLen);
+ }
+
+
+ // Compare (start, length) of this string with srcChars via doCompare().
+ // srcChars has no length parameter of its own: the same length value is
+ // passed as the number of code units to take from srcChars (from offset 0).
+ inline int8_t
+ UnicodeStringRef::compare(int32_t start,
+                           int32_t length,
+                           const UChar *srcChars) const {
+   const int32_t srcLength = length;
+   return doCompare(start, length, srcChars, 0, srcLength);
+ }
+
+ // Compare (start, length) of this string with (srcStart, srcLength) of
+ // srcChars; all arguments are handed to doCompare() unchanged.
+ inline int8_t
+ UnicodeStringRef::compare(int32_t start,
+                           int32_t length,
+                           const UChar *srcChars,
+                           int32_t srcStart,
+                           int32_t srcLength) const {
+   const int8_t order = doCompare(start, length, srcChars, srcStart, srcLength);
+   return order;
+ }
+
+ // Compare (start, length) of this string with (srcStart, srcLength) of
+ // srcText; all arguments are handed to doCompare() unchanged.
+ inline int8_t
+ UnicodeStringRef::compare(int32_t start,
+                           int32_t length,
+                           const UnicodeStringRef& srcText,
+                           int32_t srcStart,
+                           int32_t srcLength) const {
+   const int8_t order = doCompare(start, length, srcText, srcStart, srcLength);
+   return order;
+ }
+
+ // Limit-based variant of compare(): the two (start, limit) pairs are turned
+ // into (start, length) pairs by subtraction and passed to doCompare().
+ inline int8_t
+ UnicodeStringRef::compareBetween(int32_t start,
+                                  int32_t limit,
+                                  const UnicodeStringRef& srcText,
+                                  int32_t srcStart,
+                                  int32_t srcLimit) const {
+   const int32_t thisRangeLength = limit - start;
+   const int32_t srcRangeLength  = srcLimit - srcStart;
+   return doCompare(start, thisRangeLength, srcText, srcStart, srcRangeLength);
+ }
+
+ // Adapter: fetch the raw buffer of srcText and delegate to the
+ // UChar const* overload of doCompare() with otherwise unchanged arguments.
+ inline int8_t
+ UnicodeStringRef::doCompare(int32_t start,
+                             int32_t length,
+                             const UnicodeStringRef& srcText,
+                             int32_t srcStart,
+                             int32_t srcLength) const {
+   const UChar * const pSrcBuffer = srcText.getBuffer();
+   const int8_t order = doCompare(start, length, pSrcBuffer, srcStart, srcLength);
+   return order;
+ }
+
+ // Compare all of this string with all of text; returns the result of
+ // doCompareCodePointOrder() unchanged.
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(const UnicodeStringRef& text) const {
+   const int32_t thisLength = iv_uiLength;
+   const int32_t textLength = text.iv_uiLength;
+   return doCompareCodePointOrder(0, thisLength, text, 0, textLength);
+ }
+
+ // Compare (start, length) of this string with the whole of srcText
+ // (offset 0, srcText's full length) via doCompareCodePointOrder().
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(int32_t start,
+                                         int32_t length,
+                                         const UnicodeStringRef& srcText) const {
+   const int32_t wholeSrcLength = srcText.iv_uiLength;
+   return doCompareCodePointOrder(start, length, srcText, 0, wholeSrcLength);
+ }
+
+ // Compare all of this string with the first srcLength code units of
+ // srcChars via doCompareCodePointOrder().
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(const UChar *srcChars,
+                                         int32_t srcLength) const {
+   const int32_t thisLength = iv_uiLength;
+   return doCompareCodePointOrder(0, thisLength, srcChars, 0, srcLength);
+ }
+
+ // Compare (start, length) of this string with (srcStart, srcLength) of
+ // srcText; all arguments go to doCompareCodePointOrder() unchanged.
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(int32_t start,
+                                         int32_t length,
+                                         const UnicodeStringRef& srcText,
+                                         int32_t srcStart,
+                                         int32_t srcLength) const {
+   const int8_t order =
+     doCompareCodePointOrder(start, length, srcText, srcStart, srcLength);
+   return order;
+ }
+
+ /**
+  * Compare the range (start, length) of this string with srcChars.
+  * Forwards to doCompareCodePointOrder() with source offset 0 and a source
+  * length equal to length.
+  * @return the value produced by doCompareCodePointOrder()
+  */
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(int32_t start, int32_t length,
+                                         const UChar *srcChars) const {
+   const int32_t srcLength = length;  // source range has the same size as ours
+   return doCompareCodePointOrder(start, length, srcChars, 0, srcLength);
+ }
+
+ /**
+  * Compare the range (start, length) of this string with the range
+  * (srcStart, srcLength) of srcChars. Pure forwarder to
+  * doCompareCodePointOrder().
+  * @return the value produced by doCompareCodePointOrder()
+  */
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrder(int32_t start, int32_t length,
+                                         const UChar *srcChars,
+                                         int32_t srcStart, int32_t srcLength) const {
+   const int8_t order =
+     doCompareCodePointOrder(start, length, srcChars, srcStart, srcLength);
+   return order;
+ }
+
+ /**
+  * Compare the range [start, limit) of this string with the range
+  * [srcStart, srcLimit) of srcText. The limits are converted to lengths
+  * before forwarding to doCompareCodePointOrder().
+  * @return the value produced by doCompareCodePointOrder()
+  */
+ inline int8_t
+ UnicodeStringRef::compareCodePointOrderBetween(int32_t start, int32_t limit,
+                                                const UnicodeStringRef& srcText,
+                                                int32_t srcStart, int32_t srcLimit) const {
+   const int32_t length    = limit - start;
+   const int32_t srcLength = srcLimit - srcStart;
+   return doCompareCodePointOrder(start, length, srcText, srcStart, srcLength);
+ }
+
+ /**
+  * UnicodeStringRef flavour of doCompareCodePointOrder(): obtains the UChar
+  * buffer of srcText via getBuffer() and hands it to the UChar-pointer
+  * overload.
+  * @return the value produced by the UChar-pointer overload
+  */
+ inline int8_t
+ UnicodeStringRef::doCompareCodePointOrder(int32_t start, int32_t length,
+                                           const UnicodeStringRef& srcText,
+                                           int32_t srcStart, int32_t srcLength) const {
+   const UChar *srcBuffer = srcText.getBuffer();
+   const int8_t order =
+     doCompareCodePointOrder(start, length, srcBuffer, srcStart, srcLength);
+   return order;
+ }
+
+ /**
+  * Compare all of this string with all of text via doCaseCompare(),
+  * passing options through unchanged.
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(const UnicodeStringRef &text, uint32_t options) const {
+   const int8_t order =
+     doCaseCompare(0, iv_uiLength, text, 0, text.iv_uiLength, options);
+   return order;
+ }
+
+ /**
+  * Compare the range (start, length) of this string with all of srcText
+  * (offset 0, length srcText.iv_uiLength) via doCaseCompare().
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(int32_t start, int32_t length,
+                               const UnicodeStringRef &srcText,
+                               uint32_t options) const {
+   const int8_t order =
+     doCaseCompare(start, length, srcText, 0, srcText.iv_uiLength, options);
+   return order;
+ }
+
+ /**
+  * Compare all of this string with the first srcLength units of srcChars
+  * via doCaseCompare(), passing options through unchanged.
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(const UChar *srcChars, int32_t srcLength,
+                               uint32_t options) const {
+   const int8_t order =
+     doCaseCompare(0, iv_uiLength, srcChars, 0, srcLength, options);
+   return order;
+ }
+
+ /**
+  * Compare the range (start, length) of this string with the range
+  * (srcStart, srcLength) of srcText. Pure forwarder to the
+  * UnicodeStringRef overload of doCaseCompare().
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(int32_t start, int32_t length,
+                               const UnicodeStringRef &srcText,
+                               int32_t srcStart, int32_t srcLength,
+                               uint32_t options) const {
+   const int8_t order =
+     doCaseCompare(start, length, srcText, srcStart, srcLength, options);
+   return order;
+ }
+
+ /**
+  * Compare the range (start, length) of this string with srcChars.
+  * Forwards to doCaseCompare() with source offset 0 and a source length
+  * equal to length.
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(int32_t start, int32_t length,
+                               const UChar *srcChars,
+                               uint32_t options) const {
+   const int32_t srcLength = length;  // source range has the same size as ours
+   return doCaseCompare(start, length, srcChars, 0, srcLength, options);
+ }
+
+ /**
+  * Compare the range (start, length) of this string with the range
+  * (srcStart, srcLength) of srcChars. Pure forwarder to doCaseCompare().
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompare(int32_t start, int32_t length,
+                               const UChar *srcChars,
+                               int32_t srcStart, int32_t srcLength,
+                               uint32_t options) const {
+   const int8_t order =
+     doCaseCompare(start, length, srcChars, srcStart, srcLength, options);
+   return order;
+ }
+
+ /**
+  * Compare the range [start, limit) of this string with the range
+  * [srcStart, srcLimit) of srcText. The limits are converted to lengths
+  * before forwarding to doCaseCompare().
+  * @return the value produced by doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::caseCompareBetween(int32_t start, int32_t limit,
+                                      const UnicodeStringRef &srcText,
+                                      int32_t srcStart, int32_t srcLimit,
+                                      uint32_t options) const {
+   const int32_t length    = limit - start;
+   const int32_t srcLength = srcLimit - srcStart;
+   return doCaseCompare(start, length, srcText, srcStart, srcLength, options);
+ }
+
+ /**
+  * UnicodeStringRef flavour of doCaseCompare(): obtains the UChar buffer of
+  * srcText via getBuffer() and hands it to the UChar-pointer overload.
+  * @return the value produced by the UChar-pointer doCaseCompare()
+  */
+ inline int8_t
+ UnicodeStringRef::doCaseCompare(int32_t start, int32_t length,
+                                 const UnicodeStringRef &srcText,
+                                 int32_t srcStart, int32_t srcLength,
+                                 uint32_t options) const {
+   const UChar *srcBuffer = srcText.getBuffer();
+   const int8_t order =
+     doCaseCompare(start, length, srcBuffer, srcStart, srcLength, options);
+   return order;
+ }
+
+ /**
+  * Search all of this string for all of text. Forwards to the five-argument
+  * indexOf() with both ranges starting at 0 and spanning the respective
+  * iv_uiLength.
+  * @return the value produced by the five-argument indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UnicodeStringRef& text) const {
+   const int32_t position = indexOf(text, 0, text.iv_uiLength, 0, iv_uiLength);
+   return position;
+ }
+
+ /**
+  * Search this string from offset start to its end (length
+  * iv_uiLength - start) for all of text.
+  * @return the value produced by the five-argument indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UnicodeStringRef& text, int32_t start) const {
+   const int32_t position =
+     indexOf(text, 0, text.iv_uiLength, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * Search the range (start, length) of this string for all of text.
+  * @return the value produced by the five-argument indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UnicodeStringRef& text,
+                           int32_t start, int32_t length) const {
+   const int32_t position = indexOf(text, 0, text.iv_uiLength, start, length);
+   return position;
+ }
+
+ /**
+  * Search the range (start, length) of this string for the range
+  * (srcStart, srcLength) of srcText. Obtains srcText's UChar buffer via
+  * getBuffer() and forwards to the UChar-pointer overload.
+  * @return the value produced by the UChar-pointer indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UnicodeStringRef& srcText,
+                           int32_t srcStart, int32_t srcLength,
+                           int32_t start, int32_t length) const {
+   const int32_t position =
+     indexOf(srcText.getBuffer(), srcStart, srcLength, start, length);
+   return position;
+ }
+
+ /**
+  * Search this string from offset start to its end (length
+  * iv_uiLength - start) for the first srcLength units of srcChars.
+  * @return the value produced by the five-argument indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UChar *srcChars, int32_t srcLength,
+                           int32_t start) const {
+   const int32_t position =
+     indexOf(srcChars, 0, srcLength, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * Search the range (start, length) of this string for the first srcLength
+  * units of srcChars (source offset 0).
+  * @return the value produced by the five-argument indexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(const UChar *srcChars, int32_t srcLength,
+                           int32_t start, int32_t length) const {
+   const int32_t position = indexOf(srcChars, 0, srcLength, start, length);
+   return position;
+ }
+
+ /**
+  * Search all of this string (offset 0, length iv_uiLength) for the code
+  * unit c via doIndexOf().
+  * @return the value produced by doIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar c) const {
+   const int32_t position = doIndexOf(c, 0, iv_uiLength);
+   return position;
+ }
+
+ /**
+  * Search all of this string for the code point c.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string starting
+  * at offset 0; otherwise it is searched for as a single UChar via
+  * doIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar32 c) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return indexOf(units, unitCount, 0);
+   }
+   return doIndexOf(static_cast<UChar>(c), 0, iv_uiLength);
+ }
+
+ /**
+  * Search this string from offset start to its end (length
+  * iv_uiLength - start) for the code unit c via doIndexOf().
+  * @return the value produced by doIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar c, int32_t start) const {
+   const int32_t position = doIndexOf(c, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * Search this string from offset start to its end for the code point c.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string;
+  * otherwise it is searched for as a single UChar via doIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar32 c, int32_t start) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return indexOf(units, unitCount, start);
+   }
+   return doIndexOf(static_cast<UChar>(c), start, iv_uiLength - start);
+ }
+
+ /**
+  * Search the range (start, length) of this string for the code unit c.
+  * Pure forwarder to doIndexOf().
+  * @return the value produced by doIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar c, int32_t start, int32_t length) const {
+   const int32_t position = doIndexOf(c, start, length);
+   return position;
+ }
+
+ /**
+  * Search the range (start, length) of this string for the code point c.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string;
+  * otherwise it is searched for as a single UChar via doIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::indexOf(UChar32 c, int32_t start, int32_t length) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return indexOf(units, unitCount, start, length);
+   }
+   return doIndexOf(static_cast<UChar>(c), start, length);
+ }
+
+ /**
+  * Backward-search counterpart of indexOf(text): forwards to the
+  * five-argument lastIndexOf() with both ranges starting at 0 and spanning
+  * the respective iv_uiLength.
+  * @return the value produced by the five-argument lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text) const {
+   const int32_t position =
+     lastIndexOf(text, 0, text.iv_uiLength, 0, iv_uiLength);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() over the range from start to the end of this string
+  * (length iv_uiLength - start), looking for all of text.
+  * @return the value produced by the five-argument lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text, int32_t start) const {
+   const int32_t position =
+     lastIndexOf(text, 0, text.iv_uiLength, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() over the range (start, length) of this string, looking for
+  * all of text.
+  * @return the value produced by the five-argument lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UnicodeStringRef& text,
+                               int32_t start, int32_t length) const {
+   const int32_t position =
+     lastIndexOf(text, 0, text.iv_uiLength, start, length);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() over the range (start, length) of this string, looking for
+  * the range (srcStart, srcLength) of srcText. Obtains srcText's UChar
+  * buffer via getBuffer() and forwards to the UChar-pointer overload.
+  * @return the value produced by the UChar-pointer lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UnicodeStringRef& srcText,
+                               int32_t srcStart, int32_t srcLength,
+                               int32_t start, int32_t length) const {
+   const int32_t position =
+     lastIndexOf(srcText.getBuffer(), srcStart, srcLength, start, length);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() over the range from start to the end of this string
+  * (length iv_uiLength - start), looking for the first srcLength units of
+  * srcChars.
+  * @return the value produced by the five-argument lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UChar *srcChars, int32_t srcLength,
+                               int32_t start) const {
+   const int32_t position =
+     lastIndexOf(srcChars, 0, srcLength, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() over the range (start, length) of this string, looking for
+  * the first srcLength units of srcChars (source offset 0).
+  * @return the value produced by the five-argument lastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(const UChar *srcChars, int32_t srcLength,
+                               int32_t start, int32_t length) const {
+   const int32_t position = lastIndexOf(srcChars, 0, srcLength, start, length);
+   return position;
+ }
+
+ /**
+  * doLastIndexOf() for the code unit c over all of this string
+  * (offset 0, length iv_uiLength).
+  * @return the value produced by doLastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar c) const {
+   const int32_t position = doLastIndexOf(c, 0, iv_uiLength);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() for the code point c over all of this string.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string starting
+  * at offset 0; otherwise it is searched for as a single UChar via
+  * doLastIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar32 c) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return lastIndexOf(units, unitCount, 0);
+   }
+   return doLastIndexOf(static_cast<UChar>(c), 0, iv_uiLength);
+ }
+
+ /**
+  * doLastIndexOf() for the code unit c over the range from start to the end
+  * of this string (length iv_uiLength - start).
+  * @return the value produced by doLastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar c, int32_t start) const {
+   const int32_t position = doLastIndexOf(c, start, iv_uiLength - start);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() for the code point c over the range from start to the end
+  * of this string.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string;
+  * otherwise it is searched for as a single UChar via doLastIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar32 c, int32_t start) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return lastIndexOf(units, unitCount, start);
+   }
+   return doLastIndexOf(static_cast<UChar>(c), start, iv_uiLength - start);
+ }
+
+ /**
+  * doLastIndexOf() for the code unit c over the range (start, length) of
+  * this string. Pure forwarder.
+  * @return the value produced by doLastIndexOf()
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar c, int32_t start, int32_t length) const {
+   const int32_t position = doLastIndexOf(c, start, length);
+   return position;
+ }
+
+ /**
+  * lastIndexOf() for the code point c over the range (start, length) of
+  * this string.
+  * If UTF_NEED_MULTIPLE_UCHAR(c) holds, c is written into a temporary UChar
+  * buffer with UTF_APPEND_CHAR_UNSAFE and searched for as a string;
+  * otherwise it is searched for as a single UChar via doLastIndexOf().
+  * @return the value produced by the delegated search
+  */
+ inline int32_t
+ UnicodeStringRef::lastIndexOf(UChar32 c, int32_t start, int32_t length) const {
+   if (UTF_NEED_MULTIPLE_UCHAR(c)) {
+     UChar units[UTF_MAX_CHAR_LENGTH];
+     int32_t unitCount = 0;
+     UTF_APPEND_CHAR_UNSAFE(units, unitCount, c);
+     return lastIndexOf(units, unitCount, start, length);
+   }
+   return doLastIndexOf(static_cast<UChar>(c), start, length);
+ }
+
+ /**
+  * Test whether the first text.iv_uiLength units of this string compare
+  * equal to all of text.
+  * @return true if the comparison yields 0
+  */
+ inline bool
+ UnicodeStringRef::startsWith(const UnicodeStringRef& text) const {
+   // compare(start, length, srcText, srcStart, srcLength) is a plain
+   // forwarder to doCompare(), so call the worker directly.
+   const int8_t order =
+     doCompare(0, text.iv_uiLength, text, 0, text.iv_uiLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the first srcLength units of this string compare equal to
+  * the range (srcStart, srcLength) of srcText.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::startsWith(const UnicodeStringRef& srcText,
+                              int32_t srcStart, int32_t srcLength) const {
+   const int8_t order = doCompare(0, srcLength, srcText, srcStart, srcLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the first srcLength units of this string compare equal to
+  * the first srcLength units of srcChars.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::startsWith(const UChar *srcChars, int32_t srcLength) const {
+   const int8_t order = doCompare(0, srcLength, srcChars, 0, srcLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the first srcLength units of this string compare equal to
+  * the range (srcStart, srcLength) of srcChars.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::startsWith(const UChar *srcChars,
+                              int32_t srcStart, int32_t srcLength) const {
+   const int8_t order = doCompare(0, srcLength, srcChars, srcStart, srcLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the last text.iv_uiLength units of this string (starting
+  * at iv_uiLength - text.iv_uiLength) compare equal to all of text.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::endsWith(const UnicodeStringRef& text) const {
+   const int8_t order = doCompare(iv_uiLength - text.iv_uiLength,
+                                  text.iv_uiLength,
+                                  text, 0, text.iv_uiLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the last srcLength units of this string (starting at
+  * iv_uiLength - srcLength) compare equal to the range
+  * (srcStart, srcLength) of srcText.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::endsWith(const UnicodeStringRef& srcText,
+                            int32_t srcStart, int32_t srcLength) const {
+   const int8_t order = doCompare(iv_uiLength - srcLength, srcLength,
+                                  srcText, srcStart, srcLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the last srcLength units of this string (starting at
+  * iv_uiLength - srcLength) compare equal to the first srcLength units of
+  * srcChars.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::endsWith(const UChar *srcChars, int32_t srcLength) const {
+   const int8_t order = doCompare(iv_uiLength - srcLength, srcLength,
+                                  srcChars, 0, srcLength);
+   return order == 0;
+ }
+
+ /**
+  * Test whether the last srcLength units of this string (starting at
+  * iv_uiLength - srcLength) compare equal to the range
+  * (srcStart, srcLength) of srcChars.
+  * @return true if doCompare() yields 0
+  */
+ inline bool
+ UnicodeStringRef::endsWith(const UChar *srcChars,
+                            int32_t srcStart, int32_t srcLength) const {
+   const int8_t order = doCompare(iv_uiLength - srcLength, srcLength,
+                                  srcChars, srcStart, srcLength);
+   return order == 0;
+ }
+
+// ============================
+// extract implementations (some in .cpp)
+// ============================
+ /**
+  * Copy the range (start, length) of this string into dst, beginning at
+  * dst[dstStart].
+  *
+  * pinIndices(start, length) is applied first (presumably clamping the
+  * range to this string's bounds, as in ICU's UnicodeString -- confirm).
+  * The caller must provide room for length UChars at dst + dstStart.
+  *
+  * @param start    offset of the first code unit to copy
+  * @param length   number of code units to copy
+  * @param dst      destination array
+  * @param dstStart offset in dst at which copying begins
+  */
+ inline void
+ UnicodeStringRef::extract(int32_t start,
+                           int32_t length,
+                           UChar *dst,
+                           int32_t dstStart) const {
+   pinIndices(start, length);
+   // Nothing to copy; also keeps a non-positive count from being converted
+   // to a huge size_t.
+   if (length <= 0) {
+     return;
+   }
+   // memmove rather than memcpy: this object only references a buffer, so
+   // dst may overlap it, and memcpy is undefined for overlapping ranges.
+   memmove(dst + dstStart, getBuffer() + start, length * sizeof(UChar));
+ }
+
+
+ /**
+  * Replace the entire contents of target with the range (start, length) of
+  * this string.
+  * setTo(getBuffer()+start, length) could be used instead, but that is
+  * itself implemented as a replace.
+  */
+ inline void
+ UnicodeStringRef::extract(int32_t start, int32_t length,
+                           UnicodeString& target) const {
+   const int32_t oldLength = target.length();  // replace everything in target
+   target.replace(0, oldLength, getBuffer(), start, length);
+ }
+
+ /**
+  * Copy the range [start, limit) of this string into dst, beginning at
+  * dst[dstStart]. Converts the limit to a length and forwards to extract().
+  */
+ inline void
+ UnicodeStringRef::extractBetween(int32_t start, int32_t limit,
+                                  UChar *dst, int32_t dstStart) const {
+   const int32_t length = limit - start;
+   extract(start, length, dst, dstStart);
+ }
+
+ /**
+  * Extract the range [start, limit) of this string into dst. Converts the
+  * limit to a length and forwards to the UnicodeString overload of
+  * extract().
+  */
+ inline void
+ UnicodeStringRef::extractBetween(int32_t start, int32_t limit,
+                                  UnicodeString& dst) const {
+   const int32_t length = limit - start;
+   extract(start, length, dst);
+ }
+
+
+
+ /**
+  * Extract the range (start, length) of this string into target using the
+  * given codepage, without an explicit capacity.
+  *
+  * User beware: the target buffer is assumed to be large enough. The
+  * capacity handed to the capacity-taking extract() overload is 0xffffffff
+  * when a buffer is supplied, or 0 when target is null (pre-flighting).
+  *
+  * @return the value produced by the capacity-taking extract() overload
+  */
+ inline int32_t
+ UnicodeStringRef::extract(int32_t start, int32_t length,
+                           char *target,
+                           const char *codepage) const {
+   const bool preflightOnly = (target == 0);  // no buffer supplied
+   return extract(start, length, target,
+                  preflightOnly ? 0 : 0xffffffff,
+                  codepage);
+ }
+
+ /**
+  * Extract all of this string (offset 0, length iv_uiLength) into target
+  * using the given codepage. Forwards to the ranged std::string overload
+  * of extract().
+  * @return the value produced by the ranged extract() overload
+  */
+ inline int32_t
+ UnicodeStringRef::extract(std::string & target, const char *codepage) const {
+   const int32_t result = extract(0, iv_uiLength, target, codepage);
+   return result;
+ }
+
+ /**
+  * Convenience wrapper around extractUTF8(): fills a local std::string and
+  * returns it by value.
+  * @return the string filled by extractUTF8()
+  */
+ inline std::string
+ UnicodeStringRef::asUTF8(void) const {
+   std::string utf8;
+   extractUTF8(utf8);
+   return utf8;
+ }
+
+ /**
+  * Return the code unit at offset.
+  * Asserts that the referenced buffer exists. The unsigned comparison
+  * rejects negative offsets as well as offsets >= iv_uiLength.
+  * @return iv_pUChars[offset], or kInvalidUChar if offset is out of range
+  */
+ inline UChar
+ UnicodeStringRef::charAt(int32_t offset) const {
+   assert(EXISTS(iv_pUChars));
+   if ((uint32_t)offset >= (uint32_t)iv_uiLength) {
+     return kInvalidUChar;   // out of range (including negative offsets)
+   }
+   return iv_pUChars[offset];
+ }
+
+ /**
+  * Return the code point read by UTF_GET_CHAR at offset.
+  * The unsigned comparison rejects negative offsets as well as
+  * offsets >= iv_uiLength.
+  * @return the code point, or kInvalidUChar if offset is out of range
+  */
+ inline UChar32
+ UnicodeStringRef::char32At(int32_t offset) const {
+   if ((uint32_t)offset >= (uint32_t)iv_uiLength) {
+     return kInvalidUChar;   // out of range (including negative offsets)
+   }
+   UChar32 codePoint;
+   UTF_GET_CHAR(iv_pUChars, 0, offset, iv_uiLength, codePoint);
+   return codePoint;
+ }
+
+ /**
+  * Adjust offset with UTF_SET_CHAR_START and return the adjusted value.
+  * The unsigned comparison rejects negative offsets as well as
+  * offsets >= iv_uiLength.
+  * @return the adjusted offset, or 0 if offset is out of range
+  */
+ inline int32_t
+ UnicodeStringRef::getChar32Start(int32_t offset) const {
+   if ((uint32_t)offset >= (uint32_t)iv_uiLength) {
+     return 0;   // out of range (including negative offsets)
+   }
+   UTF_SET_CHAR_START(iv_pUChars, 0, offset);
+   return offset;
+ }
+
+ /**
+  * Adjust offset with UTF_SET_CHAR_LIMIT and return the adjusted value.
+  * The unsigned comparison rejects negative offsets as well as
+  * offsets >= iv_uiLength.
+  * @return the adjusted offset, or iv_uiLength if offset is out of range
+  */
+ inline int32_t
+ UnicodeStringRef::getChar32Limit(int32_t offset) const {
+   if ((uint32_t)offset >= (uint32_t)iv_uiLength) {
+     return iv_uiLength;   // out of range (including negative offsets)
+   }
+   UTF_SET_CHAR_LIMIT(iv_pUChars, 0, offset, iv_uiLength);
+   return offset;
+ }
+
[... 117 lines stripped ...]