You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by be...@apache.org on 2014/02/13 19:12:24 UTC
[26/57] [abbrv] remove couch_collate
http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/stsearch.h
----------------------------------------------------------------------
diff --git a/apps/couch_collate/platform/osx/icu/unicode/stsearch.h b/apps/couch_collate/platform/osx/icu/unicode/stsearch.h
deleted file mode 100644
index 8499752..0000000
--- a/apps/couch_collate/platform/osx/icu/unicode/stsearch.h
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 2001-2008 IBM and others. All rights reserved.
-**********************************************************************
-* Date Name Description
-* 03/22/2000 helena Creation.
-**********************************************************************
-*/
-
-#ifndef STSEARCH_H
-#define STSEARCH_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Service for searching text based on RuleBasedCollator.
- */
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/tblcoll.h"
-#include "unicode/coleitr.h"
-#include "unicode/search.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- *
- * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
- * language-sensitive text searching based on the comparison rules defined
- * in a {@link RuleBasedCollator} object.
- * StringSearch ensures that language eccentricity can be
- * handled, e.g. for the German collator, characters ß and SS will be matched
- * if case is chosen to be ignored.
- * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * "ICU Collation Design Document"</a> for more information.
- * <p>
- * The algorithm implemented is a modified form of the Boyer-Moore search.
- * For more information on the algorithm, see
- * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
- * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
- * in February, 1999.
- * <p>
- * There are 2 match options for selection:<br>
- * Let S' be the sub-string of a text string S between the offsets start and
- * end <start, end>.
- * <br>
- * A pattern string P matches a text string S at the offsets <start, end>
- * if
- * <pre>
- * option 1. Some canonical equivalent of P matches some canonical equivalent
- * of S'
- * option 2. P matches S' and if P starts or ends with a combining mark,
- * there exists no non-ignorable combining mark before or after S'
- * in S respectively.
- * </pre>
- * Option 2. will be the default.
- * <p>
- * This search has APIs similar to that of other text iteration mechanisms
- * such as the break iterators in <tt>BreakIterator</tt>. Using these
- * APIs, it is easy to scan through text looking for all occurrences of
- * a given pattern. This search iterator allows changing of direction by
- * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
- * Though a direction change can occur without calling <tt>reset</tt> first,
- * this operation comes with some speed penalty.
- * Matches found in the forward direction are the same as those found in
- * the backward direction, in reverse order.
- * <p>
- * <tt>SearchIterator</tt> provides APIs to specify the starting position
- * within the text string to be searched, e.g. <tt>setOffset</tt>,
- * <tt>preceding</tt> and <tt>following</tt>. Since the
- * starting position will be set as it is specified, please take note that
- * there are some danger points which the search may render incorrect
- * results:
- * <ul>
- * <li> The midst of a substring that requires normalization.
- * <li> If the following match is to be found, the position should not be the
- * second character which requires to be swapped with the preceding
- * character. Vice versa, if the preceding match is to be found,
- * position to search from should not be the first character which
- * requires to be swapped with the next character. E.g certain Thai and
- * Lao characters require swapping.
- * <li> If a following pattern match is to be found, any position within a
- * contracting sequence except the first will fail. Vice versa if a
- * preceding pattern match is to be found, an invalid starting point
- * would be any character within a contracting sequence except the last.
- * </ul>
- * <p>
- * A breakiterator can be used if only matches at logical breaks are desired.
- * Using a breakiterator will only give you results that exactly match the
- * boundaries given by the breakiterator. For instance the pattern "e" will
- * not be found in the string "\u00e9" if a character break iterator is used.
- * <p>
- * Options are provided to handle overlapping matches.
- * E.g. In English, overlapping matches produce the results 0 and 2
- * for the pattern "abab" in the text "ababab", whereas mutually
- * exclusive matches only produce the result 0.
- * <p>
- * Though collator attributes will be taken into consideration while
- * performing matches, there are no APIs here for setting and getting the
- * attributes. These attributes can be set by getting the collator
- * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
- * Lastly to update StringSearch to the new collator attributes,
- * reset() has to be called.
- * <p>
- * Restriction: <br>
- * Currently there are no composite characters that consist of a
- * character with combining class > 0 before a character with combining
- * class == 0. However, if such a character exists in the future,
- * StringSearch does not guarantee the results for option 1.
- * <p>
- * Consult the <tt>SearchIterator</tt> documentation for information on
- * and examples of how to use instances of this class to implement text
- * searching.
- * <pre><code>
- * UnicodeString target("The quick brown fox jumps over the lazy dog.");
- * UnicodeString pattern("fox");
- *
- * UErrorCode error = U_ZERO_ERROR;
- * StringSearch iter(pattern, target, Locale::getUS(), NULL, error);
- * for (int pos = iter.first(error);
- * pos != USEARCH_DONE;
- * pos = iter.next(error))
- * {
- * printf("Found match at %d pos, length is %d\n", pos,
- * iter.getMatchLength());
- * }
- * </code></pre>
- * <p>
- * Note, StringSearch is not to be subclassed.
- * </p>
- * @see SearchIterator
- * @see RuleBasedCollator
- * @since ICU 2.0
- */
-
-class U_I18N_API StringSearch : public SearchIterator
-{
-public:
-
- // public constructors and destructors --------------------------------
-
- /**
- * Creating a <tt>StringSearch</tt> instance using the argument locale
- * language rule set. A collator will be created in the process, which
- * will be owned by this instance and will be deleted during
- * destruction
- * @param pattern The text for which this object will search.
- * @param text The text in which to search for the pattern.
- * @param locale A locale which defines the language-sensitive
- * comparison rules used to determine whether text in the
- * pattern and target matches.
- * @param breakiter A <tt>BreakIterator</tt> object used to constrain
- * the matches that are found. Matches whose start and end
- * indices in the target text are not boundaries as
- * determined by the <tt>BreakIterator</tt> are
- * ignored. If this behavior is not desired,
- * <tt>NULL</tt> can be passed in instead.
- * @param status for errors if any. If pattern or text is NULL, or if
- * either the length of pattern or text is 0 then an
- * U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- StringSearch(const UnicodeString &pattern, const UnicodeString &text,
- const Locale &locale,
- BreakIterator *breakiter,
- UErrorCode &status);
-
- /**
- * Creating a <tt>StringSearch</tt> instance using the argument collator
- * language rule set. Note, user retains the ownership of this collator,
- * it does not get destroyed during this instance's destruction.
- * @param pattern The text for which this object will search.
- * @param text The text in which to search for the pattern.
- * @param coll A <tt>RuleBasedCollator</tt> object which defines
- * the language-sensitive comparison rules used to
- * determine whether text in the pattern and target
- * matches. User is responsible for the clearing of this
- * object.
- * @param breakiter A <tt>BreakIterator</tt> object used to constrain
- * the matches that are found. Matches whose start and end
- * indices in the target text are not boundaries as
- * determined by the <tt>BreakIterator</tt> are
- * ignored. If this behavior is not desired,
- * <tt>NULL</tt> can be passed in instead.
- * @param status for errors if any. If either the length of pattern or
- * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- StringSearch(const UnicodeString &pattern,
- const UnicodeString &text,
- RuleBasedCollator *coll,
- BreakIterator *breakiter,
- UErrorCode &status);
-
- /**
- * Creating a <tt>StringSearch</tt> instance using the argument locale
- * language rule set. A collator will be created in the process, which
- * will be owned by this instance and will be deleted during
- * destruction
- * <p>
- * Note: No parsing of the text within the <tt>CharacterIterator</tt>
- * will be done during searching for this version. The block of text
- * in <tt>CharacterIterator</tt> will be used as it is.
- * @param pattern The text for which this object will search.
- * @param text The text iterator in which to search for the pattern.
- * @param locale A locale which defines the language-sensitive
- * comparison rules used to determine whether text in the
- * pattern and target matches. User is responsible for
- * the clearing of this object.
- * @param breakiter A <tt>BreakIterator</tt> object used to constrain
- * the matches that are found. Matches whose start and end
- * indices in the target text are not boundaries as
- * determined by the <tt>BreakIterator</tt> are
- * ignored. If this behavior is not desired,
- * <tt>NULL</tt> can be passed in instead.
- * @param status for errors if any. If either the length of pattern or
- * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- StringSearch(const UnicodeString &pattern, CharacterIterator &text,
- const Locale &locale,
- BreakIterator *breakiter,
- UErrorCode &status);
-
- /**
- * Creating a <tt>StringSearch</tt> instance using the argument collator
- * language rule set. Note, user retains the ownership of this collator,
- * it does not get destroyed during this instance's destruction.
- * <p>
- * Note: No parsing of the text within the <tt>CharacterIterator</tt>
- * will be done during searching for this version. The block of text
- * in <tt>CharacterIterator</tt> will be used as it is.
- * @param pattern The text for which this object will search.
- * @param text The text in which to search for the pattern.
- * @param coll A <tt>RuleBasedCollator</tt> object which defines
- * the language-sensitive comparison rules used to
- * determine whether text in the pattern and target
- * matches. User is responsible for the clearing of this
- * object.
- * @param breakiter A <tt>BreakIterator</tt> object used to constrain
- * the matches that are found. Matches whose start and end
- * indices in the target text are not boundaries as
- * determined by the <tt>BreakIterator</tt> are
- * ignored. If this behavior is not desired,
- * <tt>NULL</tt> can be passed in instead.
- * @param status for errors if any. If either the length of pattern or
- * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- StringSearch(const UnicodeString &pattern, CharacterIterator &text,
- RuleBasedCollator *coll,
- BreakIterator *breakiter,
- UErrorCode &status);
-
- /**
- * Copy constructor that creates a StringSearch instance with the same
- * behavior, and iterating over the same text.
- * @param that StringSearch instance to be copied.
- * @stable ICU 2.0
- */
- StringSearch(const StringSearch &that);
-
- /**
- * Destructor. Cleans up the search iterator data struct.
- * If a collator is created in the constructor, it will be destroyed here.
- * @stable ICU 2.0
- */
- virtual ~StringSearch(void);
-
- /**
- * Clone this object.
- * Clones can be used concurrently in multiple threads.
- * If an error occurs, then NULL is returned.
- * The caller must delete the clone.
- *
- * @return a clone of this object
- *
- * @see getDynamicClassID
- * @stable ICU 2.8
- */
- StringSearch *clone() const;
-
- // operator overloading ---------------------------------------------
-
- /**
- * Assignment operator. Sets this iterator to have the same behavior,
- * and iterate over the same text, as the one passed in.
- * @param that instance to be copied.
- * @stable ICU 2.0
- */
- StringSearch & operator=(const StringSearch &that);
-
- /**
- * Equality operator.
- * @param that instance to be compared.
- * @return TRUE if both instances have the same attributes,
- * breakiterators, collators and iterate over the same text
- * while looking for the same pattern.
- * @stable ICU 2.0
- */
- virtual UBool operator==(const SearchIterator &that) const;
-
- // public get and set methods ----------------------------------------
-
- /**
- * Sets the index to point to the given position, and clears any state
- * that's affected.
- * <p>
- * This method takes the argument index and sets the position in the text
- * string accordingly without checking if the index is pointing to a
- * valid starting point to begin searching.
- * @param position within the text to be set. If position is less
- * than or greater than the text range for searching,
- * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
- * @param status for errors if it occurs
- * @stable ICU 2.0
- */
- virtual void setOffset(int32_t position, UErrorCode &status);
-
- /**
- * Return the current index in the text being searched.
- * If the iteration has gone past the end of the text
- * (or past the beginning for a backwards search), USEARCH_DONE
- * is returned.
- * @return current index in the text being searched.
- * @stable ICU 2.0
- */
- virtual int32_t getOffset(void) const;
-
- /**
- * Set the target text to be searched.
- * Text iteration will hence begin at the start of the text string.
- * This method is
- * useful if you want to re-use an iterator to search for the same
- * pattern within a different body of text.
- * @param text text string to be searched
- * @param status for errors if any. If the text length is 0 then an
- * U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- virtual void setText(const UnicodeString &text, UErrorCode &status);
-
- /**
- * Set the target text to be searched.
- * Text iteration will hence begin at the start of the text string.
- * This method is
- * useful if you want to re-use an iterator to search for the same
- * pattern within a different body of text.
- * Note: No parsing of the text within the <tt>CharacterIterator</tt>
- * will be done during searching for this version. The block of text
- * in <tt>CharacterIterator</tt> will be used as it is.
- * @param text text string to be searched
- * @param status for errors if any. If the text length is 0 then an
- * U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- virtual void setText(CharacterIterator &text, UErrorCode &status);
-
- /**
- * Gets the collator used for the language rules.
- * <p>
- * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>!
- * Modifications to this collator will affect the original collator passed in to
- * the <tt>StringSearch</tt> constructor or to setCollator, if any.
- * @return collator used for string search
- * @stable ICU 2.0
- */
- RuleBasedCollator * getCollator() const;
-
- /**
- * Sets the collator used for the language rules. User retains the
- * ownership of this collator, thus the responsibility of deletion lies
- * with the user. This method causes internal data such as Boyer-Moore
- * shift tables to be recalculated, but the iterator's position is
- * unchanged.
- * @param coll collator
- * @param status for errors if any
- * @stable ICU 2.0
- */
- void setCollator(RuleBasedCollator *coll, UErrorCode &status);
-
- /**
- * Sets the pattern used for matching.
- * Internal data like the Boyer Moore table will be recalculated, but
- * the iterator's position is unchanged.
- * @param pattern search pattern to be found
- * @param status for errors if any. If the pattern length is 0 then an
- * U_ILLEGAL_ARGUMENT_ERROR is returned.
- * @stable ICU 2.0
- */
- void setPattern(const UnicodeString &pattern, UErrorCode &status);
-
- /**
- * Gets the search pattern.
- * @return pattern used for matching
- * @stable ICU 2.0
- */
- const UnicodeString & getPattern() const;
-
- // public methods ----------------------------------------------------
-
- /**
- * Reset the iteration.
- * Search will begin at the start of the text string if a forward
- * iteration is initiated before a backwards iteration. Otherwise if
- * a backwards iteration is initiated before a forwards iteration, the
- * search will begin at the end of the text string.
- * @stable ICU 2.0
- */
- virtual void reset();
-
- /**
- * Returns a copy of StringSearch with the same behavior, and
- * iterating over the same text, as this one. Note that all data will be
- * replicated, except for the user-specified collator and the
- * breakiterator.
- * @return cloned object
- * @stable ICU 2.0
- */
- virtual SearchIterator * safeClone(void) const;
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- *
- * @stable ICU 2.2
- */
- virtual UClassID getDynamicClassID() const;
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- *
- * @stable ICU 2.2
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
-protected:
-
- // protected method -------------------------------------------------
-
- /**
- * Search forward for matching text, starting at a given location.
- * Clients should not call this method directly; instead they should
- * call {@link SearchIterator#next }.
- * <p>
- * If a match is found, this method returns the index at which the match
- * starts and calls {@link SearchIterator#setMatchLength } with the number
- * of characters in the target text that make up the match. If no match
- * is found, the method returns <tt>USEARCH_DONE</tt>.
- * <p>
- * The <tt>StringSearch</tt> is adjusted so that its current index
- * (as returned by {@link #getOffset }) is the match position if one was
- * found.
- * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
- * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
- * @param position The index in the target text at which the search
- * starts
- * @param status for errors if any occurs
- * @return The index at which the matched text in the target starts, or
- * USEARCH_DONE if no match was found.
- * @stable ICU 2.0
- */
- virtual int32_t handleNext(int32_t position, UErrorCode &status);
-
- /**
- * Search backward for matching text, starting at a given location.
- * Clients should not call this method directly; instead they should call
- * <tt>SearchIterator.previous()</tt>, which this method overrides.
- * <p>
- * If a match is found, this method returns the index at which the match
- * starts and calls {@link SearchIterator#setMatchLength } with the number
- * of characters in the target text that make up the match. If no match
- * is found, the method returns <tt>USEARCH_DONE</tt>.
- * <p>
- * The <tt>StringSearch</tt> is adjusted so that its current index
- * (as returned by {@link #getOffset }) is the match position if one was
- * found.
- * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
- * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
- * @param position The index in the target text at which the search
- * starts.
- * @param status for errors if any occurs
- * @return The index at which the matched text in the target starts, or
- * USEARCH_DONE if no match was found.
- * @stable ICU 2.0
- */
- virtual int32_t handlePrev(int32_t position, UErrorCode &status);
-
-private :
- StringSearch(); // default constructor not implemented
-
- // private data members ----------------------------------------------
-
- /**
- * RuleBasedCollator, contains exactly the same UCollator * in m_strsrch_
- * @stable ICU 2.0
- */
- RuleBasedCollator m_collator_;
- /**
- * Pattern text
- * @stable ICU 2.0
- */
- UnicodeString m_pattern_;
- /**
- * String search struct data
- * @stable ICU 2.0
- */
- UStringSearch *m_strsrch_;
-
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/symtable.h
----------------------------------------------------------------------
diff --git a/apps/couch_collate/platform/osx/icu/unicode/symtable.h b/apps/couch_collate/platform/osx/icu/unicode/symtable.h
deleted file mode 100644
index 428f8bf..0000000
--- a/apps/couch_collate/platform/osx/icu/unicode/symtable.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2000-2005, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 02/04/00 aliu Creation.
-**********************************************************************
-*/
-#ifndef SYMTABLE_H
-#define SYMTABLE_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-
-/**
- * \file
- * \brief C++ API: An interface that defines both lookup protocol and parsing of
- * symbolic names.
- */
-
-U_NAMESPACE_BEGIN
-
-class ParsePosition;
-class UnicodeFunctor;
-class UnicodeSet;
-class UnicodeString;
-
-/**
- * An interface that defines both lookup protocol and parsing of
- * symbolic names.
- *
- * <p>A symbol table maintains two kinds of mappings. The first is
- * between symbolic names and their values. For example, if the
- * variable with the name "start" is set to the value "alpha"
- * (perhaps, though not necessarily, through an expression such as
- * "$start=alpha"), then the call lookup("start") will return the
- * char[] array ['a', 'l', 'p', 'h', 'a'].
- *
- * <p>The second kind of mapping is between character values and
- * UnicodeMatcher objects. This is used by RuleBasedTransliterator,
- * which uses characters in the private use area to represent objects
- * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
- * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
- *
- * <p>Finally, a symbol table defines parsing behavior for symbolic
- * names. All symbolic names start with the SYMBOL_REF character.
- * When a parser encounters this character, it calls parseReference()
- * with the position immediately following the SYMBOL_REF. The symbol
- * table parses the name, if there is one, and returns it.
- *
- * @stable ICU 2.8
- */
-class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
-public:
-
- /**
- * The character preceding a symbol reference name.
- * @stable ICU 2.8
- */
- enum { SYMBOL_REF = 0x0024 /*$*/ };
-
- /**
- * Destructor.
- * @stable ICU 2.8
- */
- virtual ~SymbolTable();
-
- /**
- * Lookup the characters associated with this string and return it.
- * Return <tt>NULL</tt> if no such name exists. The resultant
- * string may have length zero.
- * @param s the symbolic name to lookup
- * @return a string containing the name's value, or <tt>NULL</tt> if
- * there is no mapping for s.
- * @stable ICU 2.8
- */
- virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
-
- /**
- * Lookup the UnicodeMatcher associated with the given character, and
- * return it. Return <tt>NULL</tt> if not found.
- * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
- * @return the UnicodeMatcher object represented by the given
- * character, or NULL if there is no mapping for ch.
- * @stable ICU 2.8
- */
- virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
-
- /**
- * Parse a symbol reference name from the given string, starting
- * at the given position. If no valid symbol reference name is
- * found, return the empty string and leave pos unchanged. That is, if the
- * character at pos cannot start a name, or if pos is at or after
- * text.length(), then return an empty string. This indicates an
- * isolated SYMBOL_REF character.
- * @param text the text to parse for the name
- * @param pos on entry, the index of the first character to parse.
- * This is the character following the SYMBOL_REF character. On
- * exit, the index after the last parsed character. If the parse
- * failed, pos is unchanged on exit.
- * @param limit the index after the last character to be parsed.
- * @return the parsed name, or an empty string if there is no
- * valid symbolic name at the given position.
- * @stable ICU 2.8
- */
- virtual UnicodeString parseReference(const UnicodeString& text,
- ParsePosition& pos, int32_t limit) const = 0;
-};
-U_NAMESPACE_END
-
-#endif
http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/tblcoll.h
----------------------------------------------------------------------
diff --git a/apps/couch_collate/platform/osx/icu/unicode/tblcoll.h b/apps/couch_collate/platform/osx/icu/unicode/tblcoll.h
deleted file mode 100644
index 2fdd63b..0000000
--- a/apps/couch_collate/platform/osx/icu/unicode/tblcoll.h
+++ /dev/null
@@ -1,926 +0,0 @@
-/*
-******************************************************************************
-* Copyright (C) 1996-2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*/
-
-/**
- * \file
- * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
- */
-
-/**
-* File tblcoll.h
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-* Date Name Description
-* 2/5/97 aliu Added streamIn and streamOut methods. Added
-* constructor which reads RuleBasedCollator object from
-* a binary file. Added writeToFile method which streams
-* RuleBasedCollator out to a binary file. The streamIn
-* and streamOut methods use istream and ostream objects
-* in binary mode.
-* 2/12/97 aliu Modified to use TableCollationData sub-object to
-* hold invariant data.
-* 2/13/97 aliu Moved several methods into this class from Collation.
-* Added a private RuleBasedCollator(Locale&) constructor,
-* to be used by Collator::createDefault(). General
-* clean up.
-* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
-* constructor and getDynamicClassID.
-* 3/5/97 aliu Modified constructFromFile() to add parameter
-* specifying whether or not binary loading is to be
-* attempted. This is required for dynamic rule loading.
-* 05/07/97 helena Added memory allocation error detection.
-* 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to
-* use MergeCollation::getPattern.
-* 6/20/97 helena Java class name change.
-* 8/18/97 helena Added internal API documentation.
-* 09/03/97 helena Added createCollationKeyValues().
-* 02/10/98 damiba Added compare with "length" parameter
-* 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99 stephen Removed EDecompositionMode, merged with
-* Normalizer::EMode
-* 06/14/99 stephen Removed kResourceBundleSuffix
-* 11/02/99 helena Collator performance enhancements. Eliminates the
-* UnicodeString construction and special case for NO_OP.
-* 11/23/99 srl More performance enhancements. Updates to NormalizerIterator
-* internal state management.
-* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
-* to implementation file.
-* 01/29/01 synwee Modified into a C++ wrapper which calls C API
-* (ucol.h)
-*/
-
-#ifndef TBLCOLL_H
-#define TBLCOLL_H
-
-#include "unicode/utypes.h"
-
-
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/coll.h"
-#include "unicode/ucol.h"
-#include "unicode/sortkey.h"
-#include "unicode/normlzr.h"
-
-U_NAMESPACE_BEGIN
-
-/**
-* @stable ICU 2.0
-*/
-class StringSearch;
-/**
-* @stable ICU 2.0
-*/
-class CollationElementIterator;
-
-/**
- * The RuleBasedCollator class provides the simple implementation of
- * Collator, using data-driven tables. The user can create a customized
- * table-based collation.
- * <P>
- * <em>Important: </em>The ICU collation service has been reimplemented
- * in order to achieve better performance and UCA compliance.
- * For details, see the
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * collation design document</a>.
- * <p>
- * RuleBasedCollator is a thin C++ wrapper over the C implementation.
- * <p>
- * For more information about the collation service see
- * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
- * <p>
- * Collation service provides correct sorting orders for most locales supported in ICU.
- * If specific data for a locale is not available, the orders eventually falls back
- * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
- * <p>
- * Sort ordering may be customized by providing your own set of rules. For more on
- * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
- * Collation customization</a> section of the users guide.
- * <p>
- * Note, RuleBasedCollator is not to be subclassed.
- * @see Collator
- * @version 2.0 11/15/2001
- */
-class U_I18N_API RuleBasedCollator : public Collator
-{
-public:
-
- // constructor -------------------------------------------------------------
-
- /**
- * RuleBasedCollator constructor. This takes the table rules and builds a
- * collation table out of them. Please see RuleBasedCollator class
- * description for more details on the collation rule syntax.
- * @param rules the collation rules to build the collation table from.
- * @param status reporting a success or an error.
- * @see Locale
- * @stable ICU 2.0
- */
- RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
-
- /**
- * RuleBasedCollator constructor. This takes the table rules and builds a
- * collation table out of them. Please see RuleBasedCollator class
- * description for more details on the collation rule syntax.
- * @param rules the collation rules to build the collation table from.
- * @param collationStrength default strength for comparison
- * @param status reporting a success or an error.
- * @see Locale
- * @stable ICU 2.0
- */
- RuleBasedCollator(const UnicodeString& rules,
- ECollationStrength collationStrength,
- UErrorCode& status);
-
- /**
- * RuleBasedCollator constructor. This takes the table rules and builds a
- * collation table out of them. Please see RuleBasedCollator class
- * description for more details on the collation rule syntax.
- * @param rules the collation rules to build the collation table from.
- * @param decompositionMode the normalisation mode
- * @param status reporting a success or an error.
- * @see Locale
- * @stable ICU 2.0
- */
- RuleBasedCollator(const UnicodeString& rules,
- UColAttributeValue decompositionMode,
- UErrorCode& status);
-
- /**
- * RuleBasedCollator constructor. This takes the table rules and builds a
- * collation table out of them. Please see RuleBasedCollator class
- * description for more details on the collation rule syntax.
- * @param rules the collation rules to build the collation table from.
- * @param collationStrength default strength for comparison
- * @param decompositionMode the normalisation mode
- * @param status reporting a success or an error.
- * @see Locale
- * @stable ICU 2.0
- */
- RuleBasedCollator(const UnicodeString& rules,
- ECollationStrength collationStrength,
- UColAttributeValue decompositionMode,
- UErrorCode& status);
-
- /**
- * Copy constructor.
- * @param other the RuleBasedCollator object to be copied
- * @see Locale
- * @stable ICU 2.0
- */
- RuleBasedCollator(const RuleBasedCollator& other);
-
-
- /** Opens a collator from a collator binary image created using
- * cloneBinary. Binary image used in instantiation of the
- * collator remains owned by the user and should stay around for
- * the lifetime of the collator. The API also takes a base collator
- * which usualy should be UCA.
- * @param bin binary image owned by the user and required through the
- * lifetime of the collator
- * @param length size of the image. If negative, the API will try to
- * figure out the length of the image
- * @param base fallback collator, usually UCA. Base is required to be
- * present through the lifetime of the collator. Currently
- * it cannot be NULL.
- * @param status for catching errors
- * @return newly created collator
- * @see cloneBinary
- * @stable ICU 3.4
- */
- RuleBasedCollator(const uint8_t *bin, int32_t length,
- const RuleBasedCollator *base,
- UErrorCode &status);
- // destructor --------------------------------------------------------------
-
- /**
- * Destructor.
- * @stable ICU 2.0
- */
- virtual ~RuleBasedCollator();
-
- // public methods ----------------------------------------------------------
-
- /**
- * Assignment operator.
- * @param other other RuleBasedCollator object to compare with.
- * @stable ICU 2.0
- */
- RuleBasedCollator& operator=(const RuleBasedCollator& other);
-
- /**
- * Returns true if argument is the same as this object.
- * @param other Collator object to be compared.
- * @return true if arguments is the same as this object.
- * @stable ICU 2.0
- */
- virtual UBool operator==(const Collator& other) const;
-
- /**
- * Returns true if argument is not the same as this object.
- * @param other Collator object to be compared
- * @return returns true if argument is not the same as this object.
- * @stable ICU 2.0
- */
- virtual UBool operator!=(const Collator& other) const;
-
- /**
- * Makes a deep copy of the object.
- * The caller owns the returned object.
- * @return the cloned object.
- * @stable ICU 2.0
- */
- virtual Collator* clone(void) const;
-
- /**
- * Creates a collation element iterator for the source string. The caller of
- * this method is responsible for the memory management of the return
- * pointer.
- * @param source the string over which the CollationElementIterator will
- * iterate.
- * @return the collation element iterator of the source string using this as
- * the based Collator.
- * @stable ICU 2.2
- */
- virtual CollationElementIterator* createCollationElementIterator(
- const UnicodeString& source) const;
-
- /**
- * Creates a collation element iterator for the source. The caller of this
- * method is responsible for the memory management of the returned pointer.
- * @param source the CharacterIterator which produces the characters over
- * which the CollationElementItgerator will iterate.
- * @return the collation element iterator of the source using this as the
- * based Collator.
- * @stable ICU 2.2
- */
- virtual CollationElementIterator* createCollationElementIterator(
- const CharacterIterator& source) const;
-
- /**
- * Compares a range of character data stored in two different strings based
- * on the collation rules. Returns information about whether a string is
- * less than, greater than or equal to another string in a language.
- * This can be overriden in a subclass.
- * @param source the source string.
- * @param target the target string to be compared with the source string.
- * @return the comparison result. GREATER if the source string is greater
- * than the target string, LESS if the source is less than the
- * target. Otherwise, returns EQUAL.
- * @deprecated ICU 2.6 Use overload with UErrorCode&
- */
- virtual EComparisonResult compare(const UnicodeString& source,
- const UnicodeString& target) const;
-
-
- /**
- * The comparison function compares the character data stored in two
- * different strings. Returns information about whether a string is less
- * than, greater than or equal to another string.
- * @param source the source string to be compared with.
- * @param target the string that is to be compared with the source string.
- * @param status possible error code
- * @return Returns an enum value. UCOL_GREATER if source is greater
- * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
- * than target
- * @stable ICU 2.6
- **/
- virtual UCollationResult compare(const UnicodeString& source,
- const UnicodeString& target,
- UErrorCode &status) const;
-
- /**
- * Compares a range of character data stored in two different strings based
- * on the collation rules up to the specified length. Returns information
- * about whether a string is less than, greater than or equal to another
- * string in a language. This can be overriden in a subclass.
- * @param source the source string.
- * @param target the target string to be compared with the source string.
- * @param length compares up to the specified length
- * @return the comparison result. GREATER if the source string is greater
- * than the target string, LESS if the source is less than the
- * target. Otherwise, returns EQUAL.
- * @deprecated ICU 2.6 Use overload with UErrorCode&
- */
- virtual EComparisonResult compare(const UnicodeString& source,
- const UnicodeString& target,
- int32_t length) const;
-
- /**
- * Does the same thing as compare but limits the comparison to a specified
- * length
- * @param source the source string to be compared with.
- * @param target the string that is to be compared with the source string.
- * @param length the length the comparison is limited to
- * @param status possible error code
- * @return Returns an enum value. UCOL_GREATER if source (up to the specified
- * length) is greater than target; UCOL_EQUAL if source (up to specified
- * length) is equal to target; UCOL_LESS if source (up to the specified
- * length) is less than target.
- * @stable ICU 2.6
- */
- virtual UCollationResult compare(const UnicodeString& source,
- const UnicodeString& target,
- int32_t length,
- UErrorCode &status) const;
-
- /**
- * The comparison function compares the character data stored in two
- * different string arrays. Returns information about whether a string array
- * is less than, greater than or equal to another string array.
- * <p>Example of use:
- * <pre>
- * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC"
- * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc"
- * . UErrorCode status = U_ZERO_ERROR;
- * . Collator *myCollation =
- * . Collator::createInstance(Locale::US, status);
- * . if (U_FAILURE(status)) return;
- * . myCollation->setStrength(Collator::PRIMARY);
- * . // result would be Collator::EQUAL ("abc" == "ABC")
- * . // (no primary difference between "abc" and "ABC")
- * . Collator::EComparisonResult result =
- * . myCollation->compare(abc, 3, ABC, 3);
- * . myCollation->setStrength(Collator::TERTIARY);
- * . // result would be Collator::LESS ("abc" <<< "ABC")
- * . // (with tertiary difference between "abc" and "ABC")
- * . result = myCollation->compare(abc, 3, ABC, 3);
- * </pre>
- * @param source the source string array to be compared with.
- * @param sourceLength the length of the source string array. If this value
- * is equal to -1, the string array is null-terminated.
- * @param target the string that is to be compared with the source string.
- * @param targetLength the length of the target string array. If this value
- * is equal to -1, the string array is null-terminated.
- * @return Returns a byte value. GREATER if source is greater than target;
- * EQUAL if source is equal to target; LESS if source is less than
- * target
- * @deprecated ICU 2.6 Use overload with UErrorCode&
- */
- virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
- const UChar* target, int32_t targetLength)
- const;
-
- /**
- * The comparison function compares the character data stored in two
- * different string arrays. Returns information about whether a string array
- * is less than, greater than or equal to another string array.
- * @param source the source string array to be compared with.
- * @param sourceLength the length of the source string array. If this value
- * is equal to -1, the string array is null-terminated.
- * @param target the string that is to be compared with the source string.
- * @param targetLength the length of the target string array. If this value
- * is equal to -1, the string array is null-terminated.
- * @param status possible error code
- * @return Returns an enum value. UCOL_GREATER if source is greater
- * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
- * than target
- * @stable ICU 2.6
- */
- virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
- const UChar* target, int32_t targetLength,
- UErrorCode &status) const;
-
- /**
- * Transforms a specified region of the string into a series of characters
- * that can be compared with CollationKey.compare. Use a CollationKey when
- * you need to do repeated comparisions on the same string. For a single
- * comparison the compare method will be faster.
- * @param source the source string.
- * @param key the transformed key of the source string.
- * @param status the error code status.
- * @return the transformed key.
- * @see CollationKey
- * @deprecated ICU 2.8 Use getSortKey(...) instead
- */
- virtual CollationKey& getCollationKey(const UnicodeString& source,
- CollationKey& key,
- UErrorCode& status) const;
-
- /**
- * Transforms a specified region of the string into a series of characters
- * that can be compared with CollationKey.compare. Use a CollationKey when
- * you need to do repeated comparisions on the same string. For a single
- * comparison the compare method will be faster.
- * @param source the source string.
- * @param sourceLength the length of the source string.
- * @param key the transformed key of the source string.
- * @param status the error code status.
- * @return the transformed key.
- * @see CollationKey
- * @deprecated ICU 2.8 Use getSortKey(...) instead
- */
- virtual CollationKey& getCollationKey(const UChar *source,
- int32_t sourceLength,
- CollationKey& key,
- UErrorCode& status) const;
-
- /**
- * Generates the hash code for the rule-based collation object.
- * @return the hash code.
- * @stable ICU 2.0
- */
- virtual int32_t hashCode(void) const;
-
- /**
- * Gets the locale of the Collator
- * @param type can be either requested, valid or actual locale. For more
- * information see the definition of ULocDataLocaleType in
- * uloc.h
- * @param status the error code status.
- * @return locale where the collation data lives. If the collator
- * was instantiated from rules, locale is empty.
- * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
- */
- virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
- /**
- * Gets the table-based rules for the collation object.
- * @return returns the collation rules that the table collation object was
- * created from.
- * @stable ICU 2.0
- */
- const UnicodeString& getRules(void) const;
-
- /**
- * Gets the version information for a Collator.
- * @param info the version # information, the result will be filled in
- * @stable ICU 2.0
- */
- virtual void getVersion(UVersionInfo info) const;
-
- /**
- * Return the maximum length of any expansion sequences that end with the
- * specified comparison order.
- * @param order a collation order returned by previous or next.
- * @return maximum size of the expansion sequences ending with the collation
- * element or 1 if collation element does not occur at the end of
- * any expansion sequence
- * @see CollationElementIterator#getMaxExpansion
- * @stable ICU 2.0
- */
- int32_t getMaxExpansion(int32_t order) const;
-
- /**
- * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
- * method is to implement a simple version of RTTI, since not all C++
- * compilers support genuine RTTI. Polymorphic operator==() and clone()
- * methods call this method.
- * @return The class ID for this object. All objects of a given class have
- * the same class ID. Objects of other classes have different class
- * IDs.
- * @stable ICU 2.0
- */
- virtual UClassID getDynamicClassID(void) const;
-
- /**
- * Returns the class ID for this class. This is useful only for comparing to
- * a return value from getDynamicClassID(). For example:
- * <pre>
- * Base* polymorphic_pointer = createPolymorphicObject();
- * if (polymorphic_pointer->getDynamicClassID() ==
- * Derived::getStaticClassID()) ...
- * </pre>
- * @return The class ID for all objects of this class.
- * @stable ICU 2.0
- */
- static UClassID U_EXPORT2 getStaticClassID(void);
-
- /**
- * Returns the binary format of the class's rules. The format is that of
- * .col files.
- * @param length Returns the length of the data, in bytes
- * @param status the error code status.
- * @return memory, owned by the caller, of size 'length' bytes.
- * @stable ICU 2.2
- */
- uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
-
-
- /** Creates a binary image of a collator. This binary image can be stored and
- * later used to instantiate a collator using ucol_openBinary.
- * This API supports preflighting.
- * @param buffer a fill-in buffer to receive the binary image
- * @param capacity capacity of the destination buffer
- * @param status for catching errors
- * @return size of the image
- * @see ucol_openBinary
- * @stable ICU 3.4
- */
- int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
-
- /**
- * Returns current rules. Delta defines whether full rules are returned or
- * just the tailoring.
- * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
- * @param buffer UnicodeString to store the result rules
- * @stable ICU 2.2
- */
- void getRules(UColRuleOption delta, UnicodeString &buffer);
-
- /**
- * Universal attribute setter
- * @param attr attribute type
- * @param value attribute value
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @stable ICU 2.2
- */
- virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
- UErrorCode &status);
-
- /**
- * Universal attribute getter.
- * @param attr attribute type
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @return attribute value
- * @stable ICU 2.2
- */
- virtual UColAttributeValue getAttribute(UColAttribute attr,
- UErrorCode &status);
-
- /**
- * Sets the variable top to a collation element value of a string supplied.
- * @param varTop one or more (if contraction) UChars to which the variable top should be set
- * @param len length of variable top string. If -1 it is considered to be zero terminated.
- * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
- * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
- * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
- * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
- * @stable ICU 2.0
- */
- virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
-
- /**
- * Sets the variable top to a collation element value of a string supplied.
- * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
- * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
- * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
- * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
- * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
- * @stable ICU 2.0
- */
- virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
-
- /**
- * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
- * Lower 16 bits are ignored.
- * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop
- * @param status error code (not changed by function)
- * @stable ICU 2.0
- */
- virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
-
- /**
- * Gets the variable top value of a Collator.
- * Lower 16 bits are undefined and should be ignored.
- * @param status error code (not changed by function). If error code is set, the return value is undefined.
- * @stable ICU 2.0
- */
- virtual uint32_t getVariableTop(UErrorCode &status) const;
-
- /**
- * Get an UnicodeSet that contains all the characters and sequences tailored in
- * this collator.
- * @param status error code of the operation
- * @return a pointer to a UnicodeSet object containing all the
- * code points and sequences that may sort differently than
- * in the UCA. The object must be disposed of by using delete
- * @stable ICU 2.4
- */
- virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
-
- /**
- * Thread safe cloning operation.
- * @return pointer to the new clone, user should remove it.
- * @stable ICU 2.2
- */
- virtual Collator* safeClone(void);
-
- /**
- * Get the sort key as an array of bytes from an UnicodeString.
- * @param source string to be processed.
- * @param result buffer to store result in. If NULL, number of bytes needed
- * will be returned.
- * @param resultLength length of the result buffer. If if not enough the
- * buffer will be filled to capacity.
- * @return Number of bytes needed for storing the sort key
- * @stable ICU 2.0
- */
- virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
- int32_t resultLength) const;
-
- /**
- * Get the sort key as an array of bytes from an UChar buffer.
- * @param source string to be processed.
- * @param sourceLength length of string to be processed. If -1, the string
- * is 0 terminated and length will be decided by the function.
- * @param result buffer to store result in. If NULL, number of bytes needed
- * will be returned.
- * @param resultLength length of the result buffer. If if not enough the
- * buffer will be filled to capacity.
- * @return Number of bytes needed for storing the sort key
- * @stable ICU 2.2
- */
- virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
- uint8_t *result, int32_t resultLength) const;
-
- /**
- * Determines the minimum strength that will be use in comparison or
- * transformation.
- * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
- * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
- * are ignored.
- * @return the current comparison level.
- * @see RuleBasedCollator#setStrength
- * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
- */
- virtual ECollationStrength getStrength(void) const;
-
- /**
- * Sets the minimum strength to be used in comparison or transformation.
- * @see RuleBasedCollator#getStrength
- * @param newStrength the new comparison level.
- * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
- */
- virtual void setStrength(ECollationStrength newStrength);
-
-private:
-
- // private static constants -----------------------------------------------
-
- enum {
- /* need look up in .commit() */
- CHARINDEX = 0x70000000,
- /* Expand index follows */
- EXPANDCHARINDEX = 0x7E000000,
- /* contract indexes follows */
- CONTRACTCHARINDEX = 0x7F000000,
- /* unmapped character values */
- UNMAPPED = 0xFFFFFFFF,
- /* primary strength increment */
- PRIMARYORDERINCREMENT = 0x00010000,
- /* secondary strength increment */
- SECONDARYORDERINCREMENT = 0x00000100,
- /* tertiary strength increment */
- TERTIARYORDERINCREMENT = 0x00000001,
- /* mask off anything but primary order */
- PRIMARYORDERMASK = 0xffff0000,
- /* mask off anything but secondary order */
- SECONDARYORDERMASK = 0x0000ff00,
- /* mask off anything but tertiary order */
- TERTIARYORDERMASK = 0x000000ff,
- /* mask off ignorable char order */
- IGNORABLEMASK = 0x0000ffff,
- /* use only the primary difference */
- PRIMARYDIFFERENCEONLY = 0xffff0000,
- /* use only the primary and secondary difference */
- SECONDARYDIFFERENCEONLY = 0xffffff00,
- /* primary order shift */
- PRIMARYORDERSHIFT = 16,
- /* secondary order shift */
- SECONDARYORDERSHIFT = 8,
- /* starting value for collation elements */
- COLELEMENTSTART = 0x02020202,
- /* testing mask for primary low element */
- PRIMARYLOWZEROMASK = 0x00FF0000,
- /* reseting value for secondaries and tertiaries */
- RESETSECONDARYTERTIARY = 0x00000202,
- /* reseting value for tertiaries */
- RESETTERTIARY = 0x00000002,
-
- PRIMIGNORABLE = 0x0202
- };
-
- // private data members ---------------------------------------------------
-
- UBool dataIsOwned;
-
- UBool isWriteThroughAlias;
-
- /**
- * c struct for collation. All initialisation for it has to be done through
- * setUCollator().
- */
- UCollator *ucollator;
-
- /**
- * Rule UnicodeString
- */
- UnicodeString urulestring;
-
- // friend classes --------------------------------------------------------
-
- /**
- * Used to iterate over collation elements in a character source.
- */
- friend class CollationElementIterator;
-
- /**
- * Collator ONLY needs access to RuleBasedCollator(const Locale&,
- * UErrorCode&)
- */
- friend class Collator;
-
- /**
- * Searching over collation elements in a character source
- */
- friend class StringSearch;
-
- // private constructors --------------------------------------------------
-
- /**
- * Default constructor
- */
- RuleBasedCollator();
-
- /**
- * RuleBasedCollator constructor. This constructor takes a locale. The
- * only caller of this class should be Collator::createInstance(). If
- * createInstance() happens to know that the requested locale's collation is
- * implemented as a RuleBasedCollator, it can then call this constructor.
- * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
- * COLLATION TABLE. It does this by falling back to defaults.
- * @param desiredLocale locale used
- * @param status error code status
- */
- RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
-
- /**
- * common constructor implementation
- *
- * @param rules the collation rules to build the collation table from.
- * @param collationStrength default strength for comparison
- * @param decompositionMode the normalisation mode
- * @param status reporting a success or an error.
- */
- void
- construct(const UnicodeString& rules,
- UColAttributeValue collationStrength,
- UColAttributeValue decompositionMode,
- UErrorCode& status);
-
- // private methods -------------------------------------------------------
-
- /**
- * Creates the c struct for ucollator
- * @param locale desired locale
- * @param status error status
- */
- void setUCollator(const Locale& locale, UErrorCode& status);
-
- /**
- * Creates the c struct for ucollator
- * @param locale desired locale name
- * @param status error status
- */
- void setUCollator(const char* locale, UErrorCode& status);
-
- /**
- * Creates the c struct for ucollator. This used internally by StringSearch.
- * Hence the responsibility of cleaning up the ucollator is not done by
- * this RuleBasedCollator. The isDataOwned flag is set to FALSE.
- * @param collator new ucollator data
- * @param rules corresponding collation rules
- */
- void setUCollator(UCollator *collator);
-
-public:
- /**
- * Get UCollator data struct. Used only by StringSearch & intltest.
- * @return UCollator data struct
- * @internal
- */
- const UCollator * getUCollator();
-
-protected:
- /**
- * Used internally by registraton to define the requested and valid locales.
- * @param requestedLocale the requsted locale
- * @param validLocale the valid locale
- * @param actualLocale the actual locale
- * @internal
- */
- virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
-
-private:
-
- // if not owned and not a write through alias, copy the ucollator
- void checkOwned(void);
-
- // utility to init rule string used by checkOwned and construct
- void setRuleStringFromCollator();
-
- /**
- * Converts C's UCollationResult to EComparisonResult
- * @param result member of the enum UComparisonResult
- * @return EComparisonResult equivalent of UCollationResult
- * @deprecated ICU 2.6. We will not need it.
- */
- Collator::EComparisonResult getEComparisonResult(
- const UCollationResult &result) const;
-
- /**
- * Converts C's UCollationStrength to ECollationStrength
- * @param strength member of the enum UCollationStrength
- * @return ECollationStrength equivalent of UCollationStrength
- */
- Collator::ECollationStrength getECollationStrength(
- const UCollationStrength &strength) const;
-
- /**
- * Converts C++'s ECollationStrength to UCollationStrength
- * @param strength member of the enum ECollationStrength
- * @return UCollationStrength equivalent of ECollationStrength
- */
- UCollationStrength getUCollationStrength(
- const Collator::ECollationStrength &strength) const;
-};
-
-// inline method implementation ---------------------------------------------
-
-inline void RuleBasedCollator::setUCollator(const Locale &locale,
- UErrorCode &status)
-{
- setUCollator(locale.getName(), status);
-}
-
-
-inline void RuleBasedCollator::setUCollator(UCollator *collator)
-{
-
- if (ucollator && dataIsOwned) {
- ucol_close(ucollator);
- }
- ucollator = collator;
- dataIsOwned = FALSE;
- isWriteThroughAlias = TRUE;
- setRuleStringFromCollator();
-}
-
-inline const UCollator * RuleBasedCollator::getUCollator()
-{
- return ucollator;
-}
-
-inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
- const UCollationResult &result) const
-{
- switch (result)
- {
- case UCOL_LESS :
- return Collator::LESS;
- case UCOL_EQUAL :
- return Collator::EQUAL;
- default :
- return Collator::GREATER;
- }
-}
-
-inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
- const UCollationStrength &strength) const
-{
- switch (strength)
- {
- case UCOL_PRIMARY :
- return Collator::PRIMARY;
- case UCOL_SECONDARY :
- return Collator::SECONDARY;
- case UCOL_TERTIARY :
- return Collator::TERTIARY;
- case UCOL_QUATERNARY :
- return Collator::QUATERNARY;
- default :
- return Collator::IDENTICAL;
- }
-}
-
-inline UCollationStrength RuleBasedCollator::getUCollationStrength(
- const Collator::ECollationStrength &strength) const
-{
- switch (strength)
- {
- case Collator::PRIMARY :
- return UCOL_PRIMARY;
- case Collator::SECONDARY :
- return UCOL_SECONDARY;
- case Collator::TERTIARY :
- return UCOL_TERTIARY;
- case Collator::QUATERNARY :
- return UCOL_QUATERNARY;
- default :
- return UCOL_IDENTICAL;
- }
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif