You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2016/05/09 17:22:21 UTC
svn commit: r1743011 [9/13] - in /directory/shared/branches/shared-value:
dsml/parser/src/main/java/org/apache/directory/api/dsmlv2/request/
dsml/parser/src/main/java/org/apache/directory/api/dsmlv2/response/
dsml/parser/src/test/java/org/apache/direct...
Modified: directory/shared/branches/shared-value/ldap/model/src/main/java/org/apache/directory/api/ldap/model/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/shared/branches/shared-value/ldap/model/src/main/java/org/apache/directory/api/ldap/model/schema/PrepareString.java?rev=1743011&r1=1743010&r2=1743011&view=diff
==============================================================================
--- directory/shared/branches/shared-value/ldap/model/src/main/java/org/apache/directory/api/ldap/model/schema/PrepareString.java (original)
+++ directory/shared/branches/shared-value/ldap/model/src/main/java/org/apache/directory/api/ldap/model/schema/PrepareString.java Mon May 9 17:22:19 2016
@@ -17,11 +17,10 @@
* under the License.
*
*/
-
package org.apache.directory.api.ldap.model.schema;
-import java.io.IOException;
+import java.text.Normalizer;
import org.apache.directory.api.util.Strings;
import org.apache.directory.api.util.exception.InvalidCharacterException;
@@ -35,142 +34,37 @@ import org.apache.directory.api.util.exc
*/
public final class PrepareString
{
+ private enum NormStateEnum
+ {
+ START,
+ INITIAL_CHAR,
+ INITIAL_SPACES,
+ SPACES,
+ CHARS,
+ SPACE_CHAR,
+ END
+ }
+
/** A flag used to keep casing during the map process */
- private static final boolean CASE_SENSITIVE = true;
+ public static final boolean CASE_SENSITIVE = true;
/** A flag used to lowercase chars during the map process */
- private static final boolean IGNORE_CASE = false;
-
- /** All the possible combining marks */
- private static final char[][] COMBINING_MARKS = new char[][]
- {
- { 0x0300, 0x034F },
- { 0x0360, 0x036F },
- { 0x0483, 0x0486 },
- { 0x0488, 0x0489 },
- { 0x0591, 0x05A1 },
- { 0x05A3, 0x05B9 },
- { 0x05BB, 0x05BC },
- { 0x05BF, 0x05BF },
- { 0x05C1, 0x05C2 },
- { 0x05C4, 0x05C4 },
- { 0x064B, 0x0655 },
- { 0x0670, 0x0670 },
- { 0x06D6, 0x06DC },
- { 0x06DE, 0x06E4 },
- { 0x06E7, 0x06E8 },
- { 0x06EA, 0x06ED },
- { 0x0711, 0x0711 },
- { 0x0730, 0x074A },
- { 0x07A6, 0x07B0 },
- { 0x0901, 0x0903 },
- { 0x093C, 0x093C },
- { 0x093E, 0x094F },
- { 0x0951, 0x0954 },
- { 0x0962, 0x0963 },
- { 0x0981, 0x0983 },
- { 0x09BC, 0x09BC },
- { 0x09BE, 0x09C4 },
- { 0x09C7, 0x09C8 },
- { 0x09CB, 0x09CD },
- { 0x09D7, 0x09D7 },
- { 0x09E2, 0x09E3 },
- { 0x0A02, 0x0A02 },
- { 0x0A3C, 0x0A3C },
- { 0x0A3E, 0x0A42 },
- { 0x0A47, 0x0A48 },
- { 0x0A4B, 0x0A4D },
- { 0x0A70, 0x0A71 },
- { 0x0A81, 0x0A83 },
- { 0x0ABC, 0x0ABC },
- { 0x0ABE, 0x0AC5 },
- { 0x0AC7, 0x0AC9 },
- { 0x0ACB, 0x0ACD },
- { 0x0B01, 0x0B03 },
- { 0x0B3C, 0x0B3C },
- { 0x0B3E, 0x0B43 },
- { 0x0B47, 0x0B48 },
- { 0x0B4B, 0x0B4D },
- { 0x0B56, 0x0B57 },
- { 0x0B82, 0x0B82 },
- { 0x0BBE, 0x0BC2 },
- { 0x0BC6, 0x0BC8 },
- { 0x0BCA, 0x0BCD },
- { 0x0BD7, 0x0BD7 },
- { 0x0C01, 0x0C03 },
- { 0x0C3E, 0x0C44 },
- { 0x0C46, 0x0C48 },
- { 0x0C4A, 0x0C4D },
- { 0x0C55, 0x0C56 },
- { 0x0C82, 0x0C83 },
- { 0x0CBE, 0x0CC4 },
- { 0x0CC6, 0x0CC8 },
- { 0x0CCA, 0x0CCD },
- { 0x0CD5, 0x0CD6 },
- { 0x0D02, 0x0D03 },
- { 0x0D3E, 0x0D43 },
- { 0x0D46, 0x0D48 },
- { 0x0D4A, 0x0D4D },
- { 0x0D57, 0x0D57 },
- { 0x0D82, 0x0D83 },
- { 0x0DCA, 0x0DCA },
- { 0x0DCF, 0x0DD4 },
- { 0x0DD6, 0x0DD6 },
- { 0x0DD8, 0x0DDF },
- { 0x0DF2, 0x0DF3 },
- { 0x0E31, 0x0E31 },
- { 0x0E34, 0x0E3A },
- { 0x0E47, 0x0E4E },
- { 0x0EB1, 0x0EB1 },
- { 0x0EB4, 0x0EB9 },
- { 0x0EBB, 0x0EBC },
- { 0x0EC8, 0x0ECD },
- { 0x0F18, 0x0F19 },
- { 0x0F35, 0x0F35 },
- { 0x0F37, 0x0F37 },
- { 0x0F39, 0x0F39 },
- { 0x0F3E, 0x0F3F },
- { 0x0F71, 0x0F84 },
- { 0x0F86, 0x0F87 },
- { 0x0F90, 0x0F97 },
- { 0x0F99, 0x0FBC },
- { 0x0FC6, 0x0FC6 },
- { 0x102C, 0x1032 },
- { 0x1036, 0x1039 },
- { 0x1056, 0x1059 },
- { 0x1712, 0x1714 },
- { 0x1732, 0x1734 },
- { 0x1752, 0x1753 },
- { 0x1772, 0x1773 },
- { 0x17B4, 0x17D3 },
- { 0x180B, 0x180D },
- { 0x18A9, 0x18A9 },
- { 0x20D0, 0x20EA },
- { 0x302A, 0x302F },
- { 0x3099, 0x309A },
- { 0xFB1E, 0xFB1E },
- { 0xFE00, 0xFE0F },
- { 0xFE20, 0xFE23 }
- };
+ public static final boolean IGNORE_CASE = false;
/**
- * The type of String we have to normalize
+ * The type of Assertion we have to normalize
*/
- public enum StringType
+ public enum AssertionType
{
- NOT_STRING,
- NUMERIC_STRING,
- CASE_EXACT,
- CASE_EXACT_IA5,
- CASE_IGNORE_IA5,
- CASE_IGNORE_LIST,
- CASE_IGNORE,
- DIRECTORY_STRING,
- TELEPHONE_NUMBER,
- WORD
+ SUBSTRING_INITIAL, // The INITIAL part of a substring assertion value
+ SUBSTRING_ANY, // The ANY part of a substring assertion value
+ SUBSTRING_FINAL, // The FINAL part of a substring assertion value
+ ATTRIBUTE_VALUE // An Attribute Value
}
-
-
+
+ /** An exception used to get out of the map method quickly */
+ private static final ArrayIndexOutOfBoundsException AIOOBE = new ArrayIndexOutOfBoundsException();
+
/**
* A private constructor, to avoid instance creation of this static class.
*/
@@ -181,110 +75,63 @@ public final class PrepareString
/**
- * Tells if a char is a combining mark.
- *
- * @param c The char to check
- * @return true if the char is a combining mark, false otherwise
+ * The first step defined by RFC 4518 : Transcode, which transforms a
+ * UTF-8 encoded byte array into a Unicode String. This is done using the
+ * {@link Strings#utf8ToString} method.
+ */
+ public static String transcode( byte[] bytes )
+ {
+ return Strings.utf8ToString( bytes );
+ }
+
+
+ /**
+ * Normalize a String to the Unicode NFKC form (the value is returned as is when already normalized)
+ *
+ * @param value the value to normalize
+ * @return The normalized value
*/
- private static boolean isCombiningMark( char c )
+ public static String normalize( String value )
{
- if ( c < COMBINING_MARKS[0][0] )
+ if ( !Normalizer.isNormalized( value, Normalizer.Form.NFKC ) )
{
- return false;
+ return Normalizer.normalize( value, Normalizer.Form.NFKC );
}
-
- for ( char[] interval : COMBINING_MARKS )
+ else
{
- if ( ( c >= interval[0] ) && ( c <= interval[1] ) )
- {
- return true;
- }
+ return value;
}
-
- return false;
}
-
-
+
+
/**
- *
- * We have to go through 6 steps :
- *
- * 1) Transcode
- * 2) Map
- * 3) Normalize
- * 4) Prohibit
- * 5) Bidi
- * 6) Insignifiant Character Handling
- *
- * The first step is already done, the step (3) is not done.
- *
- * @param str The String to normalize
- * @param type The string type
- * @return A normalized string.
- * @throws IOException
- */
- public static String normalize( String str, StringType type ) throws IOException
+ * Apply the RFC 4518 MAP transformation, case sensitive
+ *
+ * @param unicode The original String
+ * @return The mapped String
+ */
+ public static String mapCaseSensitive( String unicode )
{
- switch ( type )
+ try
{
- case NUMERIC_STRING:
- return insignifiantCharNumericString( str );
-
- case TELEPHONE_NUMBER:
- return insignifiantCharTelephoneNumber( str );
-
- case CASE_EXACT:
- case CASE_EXACT_IA5:
- case DIRECTORY_STRING:
- try
- {
- return insignifiantSpacesStringAscii( str, CASE_SENSITIVE );
- }
- catch ( Exception e )
- {
- return insignifiantSpacesString( str, CASE_SENSITIVE );
- }
-
- case CASE_IGNORE_IA5:
- case CASE_IGNORE_LIST:
- case CASE_IGNORE:
- try
- {
- return insignifiantSpacesStringAscii( str, IGNORE_CASE );
- }
- catch ( Exception e )
- {
- return insignifiantSpacesString( str, IGNORE_CASE );
- }
-
- case WORD:
- return str;
-
- default:
- return str;
-
+ return mapCaseSensitiveAscii( unicode );
+ }
+ catch ( ArrayIndexOutOfBoundsException aioobe )
+ {
+ // The ASCII-only fast path bailed out : fall through to the full mapping below
}
- }
+ char[] source = unicode.toCharArray();
+
+ // Create a target char array which is 3 times bigger than the original size.
+ // We have to do that because the map phase may transform a char to
+ // three chars.
+ // TODO : we have to find a way to prevent this waste of space.
+ char[] target = new char[unicode.length() * 3 + 2];
- /**
- * Execute the mapping step of the string preparation :
- * - suppress useless chars
- * - transform to spaces
- * - lowercase
- *
- * @param c The char to map
- * @param array The array which will collect the transformed char
- * @param pos The current position in the target
- * @param lowerCase A mask to lowercase the char, if necessary
- * @return The transformed StringBuilder
- */
- // CHECKSTYLE:OFF
- private static int map( char[] src, char[] target, char lowerCase )
- {
int limit = 0;
- for ( char c : src )
+ for ( char c : source )
{
switch ( c )
{
@@ -297,6 +144,9 @@ public final class PrepareString
case 0x0006:
case 0x0007:
case 0x0008:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: U+0000-0008...
break;
case 0x0009:
@@ -304,7 +154,10 @@ public final class PrepareString
case 0x000B:
case 0x000C:
case 0x000D:
- target[limit++] = ( char ) 0x20;
+ // CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
+ // TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
+ // (U+000D), ... are mapped to SPACE (U+0020).
+ target[limit++] = 0x0020;
break;
case 0x000E:
@@ -325,6 +178,9 @@ public final class PrepareString
case 0x001D:
case 0x001E:
case 0x001F:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: ... U+000E-001F...
break;
case 0x0041:
@@ -353,7 +209,9 @@ public final class PrepareString
case 0x0058:
case 0x0059:
case 0x005A:
- target[limit++] = ( char ) ( c | lowerCase );
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0041-005A
+ target[limit++] = c;
break;
case 0x007F:
@@ -362,10 +220,14 @@ public final class PrepareString
case 0x0082:
case 0x0083:
case 0x0084:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: ... U+007F-0084...
break;
case 0x0085:
- target[limit] = ( char ) 0x20;
+ // ... and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
+ target[limit++] = 0x0020;
break;
case 0x0086:
@@ -394,17 +256,30 @@ public final class PrepareString
case 0x009D:
case 0x009E:
case 0x009F:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: ... U+0086-009F...
break;
case 0x00A0:
- target[limit++] = ( char ) 0x20;
+ // All other code points with Separator (space, line, or paragraph) property
+ // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete
+ // list of these code points: ... 00A0 ...
+ target[limit++] = 0x0020;
break;
case 0x00AD:
+ // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
+ // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
+ // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
+ // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
+ // mapped to nothing.
break;
case 0x00B5:
- target[limit++] = ( char ) 0x03BC;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+00B5
+ target[limit++] = 0x03BC;
break;
case 0x00C0:
@@ -429,6 +304,7 @@ public final class PrepareString
case 0x00D3:
case 0x00D4:
case 0x00D5:
+ // no 0x00D7
case 0x00D6:
case 0x00D8:
case 0x00D9:
@@ -437,2789 +313,1466 @@ public final class PrepareString
case 0x00DC:
case 0x00DD:
case 0x00DE:
- target[limit++] = ( char ) ( c | lowerCase );
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+00C0-00D6,
+ // U+00D8-00DE
+ target[limit++] = c;
break;
case 0x00DF:
- target[limit++] = ( char ) 0x0073;
- target[limit++] = ( char ) 0x0073;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+00DF
+ target[limit++] = 0x0073;
+ target[limit++] = 0x0073;
break;
case 0x0100:
- target[limit++] = ( char ) 0x0101;
- break;
-
case 0x0102:
- target[limit++] = ( char ) 0x0103;
- break;
-
case 0x0104:
- target[limit++] = 0x0105;
- break;
-
case 0x0106:
- target[limit++] = 0x0107;
- break;
-
case 0x0108:
- target[limit++] = 0x0109;
- break;
-
case 0x010A:
- target[limit++] = 0x010B;
- break;
-
case 0x010C:
- target[limit++] = 0x010D;
- break;
-
case 0x010E:
- target[limit++] = 0x010F;
- break;
-
case 0x0110:
- target[limit++] = 0x0111;
- break;
-
case 0x0112:
- target[limit++] = 0x0113;
- break;
-
case 0x0114:
- target[limit++] = 0x0115;
- break;
-
case 0x0116:
- target[limit++] = 0x0117;
- break;
-
case 0x0118:
- target[limit++] = 0x0119;
- break;
-
case 0x011A:
- target[limit++] = 0x011B;
- break;
-
case 0x011C:
- target[limit++] = 0x011D;
- break;
-
case 0x011E:
- target[limit++] = 0x011F;
- break;
-
case 0x0120:
- target[limit++] = 0x0121;
- break;
-
case 0x0122:
- target[limit++] = 0x0123;
- break;
-
case 0x0124:
- target[limit++] = 0x0125;
- break;
-
case 0x0126:
- target[limit++] = 0x0127;
- break;
-
case 0x0128:
- target[limit++] = 0x0129;
- break;
-
case 0x012A:
- target[limit++] = 0x012B;
- break;
-
case 0x012C:
- target[limit++] = 0x012D;
- break;
-
case 0x012E:
- target[limit++] = 0x012F;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0100-012E
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0130:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0130
target[limit++] = 0x0069;
target[limit++] = 0x0307;
break;
case 0x0132:
- target[limit++] = 0x0133;
- break;
-
case 0x0134:
- target[limit++] = 0x0135;
- break;
-
case 0x0136:
- target[limit++] = 0x0137;
- break;
-
case 0x0139:
- target[limit++] = 0x013A;
- break;
-
case 0x013B:
- target[limit++] = 0x013C;
- break;
-
case 0x013D:
- target[limit++] = 0x013E;
- break;
-
case 0x013F:
- target[limit++] = 0x0140;
- break;
-
case 0x0141:
- target[limit++] = 0x0142;
- break;
-
case 0x0143:
- target[limit++] = 0x0144;
- break;
-
case 0x0145:
- target[limit++] = 0x0146;
- break;
-
case 0x0147:
- target[limit++] = 0x0148;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0132-0147
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0149:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0149
target[limit++] = 0x02BC;
target[limit++] = 0x006E;
break;
case 0x014A:
- target[limit++] = 0x014B;
- break;
-
case 0x014C:
- target[limit++] = 0x014D;
- break;
-
case 0x014E:
- target[limit++] = 0x014F;
- break;
-
case 0x0150:
- target[limit++] = 0x0151;
- break;
-
case 0x0152:
- target[limit++] = 0x0153;
- break;
-
case 0x0154:
- target[limit++] = 0x0155;
- break;
-
case 0x0156:
- target[limit++] = 0x0157;
- break;
-
case 0x0158:
- target[limit++] = 0x0159;
- break;
-
case 0x015A:
- target[limit++] = 0x015B;
- break;
-
case 0x015C:
- target[limit++] = 0x015D;
- break;
-
case 0x015E:
- target[limit++] = 0x015F;
- break;
-
case 0x0160:
- target[limit++] = 0x0161;
- break;
-
case 0x0162:
- target[limit++] = 0x0163;
- break;
-
case 0x0164:
- target[limit++] = 0x0165;
- break;
-
case 0x0166:
- target[limit++] = 0x0167;
- break;
-
case 0x0168:
- target[limit++] = 0x0169;
- break;
-
case 0x016A:
- target[limit++] = 0x016B;
- break;
-
case 0x016C:
- target[limit++] = 0x016D;
- break;
-
case 0x016E:
- target[limit++] = 0x016F;
- break;
-
case 0x0170:
- target[limit++] = 0x0171;
- break;
-
case 0x0172:
- target[limit++] = 0x0173;
- break;
-
case 0x0174:
- target[limit++] = 0x0175;
- break;
-
case 0x0176:
- target[limit++] = 0x0177;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+014A-0176
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0178:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0178
target[limit++] = 0x00FF;
break;
case 0x0179:
- target[limit++] = 0x017A;
- break;
-
case 0x017B:
- target[limit++] = 0x017C;
- break;
-
case 0x017D:
- target[limit++] = 0x017E;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0179-017D
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x017F:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+017F
target[limit++] = 0x0073;
break;
case 0x0181:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0181
target[limit++] = 0x0253;
break;
case 0x0182:
- target[limit++] = 0x0183;
- break;
-
case 0x0184:
- target[limit++] = 0x0185;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0182, U+0184
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0186:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0186
target[limit++] = 0x0254;
break;
case 0x0187:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0187
target[limit++] = 0x0188;
break;
case 0x0189:
- target[limit++] = 0x0256;
- break;
-
case 0x018A:
- target[limit++] = 0x0257;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0189, U+018A
+ target[limit++] = ( char ) ( c + 0x00CD );
break;
case 0x018B:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+018B
target[limit++] = 0x018C;
break;
case 0x018E:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+018E
target[limit++] = 0x01DD;
break;
case 0x018F:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+018F
target[limit++] = 0x0259;
break;
case 0x0190:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0190
target[limit++] = 0x025B;
break;
case 0x0191:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0191
target[limit++] = 0x0192;
break;
case 0x0193:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0193
target[limit++] = 0x0260;
break;
case 0x0194:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0194
target[limit++] = 0x0263;
break;
case 0x0196:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0196
target[limit++] = 0x0269;
break;
case 0x0197:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0197
target[limit++] = 0x0268;
break;
case 0x0198:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0198
target[limit++] = 0x0199;
break;
case 0x019C:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+019C
target[limit++] = 0x026F;
break;
case 0x019D:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+019D
target[limit++] = 0x0272;
break;
case 0x019F:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+019F
target[limit++] = 0x0275;
break;
case 0x01A0:
- target[limit++] = 0x01A1;
- break;
-
case 0x01A2:
- target[limit++] = 0x01A3;
- break;
-
case 0x01A4:
- target[limit++] = 0x01A5;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01A0-U+01A4
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x01A6:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01A6
target[limit++] = 0x0280;
break;
case 0x01A7:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01A7
target[limit++] = 0x01A8;
break;
case 0x01A9:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01A9
target[limit++] = 0x0283;
break;
case 0x01AC:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01AC
target[limit++] = 0x01AD;
break;
case 0x01AE:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01AE
target[limit++] = 0x0288;
break;
case 0x01AF:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01AF
target[limit++] = 0x01B0;
break;
case 0x01B1:
- target[limit++] = 0x028A;
- break;
-
case 0x01B2:
- target[limit++] = 0x028B;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01B1, U+01B2
+ target[limit++] = ( char ) ( c + 0x00D9 );
break;
case 0x01B3:
- target[limit++] = 0x01B4;
- break;
-
case 0x01B5:
- target[limit++] = 0x01B6;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01B3, U+01B5
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x01B7:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01B7
target[limit++] = 0x0292;
break;
case 0x01B8:
- target[limit++] = 0x01B9;
- break;
-
case 0x01BC:
- target[limit++] = 0x01BD;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01B8, U+01BC
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x01C4:
- target[limit++] = 0x01C6;
- break;
-
- case 0x01C5:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01C4,U+01C5
target[limit++] = 0x01C6;
break;
case 0x01C7:
- target[limit++] = 0x01C9;
- break;
-
- case 0x01C8:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01C7,U+01C8
target[limit++] = 0x01C9;
break;
case 0x01CA:
- target[limit++] = 0x01CC;
- break;
-
case 0x01CB:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01CA,U+01CB
target[limit++] = 0x01CC;
break;
case 0x01CD:
- target[limit++] = 0x01CE;
- break;
-
case 0x01CF:
- target[limit++] = 0x01D0;
- break;
-
case 0x01D1:
- target[limit++] = 0x01D2;
- break;
-
case 0x01D3:
- target[limit++] = 0x01D4;
- break;
-
case 0x01D5:
- target[limit++] = 0x01D6;
- break;
-
case 0x01D7:
- target[limit++] = 0x01D8;
- break;
-
case 0x01D9:
- target[limit++] = 0x01DA;
- break;
-
case 0x01DB:
- target[limit++] = 0x01DC;
- break;
-
case 0x01DE:
- target[limit++] = 0x01DF;
- break;
-
case 0x01E0:
- target[limit++] = 0x01E1;
- break;
-
case 0x01E2:
- target[limit++] = 0x01E3;
- break;
-
case 0x01E4:
- target[limit++] = 0x01E5;
- break;
-
case 0x01E6:
- target[limit++] = 0x01E7;
- break;
-
case 0x01E8:
- target[limit++] = 0x01E9;
- break;
-
case 0x01EA:
- target[limit++] = 0x01EB;
- break;
-
case 0x01EC:
- target[limit++] = 0x01ED;
- break;
-
case 0x01EE:
- target[limit++] = 0x01EF;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01CD-U+01EE
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x01F0:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F0
target[limit++] = 0x006A;
target[limit++] = 0x030C;
break;
case 0x01F1:
- target[limit++] = 0x01F3;
- break;
-
case 0x01F2:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F1, U+01F2
target[limit++] = 0x01F3;
break;
case 0x01F4:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F4
target[limit++] = 0x01F5;
break;
case 0x01F6:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F6
target[limit++] = 0x0195;
break;
case 0x01F7:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F7
target[limit++] = 0x01BF;
break;
case 0x01F8:
- target[limit++] = 0x01F9;
- break;
-
case 0x01FA:
- target[limit++] = 0x01FB;
- break;
-
case 0x01FC:
- target[limit++] = 0x01FD;
- break;
-
case 0x01FE:
- target[limit++] = 0x01FF;
- break;
-
case 0x0200:
- target[limit++] = 0x0201;
- break;
-
case 0x0202:
- target[limit++] = 0x0203;
- break;
-
case 0x0204:
- target[limit++] = 0x0205;
- break;
-
case 0x0206:
- target[limit++] = 0x0207;
- break;
-
case 0x0208:
- target[limit++] = 0x0209;
- break;
-
case 0x020A:
- target[limit++] = 0x020B;
- break;
-
case 0x020C:
- target[limit++] = 0x020D;
- break;
-
case 0x020E:
- target[limit++] = 0x020F;
- break;
-
case 0x0210:
- target[limit++] = 0x0211;
- break;
-
case 0x0212:
- target[limit++] = 0x0213;
- break;
-
case 0x0214:
- target[limit++] = 0x0215;
- break;
-
case 0x0216:
- target[limit++] = 0x0217;
- break;
-
case 0x0218:
- target[limit++] = 0x0219;
- break;
-
case 0x021A:
- target[limit++] = 0x021B;
- break;
-
case 0x021C:
- target[limit++] = 0x021D;
- break;
-
case 0x021E:
- target[limit++] = 0x021F;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+01F8-U+021E
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
+
case 0x0220:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0220
target[limit++] = 0x019E;
break;
case 0x0222:
- target[limit++] = 0x0223;
- break;
-
case 0x0224:
- target[limit++] = 0x0225;
- break;
-
case 0x0226:
- target[limit++] = 0x0227;
- break;
-
case 0x0228:
- target[limit++] = 0x0229;
- break;
-
case 0x022A:
- target[limit++] = 0x022B;
- break;
-
case 0x022C:
- target[limit++] = 0x022D;
- break;
-
case 0x022E:
- target[limit++] = 0x022F;
- break;
-
case 0x0230:
- target[limit++] = 0x0231;
- break;
-
case 0x0232:
- target[limit++] = 0x0233;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0222-U+0232
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0345:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0345
target[limit++] = 0x03B9;
break;
case 0x034F:
+ // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
+ // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
+ // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
+ // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
+ // mapped to nothing.
break;
case 0x037A:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+037A
target[limit++] = 0x0020;
target[limit++] = 0x03B9;
break;
case 0x0386:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0386
target[limit++] = 0x03AC;
break;
case 0x0388:
- target[limit++] = 0x03AD;
- break;
-
case 0x0389:
- target[limit++] = 0x03AE;
- break;
-
case 0x038A:
- target[limit++] = 0x03AF;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0388, U+0389, U+038A
+ target[limit++] = ( char ) ( c + 0x0025 );
break;
case 0x038C:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+038C
target[limit++] = 0x03CC;
break;
case 0x038E:
- target[limit++] = 0x03CD;
- break;
-
case 0x038F:
- target[limit++] = 0x03CE;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+038E, U+038F
+ target[limit++] = ( char ) ( c + 0x0025 );
break;
case 0x0390:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0390
target[limit++] = 0x03B9;
target[limit++] = 0x0308;
target[limit++] = 0x0301;
break;
case 0x0391:
- target[limit++] = 0x03B1;
- break;
-
case 0x0392:
- target[limit++] = 0x03B2;
- break;
-
case 0x0393:
- target[limit++] = 0x03B3;
- break;
-
case 0x0394:
- target[limit++] = 0x03B4;
- break;
-
case 0x0395:
- target[limit++] = 0x03B5;
- break;
-
case 0x0396:
- target[limit++] = 0x03B6;
- break;
-
case 0x0397:
- target[limit++] = 0x03B7;
- break;
-
case 0x0398:
- target[limit++] = 0x03B8;
- break;
-
case 0x0399:
- target[limit++] = 0x03B9;
- break;
-
case 0x039A:
- target[limit++] = 0x03BA;
- break;
-
case 0x039B:
- target[limit++] = 0x03BB;
- break;
-
case 0x039C:
- target[limit++] = 0x03BC;
- break;
-
case 0x039D:
- target[limit++] = 0x03BD;
- break;
-
case 0x039E:
- target[limit++] = 0x03BE;
- break;
-
case 0x039F:
- target[limit++] = 0x03BF;
- break;
-
case 0x03A0:
- target[limit++] = 0x03C0;
- break;
-
case 0x03A1:
- target[limit++] = 0x03C1;
- break;
-
case 0x03A3:
- target[limit++] = 0x03C3;
- break;
-
case 0x03A4:
- target[limit++] = 0x03C4;
- break;
-
case 0x03A5:
- target[limit++] = 0x03C5;
- break;
-
case 0x03A6:
- target[limit++] = 0x03C6;
- break;
-
case 0x03A7:
- target[limit++] = 0x03C7;
- break;
-
case 0x03A8:
- target[limit++] = 0x03C8;
- break;
-
case 0x03A9:
- target[limit++] = 0x03C9;
- break;
-
case 0x03AA:
- target[limit++] = 0x03CA;
- break;
-
case 0x03AB:
- target[limit++] = 0x03CB;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0391-U+03AB
+ target[limit++] = ( char ) ( c + 0x0020 );
break;
+
case 0x03B0:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03B0
target[limit++] = 0x03C5;
target[limit++] = 0x0308;
target[limit++] = 0x0301;
break;
case 0x03C2:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03C2
target[limit++] = 0x03C3;
break;
case 0x03D0:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D0
target[limit++] = 0x03B2;
break;
case 0x03D1:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D1
target[limit++] = 0x03B8;
break;
case 0x03D2:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D2
target[limit++] = 0x03C5;
break;
case 0x03D3:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D3
target[limit++] = 0x03CD;
break;
case 0x03D4:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D4
target[limit++] = 0x03CB;
break;
case 0x03D5:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D5
target[limit++] = 0x03C6;
break;
case 0x03D6:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D6
target[limit++] = 0x03C0;
break;
case 0x03D8:
- target[limit++] = 0x03D9;
- break;
-
case 0x03DA:
- target[limit++] = 0x03DB;
- break;
-
case 0x03DC:
- target[limit++] = 0x03DD;
- break;
-
case 0x03DE:
- target[limit++] = 0x03DF;
- break;
-
case 0x03E0:
- target[limit++] = 0x03E1;
- break;
-
case 0x03E2:
- target[limit++] = 0x03E3;
- break;
-
case 0x03E4:
- target[limit++] = 0x03E5;
- break;
-
case 0x03E6:
- target[limit++] = 0x03E7;
- break;
-
case 0x03E8:
- target[limit++] = 0x03E9;
- break;
-
case 0x03EA:
- target[limit++] = 0x03EB;
- break;
-
case 0x03EC:
- target[limit++] = 0x03ED;
- break;
-
case 0x03EE:
- target[limit++] = 0x03EF;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03D8-U+03EE
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x03F0:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03F0
target[limit++] = 0x03BA;
break;
case 0x03F1:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03F1
target[limit++] = 0x03C1;
break;
case 0x03F2:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03F2
target[limit++] = 0x03C3;
break;
case 0x03F4:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03F4
target[limit++] = 0x03B8;
break;
case 0x03F5:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+03F5
target[limit++] = 0x03B5;
break;
case 0x0400:
- target[limit++] = 0x0450;
- break;
-
case 0x0401:
- target[limit++] = 0x0451;
- break;
-
case 0x0402:
- target[limit++] = 0x0452;
- break;
-
case 0x0403:
- target[limit++] = 0x0453;
- break;
-
case 0x0404:
- target[limit++] = 0x0454;
- break;
-
case 0x0405:
- target[limit++] = 0x0455;
- break;
-
case 0x0406:
- target[limit++] = 0x0456;
- break;
-
case 0x0407:
- target[limit++] = 0x0457;
- break;
-
case 0x0408:
- target[limit++] = 0x0458;
- break;
-
case 0x0409:
- target[limit++] = 0x0459;
- break;
-
case 0x040A:
- target[limit++] = 0x045A;
- break;
-
case 0x040B:
- target[limit++] = 0x045B;
- break;
-
case 0x040C:
- target[limit++] = 0x045C;
- break;
-
case 0x040D:
- target[limit++] = 0x045D;
- break;
-
case 0x040E:
- target[limit++] = 0x045E;
- break;
-
case 0x040F:
- target[limit++] = 0x045F;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0400-U+040F
+ target[limit++] = ( char ) ( c + 0x0050 );
break;
case 0x0410:
- target[limit++] = 0x0430;
- break;
-
case 0x0411:
- target[limit++] = 0x0431;
- break;
-
case 0x0412:
- target[limit++] = 0x0432;
- break;
-
case 0x0413:
- target[limit++] = 0x0433;
- break;
-
case 0x0414:
- target[limit++] = 0x0434;
- break;
-
case 0x0415:
- target[limit++] = 0x0435;
- break;
-
case 0x0416:
- target[limit++] = 0x0436;
- break;
-
case 0x0417:
- target[limit++] = 0x0437;
- break;
-
case 0x0418:
- target[limit++] = 0x0438;
- break;
-
case 0x0419:
- target[limit++] = 0x0439;
- break;
-
case 0x041A:
- target[limit++] = 0x043A;
- break;
-
case 0x041B:
- target[limit++] = 0x043B;
- break;
-
case 0x041C:
- target[limit++] = 0x043C;
- break;
-
case 0x041D:
- target[limit++] = 0x043D;
- break;
-
case 0x041E:
- target[limit++] = 0x043E;
- break;
-
case 0x041F:
- target[limit++] = 0x043F;
- break;
-
case 0x0420:
- target[limit++] = 0x0440;
- break;
-
case 0x0421:
- target[limit++] = 0x0441;
- break;
-
case 0x0422:
- target[limit++] = 0x0442;
- break;
-
case 0x0423:
- target[limit++] = 0x0443;
- break;
-
case 0x0424:
- target[limit++] = 0x0444;
- break;
-
case 0x0425:
- target[limit++] = 0x0445;
- break;
-
case 0x0426:
- target[limit++] = 0x0446;
- break;
-
case 0x0427:
- target[limit++] = 0x0447;
- break;
-
case 0x0428:
- target[limit++] = 0x0448;
- break;
-
case 0x0429:
- target[limit++] = 0x0449;
- break;
-
case 0x042A:
- target[limit++] = 0x044A;
- break;
-
case 0x042B:
- target[limit++] = 0x044B;
- break;
-
case 0x042C:
- target[limit++] = 0x044C;
- break;
-
case 0x042D:
- target[limit++] = 0x044D;
- break;
-
case 0x042E:
- target[limit++] = 0x044E;
- break;
-
case 0x042F:
- target[limit++] = 0x044F;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0410-U+042F
+ target[limit++] = ( char ) ( c + 0x0020 );
break;
case 0x0460:
- target[limit++] = 0x0461;
- break;
-
case 0x0462:
- target[limit++] = 0x0463;
- break;
-
case 0x0464:
- target[limit++] = 0x0465;
- break;
-
case 0x0466:
- target[limit++] = 0x0467;
- break;
-
case 0x0468:
- target[limit++] = 0x0469;
- break;
-
case 0x046A:
- target[limit++] = 0x046B;
- break;
-
case 0x046C:
- target[limit++] = 0x046D;
- break;
-
case 0x046E:
- target[limit++] = 0x046F;
- break;
-
case 0x0470:
- target[limit++] = 0x0471;
- break;
-
case 0x0472:
- target[limit++] = 0x0473;
- break;
-
case 0x0474:
- target[limit++] = 0x0475;
- break;
-
case 0x0476:
- target[limit++] = 0x0477;
- break;
-
case 0x0478:
- target[limit++] = 0x0479;
- break;
-
case 0x047A:
- target[limit++] = 0x047B;
- break;
-
case 0x047C:
- target[limit++] = 0x047D;
- break;
-
case 0x047E:
- target[limit++] = 0x047F;
- break;
-
case 0x0480:
- target[limit++] = 0x0481;
- break;
-
case 0x048A:
- target[limit++] = 0x048B;
- break;
-
case 0x048C:
- target[limit++] = 0x048D;
- break;
-
case 0x048E:
- target[limit++] = 0x048F;
- break;
-
case 0x0490:
- target[limit++] = 0x0491;
- break;
-
case 0x0492:
- target[limit++] = 0x0493;
- break;
-
case 0x0494:
- target[limit++] = 0x0495;
- break;
-
case 0x0496:
- target[limit++] = 0x0497;
- break;
-
case 0x0498:
- target[limit++] = 0x0499;
- break;
-
case 0x049A:
- target[limit++] = 0x049B;
- break;
-
case 0x049C:
- target[limit++] = 0x049D;
- break;
-
case 0x049E:
- target[limit++] = 0x049F;
- break;
-
case 0x04A0:
- target[limit++] = 0x04A1;
- break;
-
case 0x04A2:
- target[limit++] = 0x04A3;
- break;
-
case 0x04A4:
- target[limit++] = 0x04A5;
- break;
-
case 0x04A6:
- target[limit++] = 0x04A7;
- break;
-
case 0x04A8:
- target[limit++] = 0x04A9;
- break;
-
case 0x04AA:
- target[limit++] = 0x04AB;
- break;
-
case 0x04AC:
- target[limit++] = 0x04AD;
- break;
-
case 0x04AE:
- target[limit++] = 0x04AF;
- break;
-
case 0x04B0:
- target[limit++] = 0x04B1;
- break;
-
case 0x04B2:
- target[limit++] = 0x04B3;
- break;
-
case 0x04B4:
- target[limit++] = 0x04B5;
- break;
-
case 0x04B6:
- target[limit++] = 0x04B7;
- break;
-
case 0x04B8:
- target[limit++] = 0x04B9;
- break;
-
case 0x04BA:
- target[limit++] = 0x04BB;
- break;
-
case 0x04BC:
- target[limit++] = 0x04BD;
- break;
-
case 0x04BE:
- target[limit++] = 0x04BF;
- break;
-
case 0x04C1:
- target[limit++] = 0x04C2;
- break;
-
case 0x04C3:
- target[limit++] = 0x04C4;
- break;
-
case 0x04C5:
- target[limit++] = 0x04C6;
- break;
-
case 0x04C7:
- target[limit++] = 0x04C8;
- break;
-
case 0x04C9:
- target[limit++] = 0x04CA;
- break;
-
case 0x04CB:
- target[limit++] = 0x04CC;
- break;
-
case 0x04CD:
- target[limit++] = 0x04CE;
- break;
-
case 0x04D0:
- target[limit++] = 0x04D1;
- break;
-
case 0x04D2:
- target[limit++] = 0x04D3;
- break;
-
case 0x04D4:
- target[limit++] = 0x04D5;
- break;
-
case 0x04D6:
- target[limit++] = 0x04D7;
- break;
-
case 0x04D8:
- target[limit++] = 0x04D9;
- break;
-
case 0x04DA:
- target[limit++] = 0x04DB;
- break;
-
case 0x04DC:
- target[limit++] = 0x04DD;
- break;
-
case 0x04DE:
- target[limit++] = 0x04DF;
- break;
-
case 0x04E0:
- target[limit++] = 0x04E1;
- break;
-
case 0x04E2:
- target[limit++] = 0x04E3;
- break;
-
case 0x04E4:
- target[limit++] = 0x04E5;
- break;
-
case 0x04E6:
- target[limit++] = 0x04E7;
- break;
-
case 0x04E8:
- target[limit++] = 0x04E9;
- break;
-
case 0x04EA:
- target[limit++] = 0x04EB;
- break;
-
case 0x04EC:
- target[limit++] = 0x04ED;
- break;
-
case 0x04EE:
- target[limit++] = 0x04EF;
- break;
-
case 0x04F0:
- target[limit++] = 0x04F1;
- break;
-
case 0x04F2:
- target[limit++] = 0x04F3;
- break;
-
case 0x04F4:
- target[limit++] = 0x04F5;
- break;
-
case 0x04F8:
- target[limit++] = 0x04F9;
- break;
-
case 0x0500:
- target[limit++] = 0x0501;
- break;
-
case 0x0502:
- target[limit++] = 0x0503;
- break;
-
case 0x0504:
- target[limit++] = 0x0505;
- break;
-
case 0x0506:
- target[limit++] = 0x0507;
- break;
-
case 0x0508:
- target[limit++] = 0x0509;
- break;
-
case 0x050A:
- target[limit++] = 0x050B;
- break;
-
case 0x050C:
- target[limit++] = 0x050D;
- break;
-
case 0x050E:
- target[limit++] = 0x050F;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0460-U+050E
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x0531:
- target[limit++] = 0x0561;
- break;
-
case 0x0532:
- target[limit++] = 0x0562;
- break;
-
case 0x0533:
- target[limit++] = 0x0563;
- break;
-
case 0x0534:
- target[limit++] = 0x0564;
- break;
-
case 0x0535:
- target[limit++] = 0x0565;
- break;
-
case 0x0536:
- target[limit++] = 0x0566;
- break;
-
case 0x0537:
- target[limit++] = 0x0567;
- break;
-
case 0x0538:
- target[limit++] = 0x0568;
- break;
-
case 0x0539:
- target[limit++] = 0x0569;
- break;
-
case 0x053A:
- target[limit++] = 0x056A;
- break;
-
case 0x053B:
- target[limit++] = 0x056B;
- break;
-
case 0x053C:
- target[limit++] = 0x056C;
- break;
-
case 0x053D:
- target[limit++] = 0x056D;
- break;
-
case 0x053E:
- target[limit++] = 0x056E;
- break;
-
case 0x053F:
- target[limit++] = 0x056F;
- break;
-
case 0x0540:
- target[limit++] = 0x0570;
- break;
-
case 0x0541:
- target[limit++] = 0x0571;
- break;
-
case 0x0542:
- target[limit++] = 0x0572;
- break;
-
case 0x0543:
- target[limit++] = 0x0573;
- break;
-
case 0x0544:
- target[limit++] = 0x0574;
- break;
-
case 0x0545:
- target[limit++] = 0x0575;
- break;
-
case 0x0546:
- target[limit++] = 0x0576;
- break;
-
case 0x0547:
- target[limit++] = 0x0577;
- break;
-
case 0x0548:
- target[limit++] = 0x0578;
- break;
-
case 0x0549:
- target[limit++] = 0x0579;
- break;
-
case 0x054A:
- target[limit++] = 0x057A;
- break;
-
case 0x054B:
- target[limit++] = 0x057B;
- break;
-
case 0x054C:
- target[limit++] = 0x057C;
- break;
-
case 0x054D:
- target[limit++] = 0x057D;
- break;
-
case 0x054E:
- target[limit++] = 0x057E;
- break;
-
case 0x054F:
- target[limit++] = 0x057F;
- break;
-
case 0x0550:
- target[limit++] = 0x0580;
- break;
-
case 0x0551:
- target[limit++] = 0x0581;
- break;
-
case 0x0552:
- target[limit++] = 0x0582;
- break;
-
case 0x0553:
- target[limit++] = 0x0583;
- break;
-
case 0x0554:
- target[limit++] = 0x0584;
- break;
-
case 0x0555:
- target[limit++] = 0x0585;
- break;
-
case 0x0556:
- target[limit++] = 0x0586;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0531-U+0556
+ target[limit++] = ( char ) ( c + 0x0030 );
break;
+
case 0x0587:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+0587
target[limit++] = 0x0565;
target[limit++] = 0x0582;
break;
case 0x06DD:
- break;
-
case 0x070F:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: ... U+06DD, U+070F...
break;
case 0x1680:
+ // All other code points with Separator (space, line, or paragraph) property
+ // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete
+ // list of these code points: ...1680...
target[limit++] = 0x0020;
break;
case 0x1806:
+ // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
+ // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
+ // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
+ // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
+ // mapped to nothing.
break;
case 0x180B:
case 0x180C:
case 0x180D:
+ // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
+ // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
+ // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
+ // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
+ // mapped to nothing.
+ break;
+
case 0x180E:
+ // All other control code (e.g., Cc) points or code points with a
+ // control function (e.g., Cf) are mapped to nothing. The following is
+ // a complete list of these code points: ... U+180E...
break;
case 0x1E00:
- target[limit++] = 0x1E01;
- break;
-
case 0x1E02:
- target[limit++] = 0x1E03;
- break;
-
case 0x1E04:
- target[limit++] = 0x1E05;
- break;
-
case 0x1E06:
- target[limit++] = 0x1E07;
- break;
-
case 0x1E08:
- target[limit++] = 0x1E09;
- break;
-
case 0x1E0A:
- target[limit++] = 0x1E0B;
- break;
-
case 0x1E0C:
- target[limit++] = 0x1E0D;
- break;
-
case 0x1E0E:
- target[limit++] = 0x1E0F;
- break;
-
case 0x1E10:
- target[limit++] = 0x1E11;
- break;
-
case 0x1E12:
- target[limit++] = 0x1E13;
- break;
-
case 0x1E14:
- target[limit++] = 0x1E15;
- break;
-
case 0x1E16:
- target[limit++] = 0x1E17;
- break;
-
case 0x1E18:
- target[limit++] = 0x1E19;
- break;
-
case 0x1E1A:
- target[limit++] = 0x1E1B;
- break;
-
case 0x1E1C:
- target[limit++] = 0x1E1D;
- break;
-
case 0x1E1E:
- target[limit++] = 0x1E1F;
- break;
-
case 0x1E20:
- target[limit++] = 0x1E21;
- break;
-
case 0x1E22:
- target[limit++] = 0x1E23;
- break;
-
case 0x1E24:
- target[limit++] = 0x1E25;
- break;
-
case 0x1E26:
- target[limit++] = 0x1E27;
- break;
-
case 0x1E28:
- target[limit++] = 0x1E29;
- break;
-
case 0x1E2A:
- target[limit++] = 0x1E2B;
- break;
-
case 0x1E2C:
- target[limit++] = 0x1E2D;
- break;
-
case 0x1E2E:
- target[limit++] = 0x1E2F;
- break;
-
case 0x1E30:
- target[limit++] = 0x1E31;
- break;
-
case 0x1E32:
- target[limit++] = 0x1E33;
- break;
-
case 0x1E34:
- target[limit++] = 0x1E35;
- break;
-
case 0x1E36:
- target[limit++] = 0x1E37;
- break;
-
case 0x1E38:
- target[limit++] = 0x1E39;
- break;
-
case 0x1E3A:
- target[limit++] = 0x1E3B;
- break;
-
case 0x1E3C:
- target[limit++] = 0x1E3D;
- break;
-
case 0x1E3E:
- target[limit++] = 0x1E3F;
- break;
-
case 0x1E40:
- target[limit++] = 0x1E41;
- break;
-
case 0x1E42:
- target[limit++] = 0x1E43;
- break;
-
case 0x1E44:
- target[limit++] = 0x1E45;
- break;
-
case 0x1E46:
- target[limit++] = 0x1E47;
- break;
-
case 0x1E48:
- target[limit++] = 0x1E49;
- break;
-
case 0x1E4A:
- target[limit++] = 0x1E4B;
- break;
-
case 0x1E4C:
- target[limit++] = 0x1E4D;
- break;
-
case 0x1E4E:
- target[limit++] = 0x1E4F;
- break;
-
case 0x1E50:
- target[limit++] = 0x1E51;
- break;
-
case 0x1E52:
- target[limit++] = 0x1E53;
- break;
-
case 0x1E54:
- target[limit++] = 0x1E55;
- break;
-
case 0x1E56:
- target[limit++] = 0x1E57;
- break;
-
case 0x1E58:
- target[limit++] = 0x1E59;
- break;
-
case 0x1E5A:
- target[limit++] = 0x1E5B;
- break;
-
case 0x1E5C:
- target[limit++] = 0x1E5D;
- break;
-
case 0x1E5E:
- target[limit++] = 0x1E5F;
- break;
-
case 0x1E60:
- target[limit++] = 0x1E61;
- break;
-
case 0x1E62:
- target[limit++] = 0x1E63;
- break;
-
case 0x1E64:
- target[limit++] = 0x1E65;
- break;
-
case 0x1E66:
- target[limit++] = 0x1E67;
- break;
-
case 0x1E68:
- target[limit++] = 0x1E69;
- break;
-
case 0x1E6A:
- target[limit++] = 0x1E6B;
- break;
-
case 0x1E6C:
- target[limit++] = 0x1E6D;
- break;
-
case 0x1E6E:
- target[limit++] = 0x1E6F;
- break;
-
case 0x1E70:
- target[limit++] = 0x1E71;
- break;
-
case 0x1E72:
- target[limit++] = 0x1E73;
- break;
-
case 0x1E74:
- target[limit++] = 0x1E75;
- break;
-
case 0x1E76:
- target[limit++] = 0x1E77;
- break;
-
case 0x1E78:
- target[limit++] = 0x1E79;
- break;
-
case 0x1E7A:
- target[limit++] = 0x1E7B;
- break;
-
case 0x1E7C:
- target[limit++] = 0x1E7D;
- break;
-
case 0x1E7E:
- target[limit++] = 0x1E7F;
- break;
-
case 0x1E80:
- target[limit++] = 0x1E81;
- break;
-
case 0x1E82:
- target[limit++] = 0x1E83;
- break;
-
case 0x1E84:
- target[limit++] = 0x1E85;
- break;
-
case 0x1E86:
- target[limit++] = 0x1E87;
- break;
-
case 0x1E88:
- target[limit++] = 0x1E89;
- break;
-
case 0x1E8A:
- target[limit++] = 0x1E8B;
- break;
-
case 0x1E8C:
- target[limit++] = 0x1E8D;
- break;
-
case 0x1E8E:
- target[limit++] = 0x1E8F;
- break;
-
case 0x1E90:
- target[limit++] = 0x1E91;
- break;
-
case 0x1E92:
- target[limit++] = 0x1E93;
- break;
-
case 0x1E94:
- target[limit++] = 0x1E95;
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E00-U+1E94
+ target[limit++] = ( char ) ( c + 0x0001 );
break;
case 0x1E96:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E96
target[limit++] = 0x0068;
target[limit++] = 0x0331;
break;
case 0x1E97:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E97
target[limit++] = 0x0074;
target[limit++] = 0x0308;
break;
case 0x1E98:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E98
target[limit++] = 0x0077;
target[limit++] = 0x030A;
break;
case 0x1E99:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E99
target[limit++] = 0x0079;
target[limit++] = 0x030A;
break;
case 0x1E9A:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E9A
target[limit++] = 0x0061;
target[limit++] = 0x02BE;
break;
case 0x1E9B:
+ // For case ignore, numeric, and stored prefix string matching rules,
+ // characters are case folded per B.2 of [RFC3454] : U+1E9B
target[limit++] = 0x1E61;
break;
case 0x1EA0:
- target[limit++] = 0x1EA1;
- break;
-
case 0x1EA2:
- target[limit++] = 0x1EA3;
- break;
-
case 0x1EA4:
- target[limit++] = 0x1EA5;
- break;
-
case 0x1EA6:
- target[limit++] = 0x1EA7;
- break;
-
case 0x1EA8:
- target[limit++] = 0x1EA9;
- break;
-
case 0x1EAA:
- target[limit++] = 0x1EAB;
- break;
-
case 0x1EAC:
- target[limit++] = 0x1EAD;
- break;
-
case 0x1EAE:
- target[limit++] = 0x1EAF;
- break;
-
case 0x1EB0:
- target[limit++] = 0x1EB1;
- break;
-
case 0x1EB2:
- target[limit++] = 0x1EB3;
- break;
-
case 0x1EB4:
- target[limit++] = 0x1EB5;
- break;
-
case 0x1EB6:
- target[limit++] = 0x1EB7;
- break;
-
case 0x1EB8:
- target[limit++] = 0x1EB9;
- break;
-
case 0x1EBA:
- target[limit++] = 0x1EBB;
- break;
-
case 0x1EBC:
- target[limit++] = 0x1EBD;
- break;
-
case 0x1EBE:
- target[limit++] = 0x1EBF;
- break;
-
case 0x1EC0:
- target[limit++] = 0x1EC1;
- break;
-
case 0x1EC2:
- target[limit++] = 0x1EC3;
- break;
-
case 0x1EC4:
- target[limit++] = 0x1EC5;
- break;
-
case 0x1EC6:
- target[limit++] = 0x1EC7;
- break;
-
case 0x1EC8:
- target[limit++] = 0x1EC9;
- break;
-
case 0x1ECA:
- target[limit++] = 0x1ECB;
- break;
-
case 0x1ECC:
- target[limit++] = 0x1ECD;
- break;
-
case 0x1ECE:
- target[limit++] = 0x1ECF;
- break;
-
case 0x1ED0:
- target[limit++] = 0x1ED1;
- break;
-
case 0x1ED2:
- target[limit++] = 0x1ED3;
- break;
-
case 0x1ED4:
- target[limit++] = 0x1ED5;
- break;
-
case 0x1ED6:
- target[limit++] = 0x1ED7;
- break;
-
case 0x1ED8:
- target[limit++] = 0x1ED9;
- break;
-
case 0x1EDA:
- target[limit++] = 0x1EDB;
- break;
-
case 0x1EDC:
- target[limit++] = 0x1EDD;
- break;
-
case 0x1EDE:
- target[limit++] = 0x1EDF;
- break;
-
case 0x1EE0:
- target[limit++] = 0x1EE1;
- break;
-
case 0x1EE2:
- target[limit++] = 0x1EE3;
- break;
-
case 0x1EE4:
- target[limit++] = 0x1EE5;
- break;
-
case 0x1EE6:
- target[limit++] = 0x1EE7;
- break;
-
case 0x1EE8:
- target[limit++] = 0x1EE9;
- break;
-
case 0x1EEA:
- target[limit++] = 0x1EEB;
- break;
-
case 0x1EEC:
- target[limit++] = 0x1EED;
- break;
-
case 0x1EEE:
[... 6413 lines stripped ...]