You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2006/12/26 21:07:53 UTC
svn commit: r490362 -
/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Author: elecharny
Date: Tue Dec 26 12:07:50 2006
New Revision: 490362
URL: http://svn.apache.org/viewvc?view=rev&rev=490362
Log:
Added all the prepareString steps
Modified:
directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Modified: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java?view=diff&rev=490362&r1=490361&r2=490362
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java (original)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java Tue Dec 26 12:07:50 2006
@@ -22,11 +22,78 @@
import java.io.IOException;
+import org.apache.directory.shared.ldap.util.StringTools;
+import org.apache.directory.shared.ldap.util.unicode.InvalidCharacterException;
import org.apache.directory.shared.ldap.util.unicode.Normalizer;
+/**
+ *
+ * This class implements the 6 steps described in RFC 4518
+ *
+ * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
+ * @version $Rev$, $Date$
+ */
public class PrepareString
{
+ /**
+ * All the possible combining marks. Each entry is a { first, last } pair
+ * of chars; isCombiningMark() treats both bounds as inclusive, so a single
+ * code point is stored with identical first and last values.
+ */
+ private static final char[][] COMBINING_MARKS = new char[][]
+ {
+ { 0x0300, 0x034F }, { 0x0360, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
+ { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BC }, { 0x05BF, 0x05BF },
+ { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
+ { 0x06D6, 0x06DC }, { 0x06DE, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
+ { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x0901, 0x0903 },
+ { 0x093C, 0x093C }, { 0x093E, 0x094F }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
+ { 0x0981, 0x0983 }, { 0x09BC, 0x09BC }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
+ { 0x09CB, 0x09CD }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
+ { 0x0A3C, 0x0A3C }, { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+ { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A83 }, { 0x0ABC, 0x0ABC }, { 0x0ABE, 0x0AC5 },
+ { 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACD }, { 0x0B01, 0x0B03 }, { 0x0B3C, 0x0B3C },
+ { 0x0B3E, 0x0B43 }, { 0x0B47, 0x0B48 }, { 0x0B4B, 0x0B4D }, { 0x0B56, 0x0B57 },
+ { 0x0B82, 0x0B82 }, { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 }, { 0x0BCA, 0x0BCD },
+ { 0x0BD7, 0x0BD7 }, { 0x0C01, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
+ { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C82, 0x0C83 }, { 0x0CBE, 0x0CC4 },
+ { 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCD }, { 0x0CD5, 0x0CD6 }, { 0x0D02, 0x0D03 },
+ { 0x0D3E, 0x0D43 }, { 0x0D46, 0x0D48 }, { 0x0D4A, 0x0D4D }, { 0x0D57, 0x0D57 },
+ { 0x0D82, 0x0D83 }, { 0x0DCA, 0x0DCA }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
+ { 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
+ { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
+ { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
+ { 0x0F39, 0x0F39 }, { 0x0F3E, 0x0F3F }, { 0x0F71, 0x0F84 }, { 0x0F86, 0x0F87 },
+ { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102C, 0x1032 },
+ { 0x1036, 0x1039 }, { 0x1056, 0x1059 }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 },
+ { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B4, 0x17D3 }, { 0x180B, 0x180D },
+ { 0x18A9, 0x18A9 }, { 0x20D0, 0x20EA }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
+ { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }
+ };
+
+ /**
+ * Checks whether a char falls inside one of the COMBINING_MARKS intervals.
+ *
+ * @param c The char to check
+ * @return <code>true</code> if the char lies within one of the intervals
+ * (bounds included), <code>false</code> otherwise
+ */
+ private static boolean isCombiningMark( char c )
+ {
+     for ( int i = 0; i < COMBINING_MARKS.length; i++ )
+     {
+         char lower = COMBINING_MARKS[i][0];
+         char upper = COMBINING_MARKS[i][1];
+
+         // Outside this interval : try the next one
+         if ( ( c < lower ) || ( c > upper ) )
+         {
+             continue;
+         }
+
+         return true;
+     }
+
+     return false;
+ }
+ /**
+ *
+ * TODO normalize.
+ *
+ * @param str
+ * @return
+ * @throws IOException
+ */
public static StringBuilder normalize( String str ) throws IOException
{
return Normalizer.normalize( str, Normalizer.Form.KC );
@@ -3988,5 +4055,2348 @@
}
return sb;
+ }
+
+ /**
+ *
+ * Prohibit characters described in RFC 4518 :
+ * - Table A.1 of RFC 3454
+ * - Table C.3 of RFC 3454
+ * - Table C.4 of RFC 3454
+ * - Table C.5 of RFC 3454
+ * - Table C.8 of RFC 3454
+ * - character U-FFFD
+ *
+ * @param array That char array to analyze
+ * @throws InvalidCharacterException If any character is prohibited
+ */
+ public static void prohibit( char[] array ) throws InvalidCharacterException
+ {
+ for ( char c:array )
+ {
+ // RFC 3454, Table A.1
+ switch ( c )
+ {
+ case 0x0221 :
+ case 0x038B :
+ case 0x038D :
+ case 0x03A2 :
+ case 0x03CF :
+ case 0x0487 :
+ case 0x04CF :
+ case 0x0560 :
+ case 0x0588 :
+ case 0x05A2 :
+ case 0x05BA :
+ case 0x0620 :
+ case 0x06FF :
+ case 0x070E :
+ case 0x0904 :
+ case 0x0984 :
+ case 0x09A9 :
+ case 0x09B1 :
+ case 0x09BD :
+ case 0x09DE :
+ case 0x0A29 :
+ case 0x0A31 :
+ case 0x0A34 :
+ case 0x0A37 :
+ case 0x0A3D :
+ case 0x0A5D :
+ case 0x0A84 :
+ case 0x0A8C :
+ case 0x0A8E :
+ case 0x0A92 :
+ case 0x0AA9 :
+ case 0x0AB1 :
+ case 0x0AB4 :
+ case 0x0AC6 :
+ case 0x0ACA :
+ case 0x0B04 :
+ case 0x0B29 :
+ case 0x0B31 :
+ case 0x0B5E :
+ case 0x0B84 :
+ case 0x0B91 :
+ case 0x0B9B :
+ case 0x0B9D :
+ case 0x0BB6 :
+ case 0x0BC9 :
+ case 0x0C04 :
+ case 0x0C0D :
+ case 0x0C11 :
+ case 0x0C29 :
+ case 0x0C34 :
+ case 0x0C45 :
+ case 0x0C49 :
+ case 0x0C84 :
+ case 0x0C8D :
+ case 0x0C91 :
+ case 0x0CA9 :
+ case 0x0CB4 :
+ case 0x0CC5 :
+ case 0x0CC9 :
+ case 0x0CDF :
+ case 0x0D04 :
+ case 0x0D0D :
+ case 0x0D11 :
+ case 0x0D29 :
+ case 0x0D49 :
+ case 0x0D84 :
+ case 0x0DB2 :
+ case 0x0DBC :
+ case 0x0DD5 :
+ case 0x0DD7 :
+ case 0x0E83 :
+ case 0x0E89 :
+ case 0x0E98 :
+ case 0x0EA0 :
+ case 0x0EA4 :
+ case 0x0EA6 :
+ case 0x0EAC :
+ case 0x0EBA :
+ case 0x0EC5 :
+ case 0x0EC7 :
+ case 0x0F48 :
+ case 0x0F98 :
+ case 0x0FBD :
+ case 0x1022 :
+ case 0x1028 :
+ case 0x102B :
+ case 0x1207 :
+ case 0x1247 :
+ case 0x1249 :
+ case 0x1257 :
+ case 0x1259 :
+ case 0x1287 :
+ case 0x1289 :
+ case 0x12AF :
+ case 0x12B1 :
+ case 0x12BF :
+ case 0x12C1 :
+ case 0x12CF :
+ case 0x12D7 :
+ case 0x12EF :
+ case 0x130F :
+ case 0x1311 :
+ case 0x131F :
+ case 0x1347 :
+ case 0x170D :
+ case 0x176D :
+ case 0x1771 :
+ case 0x180F :
+ case 0x1F58 :
+ case 0x1F5A :
+ case 0x1F5C :
+ case 0x1F5E :
+ case 0x1FB5 :
+ case 0x1FC5 :
+ case 0x1FDC :
+ case 0x1FF5 :
+ case 0x1FFF :
+ case 0x24FF :
+ case 0x2618 :
+ case 0x2705 :
+ case 0x2728 :
+ case 0x274C :
+ case 0x274E :
+ case 0x2757 :
+ case 0x27B0 :
+ case 0x2E9A :
+ case 0x3040 :
+ case 0x318F :
+ case 0x32FF :
+ case 0x33FF :
+ case 0xFB37 :
+ case 0xFB3D :
+ case 0xFB3F :
+ case 0xFB42 :
+ case 0xFB45 :
+ case 0xFE53 :
+ case 0xFE67 :
+ case 0xFE75 :
+ case 0xFF00 :
+ case 0xFFE7 :
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table A.1, intervals
+ if ( ( c >= 0x0234 ) && ( c <= 0x024F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x02AE ) && ( c <= 0x02AF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x02EF ) && ( c <= 0x02FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0350 ) && ( c <= 0x035F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0370 ) && ( c <= 0x0373 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0376 ) && ( c <= 0x0379 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x037B ) && ( c <= 0x037D ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x037F ) && ( c <= 0x0383 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x03F7 ) && ( c <= 0x03FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x04F6 ) && ( c <= 0x04F7 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x04FA ) && ( c <= 0x04FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0510 ) && ( c <= 0x0530 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0557 ) && ( c <= 0x0558 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x058B ) && ( c <= 0x0590 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05C5 ) && ( c <= 0x05CF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05EB ) && ( c <= 0x05EF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05F5 ) && ( c <= 0x060B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x060D ) && ( c <= 0x061A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x061C ) && ( c <= 0x061E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x063B ) && ( c <= 0x063F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0656 ) && ( c <= 0x065F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x06EE ) && ( c <= 0x06EF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x072D ) && ( c <= 0x072F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x074B ) && ( c <= 0x077F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x07B2 ) && ( c <= 0x0900 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x093A ) && ( c <= 0x093B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x094E ) && ( c <= 0x094F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0955 ) && ( c <= 0x0957 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0971 ) && ( c <= 0x0980 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x098D ) && ( c <= 0x098E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0991 ) && ( c <= 0x0992 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09B3 ) && ( c <= 0x09B5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09BA ) && ( c <= 0x09BB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09C5 ) && ( c <= 0x09C6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09C9 ) && ( c <= 0x09CA ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09CE ) && ( c <= 0x09D6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09D8 ) && ( c <= 0x09DB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09E4 ) && ( c <= 0x09E5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09FB ) && ( c <= 0x0A01 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A03 ) && ( c <= 0x0A04 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A0B ) && ( c <= 0x0A0E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A11 ) && ( c <= 0x0A12 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A3A ) && ( c <= 0x0A3B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A43 ) && ( c <= 0x0A46 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A49 ) && ( c <= 0x0A4A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A4E ) && ( c <= 0x0A58 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A5F ) && ( c <= 0x0A65 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A75 ) && ( c <= 0x0A80 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0ABA ) && ( c <= 0x0ABB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0ACE ) && ( c <= 0x0ACF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AD1 ) && ( c <= 0x0ADF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AE1 ) && ( c <= 0x0AE5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AF0 ) && ( c <= 0x0B00 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B0D ) && ( c <= 0x0B0E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B11 ) && ( c <= 0x0B12 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B34 ) && ( c <= 0x0B35 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B3A ) && ( c <= 0x0B3B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B44 ) && ( c <= 0x0B46 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B49 ) && ( c <= 0x0B4A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B4E ) && ( c <= 0x0B55 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B58 ) && ( c <= 0x0B5B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B62 ) && ( c <= 0x0B65 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B71 ) && ( c <= 0x0B81 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B8B ) && ( c <= 0x0B8D ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B96 ) && ( c <= 0x0B98 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BA0 ) && ( c <= 0x0BA2 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BA5 ) && ( c <= 0x0BA7 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BAB ) && ( c <= 0x0BAD ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BBA ) && ( c <= 0x0BBD ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BC3 ) && ( c <= 0x0BC5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BCE ) && ( c <= 0x0BD6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BD8 ) && ( c <= 0x0BE6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BF3 ) && ( c <= 0x0C00 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.3
+ if ( ( c >= 0xE000 ) && ( c <= 0xF8FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.4
+ if ( ( c >= 0xFDD0 ) && ( c <= 0xFDEF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c == 0xFFFE ) || ( c <= 0xFFFF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.5 (Surrogates)
+ if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.8
+ switch ( c)
+ {
+ case 0x0340 : // COMBINING GRAVE TONE MARK
+ case 0x0341 : // COMBINING ACUTE TONE MARK
+ case 0x200E : // LEFT-TO-RIGHT MARK
+ case 0x200F : // RIGHT-TO-LEFT MARK
+ case 0x202A : // LEFT-TO-RIGHT EMBEDDING
+ case 0x202B : // RIGHT-TO-LEFT EMBEDDING
+ case 0x202C : // POP DIRECTIONAL FORMATTING
+ case 0x202D : // LEFT-TO-RIGHT OVERRIDE
+ case 0x202E : // RIGHT-TO-LEFT OVERRIDE
+ case 0x206A : // INHIBIT SYMMETRIC SWAPPING
+ case 0x206B : // ACTIVATE SYMMETRIC SWAPPING
+ case 0x206C : // INHIBIT ARABIC FORM SHAPING
+ case 0x206D : // ACTIVATE ARABIC FORM SHAPING
+ case 0x206E : // NATIONAL DIGIT SHAPES
+ case 0x206F : // NOMINAL DIGIT SHAPES
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( c == 0xFFFD )
+ {
+ throw new InvalidCharacterException( c );
+ }
+ }
+ }
+
+ /**
+ *
+ * TODO bidi.
+ *
+ * @param array
+ * @return
+ */
+ public static StringBuilder bidi( char[] array )
+ {
+ StringBuilder sb = new StringBuilder( array.length );
+
+ for ( char c:array )
+ {
+ // RFC 3454, Table D1
+ switch ( c )
+ {
+ case 0x05BE :
+ case 0x05C0 :
+ case 0x05C3 :
+ case 0x061B :
+ case 0x061F :
+ case 0x06DD :
+ case 0x0710 :
+ case 0x07B1 :
+ case 0x200F :
+ case 0xFB1D :
+ case 0xFB3E :
+ continue;
+ }
+
+ // RFC 3454, Table D1, intervals
+ if ( ( c >= 0x05D0 ) && ( c <= 0x05EA ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x05F0 ) && ( c <= 0x05F4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0621 ) && ( c <= 0x063A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0640 ) && ( c <= 0x064A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x066D ) && ( c <= 0x066F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0671 ) && ( c <= 0x06D5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x06E5 ) && ( c <= 0x06E6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x06FA ) && ( c <= 0x06FE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0700 ) && ( c <= 0x070D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0712 ) && ( c <= 0x072C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0780 ) && ( c <= 0x07A5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB1F ) && ( c <= 0xFB28 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB2A ) && ( c <= 0xFB36 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB38 ) && ( c <= 0xFB3C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB40 ) && ( c <= 0xFB41 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB43 ) && ( c <= 0xFB44 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB46 ) && ( c <= 0xFBB1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFBD3 ) && ( c <= 0xFD3D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFD50 ) && ( c <= 0xFD8F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFD92 ) && ( c <= 0xFDC7 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFDF0 ) && ( c <= 0xFDFC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFE70 ) && ( c <= 0xFE74 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFE76 ) && ( c <= 0xFEFC ) )
+ {
+ continue;
+ }
+
+ // RFC 3454, Table D.2
+ switch ( c )
+ {
+ case 0x00AA :
+ case 0x00B5 :
+ case 0x00BA :
+ case 0x02EE :
+ case 0x037A :
+ case 0x0386 :
+ case 0x038C :
+ case 0x0589 :
+ case 0x0903 :
+ case 0x0950 :
+ case 0x09B2 :
+ case 0x09D7 :
+ case 0x0A5E :
+ case 0x0A83 :
+ case 0x0A8D :
+ case 0x0AC9 :
+ case 0x0AD0 :
+ case 0x0AE0 :
+ case 0x0B40 :
+ case 0x0B57 :
+ case 0x0B83 :
+ case 0x0B9C :
+ case 0x0BD7 :
+ case 0x0CBE :
+ case 0x0CDE :
+ case 0x0D57 :
+ case 0x0DBD :
+ case 0x0E84 :
+ case 0x0E8A :
+ case 0x0E8D :
+ case 0x0EA5 :
+ case 0x0EA7 :
+ case 0x0EBD :
+ case 0x0EC6 :
+ case 0x0F36 :
+ case 0x0F38 :
+ case 0x0F7F :
+ case 0x0F85 :
+ case 0x0FCF :
+ case 0x102C :
+ case 0x1031 :
+ case 0x1038 :
+ case 0x10FB :
+ case 0x1248 :
+ case 0x1258 :
+ case 0x1288 :
+ case 0x12B0 :
+ case 0x12C0 :
+ case 0x1310 :
+ case 0x17DC :
+ case 0x1F59 :
+ case 0x1F5B :
+ case 0x1F5D :
+ case 0x1FBE :
+ case 0x200E :
+ case 0x2071 :
+ case 0x207F :
+ case 0x2102 :
+ case 0x2107 :
+ case 0x2115 :
+ case 0x2124 :
+ case 0x2126 :
+ case 0x2128 :
+ continue;
+ }
+
+ // RFC 3454, Table D.2 intervals
+ if ( ( c >= 0x0041 ) && ( c <= 0x005A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0061 ) && ( c <= 0x007A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x00C0 ) && ( c <= 0x00D6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x00D8 ) && ( c <= 0x00F6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x00F8 ) && ( c <= 0x0220 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0222 ) && ( c <= 0x0233 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0250 ) && ( c <= 0x02AD ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x02B0 ) && ( c <= 0x02B8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x02BB ) && ( c <= 0x02C1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x02D0 ) && ( c <= 0x02D1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x02E0 ) && ( c <= 0x02E4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0388 ) && ( c <= 0x038A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x038E ) && ( c <= 0x03A1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x03A3 ) && ( c <= 0x03CE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x03D0 ) && ( c <= 0x03F5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0400 ) && ( c <= 0x0482 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x048A ) && ( c <= 0x04CE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x04D0 ) && ( c <= 0x04F5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x04F8 ) && ( c <= 0x04F9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0500 ) && ( c <= 0x050F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0531 ) && ( c <= 0x0556 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0559 ) && ( c <= 0x055F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0561 ) && ( c <= 0x0587 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0905 ) && ( c <= 0x0939 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x093D ) && ( c <= 0x0940 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0949 ) && ( c <= 0x094C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0958 ) && ( c <= 0x0961 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0964 ) && ( c <= 0x0970 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0982 ) && ( c <= 0x0983 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0985 ) && ( c <= 0x098C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x098F ) && ( c <= 0x0990 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0993 ) && ( c <= 0x09A8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09AA ) && ( c <= 0x09B0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09B6 ) && ( c <= 0x09B9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09BE ) && ( c <= 0x09C0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09C7 ) && ( c <= 0x09C8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09CB ) && ( c <= 0x09CC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09DC ) && ( c <= 0x09DD ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09DF ) && ( c <= 0x09E1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09E6 ) && ( c <= 0x09F1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x09F4 ) && ( c <= 0x09FA ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A05 ) && ( c <= 0x0A0A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A0F ) && ( c <= 0x0A10 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A13 ) && ( c <= 0x0A28 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A2A ) && ( c <= 0x0A30 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A32 ) && ( c <= 0x0A33 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A35 ) && ( c <= 0x0A36 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A38 ) && ( c <= 0x0A39 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A3E ) && ( c <= 0x0A40 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A59 ) && ( c <= 0x0A5C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A66 ) && ( c <= 0x0A6F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A72 ) && ( c <= 0x0A74 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A85 ) && ( c <= 0x0A8B ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A8F ) && ( c <= 0x0A91 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0A93 ) && ( c <= 0x0AA8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0AAA ) && ( c <= 0x0AB0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0AB2 ) && ( c <= 0x0AB3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0AB5 ) && ( c <= 0x0AB9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0ABD ) && ( c <= 0x0AC0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0ACB ) && ( c <= 0x0ACC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0AE6 ) && ( c <= 0x0AEF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B02 ) && ( c <= 0x0B03 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B05 ) && ( c <= 0x0B0C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B0F ) && ( c <= 0x0B10 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B13 ) && ( c <= 0x0B28 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B2A ) && ( c <= 0x0B30 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B32 ) && ( c <= 0x0B33 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B36 ) && ( c <= 0x0B39 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B3D ) && ( c <= 0x0B3E ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B47 ) && ( c <= 0x0B48 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B4B ) && ( c <= 0x0B4C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B5C ) && ( c <= 0x0B5D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B5F ) && ( c <= 0x0B61 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B66 ) && ( c <= 0x0B70 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B85 ) && ( c <= 0x0B8A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B8E ) && ( c <= 0x0B90 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B92 ) && ( c <= 0x0B95 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B99 ) && ( c <= 0x0B9A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0B9E ) && ( c <= 0x0B9F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BA3 ) && ( c <= 0x0BA4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BA8 ) && ( c <= 0x0BAA ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BAE ) && ( c <= 0x0BB5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BB7 ) && ( c <= 0x0BB9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BBE ) && ( c <= 0x0BBF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BC1 ) && ( c <= 0x0BC2 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BC6 ) && ( c <= 0x0BC8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BCA ) && ( c <= 0x0BCC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0BE7 ) && ( c <= 0x0BF2 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C01 ) && ( c <= 0x0C03 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C05 ) && ( c <= 0x0C0C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C0E ) && ( c <= 0x0C10 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C12 ) && ( c <= 0x0C28 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C2A ) && ( c <= 0x0C33 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C35 ) && ( c <= 0x0C39 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C41 ) && ( c <= 0x0C44 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C60 ) && ( c <= 0x0C61 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C66 ) && ( c <= 0x0C6F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C82 ) && ( c <= 0x0C83 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C85 ) && ( c <= 0x0C8C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C8E ) && ( c <= 0x0C90 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0C92 ) && ( c <= 0x0CA8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CAA ) && ( c <= 0x0CB3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CB5 ) && ( c <= 0x0CB9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CC0 ) && ( c <= 0x0CC4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CC7 ) && ( c <= 0x0CC8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CCA ) && ( c <= 0x0CCB ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CD5 ) && ( c <= 0x0CD6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CE0 ) && ( c <= 0x0CE1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0CE6 ) && ( c <= 0x0CEF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D02 ) && ( c <= 0x0D03 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D05 ) && ( c <= 0x0D0C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D0E ) && ( c <= 0x0D10 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D12 ) && ( c <= 0x0D28 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D2A ) && ( c <= 0x0D39 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D3E ) && ( c <= 0x0D40 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D46 ) && ( c <= 0x0D48 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D4A ) && ( c <= 0x0D4C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D60 ) && ( c <= 0x0D61 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D66 ) && ( c <= 0x0D6F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D82 ) && ( c <= 0x0D83 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D85 ) && ( c <= 0x0D96 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0D9A ) && ( c <= 0x0DB1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0DB3 ) && ( c <= 0x0DBB ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0DC0 ) && ( c <= 0x0DC6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0DCF ) && ( c <= 0x0DD1 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0DD8 ) && ( c <= 0x0DDF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0DF2 ) && ( c <= 0x0DF4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E01 ) && ( c <= 0x0E30 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E32 ) && ( c <= 0x0E33 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E40 ) && ( c <= 0x0E46 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E4F ) && ( c <= 0x0E5B ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E81 ) && ( c <= 0x0E82 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E87 ) && ( c <= 0x0E88 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E94 ) && ( c <= 0x0E97 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0E99 ) && ( c <= 0x0E9F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EA1 ) && ( c <= 0x0EA3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EAA ) && ( c <= 0x0EAB ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EAD ) && ( c <= 0x0EB0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EB2 ) && ( c <= 0x0EB3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EC0 ) && ( c <= 0x0EC4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0ED0 ) && ( c <= 0x0ED9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0EDC ) && ( c <= 0x0EDD ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0F00 ) && ( c <= 0x0F17 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0F1A ) && ( c <= 0x0F34 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0F3E ) && ( c <= 0x0F47 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0F49 ) && ( c <= 0x0F6A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0F88 ) && ( c <= 0x0F8B ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0FBE ) && ( c <= 0x0FC5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x0FC7 ) && ( c <= 0x0FCC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1000 ) && ( c <= 0x1021 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1023 ) && ( c <= 0x1027 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1029 ) && ( c <= 0x102A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1040 ) && ( c <= 0x1057 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x10A0 ) && ( c <= 0x10C5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x10D0 ) && ( c <= 0x10F8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1100 ) && ( c <= 0x1159 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x115F ) && ( c <= 0x11A2 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x11A8 ) && ( c <= 0x11F9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1200 ) && ( c <= 0x1206 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1208 ) && ( c <= 0x1246 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x124A ) && ( c <= 0x124D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1250 ) && ( c <= 0x1256 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x125A ) && ( c <= 0x125D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1260 ) && ( c <= 0x1286 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x128A ) && ( c <= 0x128D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1290 ) && ( c <= 0x12AE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12B2 ) && ( c <= 0x12B5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12B8 ) && ( c <= 0x12BE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12C2 ) && ( c <= 0x12C5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12C8 ) && ( c <= 0x12CE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12D0 ) && ( c <= 0x12D6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12D8 ) && ( c <= 0x12EE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x12F0 ) && ( c <= 0x130E ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1312 ) && ( c <= 0x1315 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1318 ) && ( c <= 0x131E ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1320 ) && ( c <= 0x1346 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1348 ) && ( c <= 0x135A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1361 ) && ( c <= 0x137C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x13A0 ) && ( c <= 0x13F4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1401 ) && ( c <= 0x1676 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1681 ) && ( c <= 0x169A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x16A0 ) && ( c <= 0x16F0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1700 ) && ( c <= 0x170C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x170E ) && ( c <= 0x1711 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1720 ) && ( c <= 0x1731 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1735 ) && ( c <= 0x1736 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1740 ) && ( c <= 0x1751 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1760 ) && ( c <= 0x176C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x176E ) && ( c <= 0x1770 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1780 ) && ( c <= 0x17B6 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x17BE ) && ( c <= 0x17C5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x17C7 ) && ( c <= 0x17C8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x17D4 ) && ( c <= 0x17DA ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x17E0 ) && ( c <= 0x17E9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1810 ) && ( c <= 0x1819 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1820 ) && ( c <= 0x1877 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1880 ) && ( c <= 0x18A8 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1E00 ) && ( c <= 0x1E9B ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1EA0 ) && ( c <= 0x1EF9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F00 ) && ( c <= 0x1F15 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F18 ) && ( c <= 0x1F1D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F20 ) && ( c <= 0x1F45 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F48 ) && ( c <= 0x1F4D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F50 ) && ( c <= 0x1F57 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F5F ) && ( c <= 0x1F7D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1F80 ) && ( c <= 0x1FB4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FB6 ) && ( c <= 0x1FBC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FC2 ) && ( c <= 0x1FC4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FC6 ) && ( c <= 0x1FCC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FD0 ) && ( c <= 0x1FD3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FD6 ) && ( c <= 0x1FDB ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FE0 ) && ( c <= 0x1FEC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FF2 ) && ( c <= 0x1FF4 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x1FF6 ) && ( c <= 0x1FFC ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x210A ) && ( c <= 0x2113 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x2119 ) && ( c <= 0x211D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x212A ) && ( c <= 0x212D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x212F ) && ( c <= 0x2131 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x2133 ) && ( c <= 0x2139 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x213D ) && ( c <= 0x213F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x2145 ) && ( c <= 0x2149 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x2160 ) && ( c <= 0x2183 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x2336 ) && ( c <= 0x237A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x249C ) && ( c <= 0x24E9 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3005 ) && ( c <= 0x3007 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3021 ) && ( c <= 0x3029 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3031 ) && ( c <= 0x3035 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3038 ) && ( c <= 0x303C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3041 ) && ( c <= 0x3096 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x309D ) && ( c <= 0x309F ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x30A1 ) && ( c <= 0x30FA ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x30FC ) && ( c <= 0x30FF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3105 ) && ( c <= 0x312C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3131 ) && ( c <= 0x318E ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3190 ) && ( c <= 0x31B7 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x31F0 ) && ( c <= 0x321C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3220 ) && ( c <= 0x3243 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3260 ) && ( c <= 0x327B ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x327F ) && ( c <= 0x32B0 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x32C0 ) && ( c <= 0x32CB ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x32D0 ) && ( c <= 0x32FE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3300 ) && ( c <= 0x3376 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x337B ) && ( c <= 0x33DD ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x33E0 ) && ( c <= 0x33FE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x3400 ) && ( c <= 0x4DB5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0x4E00 ) && ( c <= 0x9FA5 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xA000 ) && ( c <= 0xA48C ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xAC00 ) && ( c <= 0xD7A3 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xD800 ) && ( c <= 0xFA2D ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFA30 ) && ( c <= 0xFA6A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB00 ) && ( c <= 0xFB06 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFB13 ) && ( c <= 0xFB17 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFF21 ) && ( c <= 0xFF3A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFF41 ) && ( c <= 0xFF5A ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFF66 ) && ( c <= 0xFFBE ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFFC2 ) && ( c <= 0xFFC7 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFFCA ) && ( c <= 0xFFCF ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFFD2 ) && ( c <= 0xFFD7 ) )
+ {
+ continue;
+ }
+
+ if ( ( c >= 0xFFDA ) && ( c <= 0xFFDC ) )
+ {
+ continue;
+ }
+
+ // Now, fo every other chars, add them to the buffer.
+ sb.append( c );
+ }
+
+ return sb;
+ }
+
+    /**
+     * Remove all the insignificant chars in a Telephone Number :
+     * hyphens and spaces (RFC 4518, section 2.6.3).
+     * <p>
+     * A space or an hyphen which is immediately followed by a combining
+     * mark is significant, and is kept in the result. Every other space
+     * or hyphen is removed.
+     * <p>
+     * For instance, the following telephone number :
+     * "+ (33) 1-123--456 789"
+     * will be transformed to :
+     * "+(33)1123456789"
+     *
+     * @param str The telephone number. It must not be null.
+     * @return The modified telephone number
+     */
+    public static String insignifiantCharTelephoneNumber( String str )
+    {
+        StringBuilder sb = new StringBuilder();
+
+        // True when the previous char was a space or an hyphen : we can't
+        // know if it is significant until we have seen the following char.
+        boolean isSpaceOrHyphen = false;
+
+        // The pending space or hyphen, valid only if isSpaceOrHyphen is true
+        char soh = '\0';
+
+        for ( char c:str.toCharArray() )
+        {
+            switch ( c )
+            {
+                case 0x0020 : // SPACE
+                case 0x002D : // HYPHEN-MINUS
+                case 0x058A : // ARMENIAN HYPHEN
+                case 0x2010 : // HYPHEN
+                case 0x2011 : // NON-BREAKING HYPHEN
+                case 0x2212 : // MINUS SIGN
+                case 0xFE63 : // SMALL HYPHEN-MINUS
+                case 0xFF0D : // FULLWIDTH HYPHEN-MINUS
+                    // Don't emit it yet. If another space or hyphen was
+                    // already pending, it is followed by no combining mark
+                    // and is simply forgotten.
+                    soh = c;
+                    isSpaceOrHyphen = true;
+                    break;
+
+                default :
+                    if ( isSpaceOrHyphen && isCombiningMark( c ) )
+                    {
+                        // The pending space or hyphen carries a combining
+                        // mark : it is significant, restore it.
+                        sb.append( soh );
+                    }
+
+                    // Whatever the current char is, nothing is pending anymore
+                    isSpaceOrHyphen = false;
+                    sb.append( c );
+                    break;
+            }
+        }
+
+        return sb.toString();
+    }
+
+ /**
+ *
+ * Remove all insignifiant spaces in a numeric string. For
+ * instance, the following numeric string :
+ * " 123 456 789 "
+ * will be transformed to :
+ * "123456789"
+ *
+ * @param str The numeric string
+ * @return The modified numeric String
+ */
+ public static String insignifiantCharNumericString( String str )
+ {
+ StringBuilder sb = new StringBuilder();
+ boolean isSpace = false;
+
+ for ( char c:str.toCharArray() )
+ {
+ if ( c != 0x20 )
+ {
+ if ( isSpace && isCombiningMark( c ) )
+ {
+ sb.append( ' ' );
+ isSpace = false;
+ }
+
+ sb.append( c );
+ }
+ else
+ {
+ isSpace = true;
+ }
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ *
+ * TODO State.
+ *
+ * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
+ * @version $Rev$, $Date$
+ */
+ private enum State
+ {
+ START,
+ FIRST_SPACE,
+ ONLY_SPACES,
+ CHAR,
+ COMBINING,
+ SPACE
+ };
+
+ /**
+ *
+ * Remove all insignifiant spaces in a string. We use a state
+ * engine with 4 states, 4 endings, 3 startings.
+ *
+ * @param str The string
+ * @return The modified String
+ */
+ public static String insignifiantSpacesString( String str )
+ {
+ StringBuilder sb = new StringBuilder();
+
+ if ( StringTools.isEmpty( str ) )
+ {
+ // Special case : an empty strings is replaced by 2 spaces
+ return " ";
+ }
+
+ // Initialise the starting state
+ State state = State.START;
+
+ for ( char c:str.toCharArray() )
+ {
+ switch ( state )
+ {
+ case START :
+ if ( c == ' ' )
+ {
+ state = State.FIRST_SPACE;
+ }
+ else if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ }
+ else
+ {
+ state = State.CHAR;
+ }
+
+ sb.append( c );
+ break;
+
+ case FIRST_SPACE :
+ if ( c == ' ' )
+ {
+ state = State.ONLY_SPACES;
+ }
+ else if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+ else
+ {
+ state = State.CHAR;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+
+ break;
+
+ case ONLY_SPACES :
+ if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+ else if ( c != ' ' )
+ {
+ state = State.CHAR;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+
+ break;
+
+ case CHAR :
+ if ( c == ' ' )
+ {
+ state = State.FIRST_SPACE;
+ }
+ else if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ }
+
+ sb.append( c );
+ break;
+
+ case COMBINING :
+ if ( c == ' ' )
+ {
+ state = State.FIRST_SPACE;
+ }
+ else if ( !isCombiningMark( c ) )
+ {
+ state = State.CHAR;
+ sb.append( c );
+ }
+ else
+ {
+ sb.append( c );
+ }
+
+ break;
+
+ case SPACE :
+ if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+ else if ( c != ' ' )
+ {
+ state = State.CHAR;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+
+ break;
+ }
+ }
+
+ // Last, add final space if needed
+ switch ( state )
+ {
+ case FIRST_SPACE :
+ case COMBINING :
+ case CHAR :
+ case ONLY_SPACES :
+ sb.append( ' ' );
+ break;
+
+ default :
+ break;
+
+ }
+ return sb.toString();
}
}