You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2007/01/02 00:12:32 UTC
svn commit: r491684 -
/directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Author: elecharny
Date: Mon Jan 1 15:12:32 2007
New Revision: 491684
URL: http://svn.apache.org/viewvc?view=rev&rev=491684
Log:
Modified the code to avoid costly iterations, and to add some shortcuts. It now runs twice as fast.
Modified:
directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Modified: directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java?view=diff&rev=491684&r1=491683&r2=491684
==============================================================================
--- directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java (original)
+++ directory/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java Mon Jan 1 15:12:32 2007
@@ -91,6 +91,11 @@
*/
private static boolean isCombiningMark( char c )
{
+ if ( c < COMBINING_MARKS[0][0] )
+ {
+ return false;
+ }
+
for ( char[] interval:COMBINING_MARKS )
{
if ( ( c >= interval[0] ) && ( c <= interval[1] ) )
@@ -124,15 +129,15 @@
switch ( type )
{
case NUMERIC_STRING :
- return insignifiantCharNumericString( bidi( prohibit( map( str ) ) ) );
+ return insignifiantCharNumericString( map( str ) );
case TELEPHONE_NUMBER :
- return insignifiantCharTelephoneNumber( bidi( prohibit( map( str ) ) ) );
+ return insignifiantCharTelephoneNumber( map( str ) );
case CASE_EXACT :
case CASE_EXACT_IA5 :
case DIRECTORY_STRING :
- return insignifiantSpacesString( bidi( prohibit( map( str ) ) ) );
+ return insignifiantSpacesString( map( str ) );
case CASE_IGNORE_IA5 :
case CASE_IGNORE_LIST :
@@ -182,13 +187,6 @@
for ( char c:array )
{
- // First, eliminate surrogates, and replace them by FFFD char
- if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
- {
- sb.append( (char)0xFFFD );
- continue;
- }
-
switch ( c )
{
case 0x0000:
@@ -3956,6 +3954,13 @@
break;
default :
+ // Not a mapped char : eliminate surrogates, and replace them by FFFD char
+ if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
+ {
+ sb.append( (char)0xFFFD );
+ continue;
+ }
+
sb.append( c );
}
}
@@ -4558,6 +4563,591 @@
/**
*
+ * Prohibit characters described in RFC 4518 :
+ * - Table A.1 of RFC 3454
+ * - Table C.3 of RFC 3454
+ * - Table C.4 of RFC 3454
+ * - Table C.5 of RFC 3454
+ * - Table C.8 of RFC 3454
+ * - character U-FFFD
+ *
+ * @param c The char to analyze
+ * @throws InvalidCharacterException If the character is prohibited
+ */
+ private static void checkProhibited( char c ) throws InvalidCharacterException
+ {
+ // Shortcut : chars below 0x0221 are never prohibited
+ if ( c < 0x221 )
+ {
+ return;
+ }
+
+ // RFC 3454, Table A.1
+ switch ( c )
+ {
+ case 0x0221 :
+ case 0x038B :
+ case 0x038D :
+ case 0x03A2 :
+ case 0x03CF :
+ case 0x0487 :
+ case 0x04CF :
+ case 0x0560 :
+ case 0x0588 :
+ case 0x05A2 :
+ case 0x05BA :
+ case 0x0620 :
+ case 0x06FF :
+ case 0x070E :
+ case 0x0904 :
+ case 0x0984 :
+ case 0x09A9 :
+ case 0x09B1 :
+ case 0x09BD :
+ case 0x09DE :
+ case 0x0A29 :
+ case 0x0A31 :
+ case 0x0A34 :
+ case 0x0A37 :
+ case 0x0A3D :
+ case 0x0A5D :
+ case 0x0A84 :
+ case 0x0A8C :
+ case 0x0A8E :
+ case 0x0A92 :
+ case 0x0AA9 :
+ case 0x0AB1 :
+ case 0x0AB4 :
+ case 0x0AC6 :
+ case 0x0ACA :
+ case 0x0B04 :
+ case 0x0B29 :
+ case 0x0B31 :
+ case 0x0B5E :
+ case 0x0B84 :
+ case 0x0B91 :
+ case 0x0B9B :
+ case 0x0B9D :
+ case 0x0BB6 :
+ case 0x0BC9 :
+ case 0x0C04 :
+ case 0x0C0D :
+ case 0x0C11 :
+ case 0x0C29 :
+ case 0x0C34 :
+ case 0x0C45 :
+ case 0x0C49 :
+ case 0x0C84 :
+ case 0x0C8D :
+ case 0x0C91 :
+ case 0x0CA9 :
+ case 0x0CB4 :
+ case 0x0CC5 :
+ case 0x0CC9 :
+ case 0x0CDF :
+ case 0x0D04 :
+ case 0x0D0D :
+ case 0x0D11 :
+ case 0x0D29 :
+ case 0x0D49 :
+ case 0x0D84 :
+ case 0x0DB2 :
+ case 0x0DBC :
+ case 0x0DD5 :
+ case 0x0DD7 :
+ case 0x0E83 :
+ case 0x0E89 :
+ case 0x0E98 :
+ case 0x0EA0 :
+ case 0x0EA4 :
+ case 0x0EA6 :
+ case 0x0EAC :
+ case 0x0EBA :
+ case 0x0EC5 :
+ case 0x0EC7 :
+ case 0x0F48 :
+ case 0x0F98 :
+ case 0x0FBD :
+ case 0x1022 :
+ case 0x1028 :
+ case 0x102B :
+ case 0x1207 :
+ case 0x1247 :
+ case 0x1249 :
+ case 0x1257 :
+ case 0x1259 :
+ case 0x1287 :
+ case 0x1289 :
+ case 0x12AF :
+ case 0x12B1 :
+ case 0x12BF :
+ case 0x12C1 :
+ case 0x12CF :
+ case 0x12D7 :
+ case 0x12EF :
+ case 0x130F :
+ case 0x1311 :
+ case 0x131F :
+ case 0x1347 :
+ case 0x170D :
+ case 0x176D :
+ case 0x1771 :
+ case 0x180F :
+ case 0x1F58 :
+ case 0x1F5A :
+ case 0x1F5C :
+ case 0x1F5E :
+ case 0x1FB5 :
+ case 0x1FC5 :
+ case 0x1FDC :
+ case 0x1FF5 :
+ case 0x1FFF :
+ case 0x24FF :
+ case 0x2618 :
+ case 0x2705 :
+ case 0x2728 :
+ case 0x274C :
+ case 0x274E :
+ case 0x2757 :
+ case 0x27B0 :
+ case 0x2E9A :
+ case 0x3040 :
+ case 0x318F :
+ case 0x32FF :
+ case 0x33FF :
+ case 0xFB37 :
+ case 0xFB3D :
+ case 0xFB3F :
+ case 0xFB42 :
+ case 0xFB45 :
+ case 0xFE53 :
+ case 0xFE67 :
+ case 0xFE75 :
+ case 0xFF00 :
+ case 0xFFE7 :
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table A.1, intervals
+ if ( ( c >= 0x0234 ) && ( c <= 0x024F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x02AE ) && ( c <= 0x02AF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x02EF ) && ( c <= 0x02FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0350 ) && ( c <= 0x035F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0370 ) && ( c <= 0x0373 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0376 ) && ( c <= 0x0379 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x037B ) && ( c <= 0x037D ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x037F ) && ( c <= 0x0383 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x03F7 ) && ( c <= 0x03FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x04F6 ) && ( c <= 0x04F7 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x04FA ) && ( c <= 0x04FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0510 ) && ( c <= 0x0530 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0557 ) && ( c <= 0x0558 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x058B ) && ( c <= 0x0590 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05C5 ) && ( c <= 0x05CF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05EB ) && ( c <= 0x05EF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x05F5 ) && ( c <= 0x060B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x060D ) && ( c <= 0x061A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x061C ) && ( c <= 0x061E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x063B ) && ( c <= 0x063F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0656 ) && ( c <= 0x065F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x06EE ) && ( c <= 0x06EF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x072D ) && ( c <= 0x072F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x074B ) && ( c <= 0x077F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x07B2 ) && ( c <= 0x0900 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x093A ) && ( c <= 0x093B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x094E ) && ( c <= 0x094F ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0955 ) && ( c <= 0x0957 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0971 ) && ( c <= 0x0980 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x098D ) && ( c <= 0x098E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0991 ) && ( c <= 0x0992 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09B3 ) && ( c <= 0x09B5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09BA ) && ( c <= 0x09BB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09C5 ) && ( c <= 0x09C6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09C9 ) && ( c <= 0x09CA ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09CE ) && ( c <= 0x09D6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09D8 ) && ( c <= 0x09DB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09E4 ) && ( c <= 0x09E5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x09FB ) && ( c <= 0x0A01 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A03 ) && ( c <= 0x0A04 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A0B ) && ( c <= 0x0A0E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A11 ) && ( c <= 0x0A12 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A3A ) && ( c <= 0x0A3B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A43 ) && ( c <= 0x0A46 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A49 ) && ( c <= 0x0A4A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A4E ) && ( c <= 0x0A58 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A5F ) && ( c <= 0x0A65 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0A75 ) && ( c <= 0x0A80 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0ABA ) && ( c <= 0x0ABB ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0ACE ) && ( c <= 0x0ACF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AD1 ) && ( c <= 0x0ADF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AE1 ) && ( c <= 0x0AE5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0AF0 ) && ( c <= 0x0B00 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B0D ) && ( c <= 0x0B0E ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B11 ) && ( c <= 0x0B12 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B34 ) && ( c <= 0x0B35 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B3A ) && ( c <= 0x0B3B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B44 ) && ( c <= 0x0B46 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B49 ) && ( c <= 0x0B4A ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B4E ) && ( c <= 0x0B55 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B58 ) && ( c <= 0x0B5B ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B62 ) && ( c <= 0x0B65 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B71 ) && ( c <= 0x0B81 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B8B ) && ( c <= 0x0B8D ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0B96 ) && ( c <= 0x0B98 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BA0 ) && ( c <= 0x0BA2 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BA5 ) && ( c <= 0x0BA7 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BAB ) && ( c <= 0x0BAD ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BBA ) && ( c <= 0x0BBD ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BC3 ) && ( c <= 0x0BC5 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BCE ) && ( c <= 0x0BD6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BD8 ) && ( c <= 0x0BE6 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c >= 0x0BF3 ) && ( c <= 0x0C00 ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.3
+ if ( ( c >= 0xE000 ) && ( c <= 0xF8FF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.4
+ if ( ( c >= 0xFDD0 ) && ( c <= 0xFDEF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( ( c == 0xFFFE ) || ( c == 0xFFFF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.5 (Surrogates)
+ if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ // RFC 3454, Table C.8
+ switch ( c)
+ {
+ case 0x0340 : // COMBINING GRAVE TONE MARK
+ case 0x0341 : // COMBINING ACUTE TONE MARK
+ case 0x200E : // LEFT-TO-RIGHT MARK
+ case 0x200F : // RIGHT-TO-LEFT MARK
+ case 0x202A : // LEFT-TO-RIGHT EMBEDDING
+ case 0x202B : // RIGHT-TO-LEFT EMBEDDING
+ case 0x202C : // POP DIRECTIONAL FORMATTING
+ case 0x202D : // LEFT-TO-RIGHT OVERRIDE
+ case 0x202E : // RIGHT-TO-LEFT OVERRIDE
+ case 0x206A : // INHIBIT SYMMETRIC SWAPPING
+ case 0x206B : // ACTIVATE SYMMETRIC SWAPPING
+ case 0x206C : // INHIBIT ARABIC FORM SHAPING
+ case 0x206D : // ACTIVATE ARABIC FORM SHAPING
+ case 0x206E : // NATIONAL DIGIT SHAPES
+ case 0x206F : // NOMINAL DIGIT SHAPES
+ throw new InvalidCharacterException( c );
+ }
+
+ if ( c == 0xFFFD )
+ {
+ throw new InvalidCharacterException( c );
+ }
+
+ return;
+ }
+
+ /**
+ *
* Remove all bidirectionnal chars. This is not really clear in RFC 4518
* what we should do with bidi chars :
* "Bidirectional characters are ignored."
@@ -4789,6 +5379,8 @@
for ( char c:array )
{
+ checkProhibited( c );
+
switch ( state )
{
case START :