You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2006/12/27 18:25:48 UTC
svn commit: r490543 -
/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Author: elecharny
Date: Wed Dec 27 09:25:47 2006
New Revision: 490543
URL: http://svn.apache.org/viewvc?view=rev&rev=490543
Log:
Created a second version of all the methods, to handle String directly
Modified:
directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
Modified: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java?view=diff&rev=490543&r1=490542&r2=490543
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java (original)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java Wed Dec 27 09:25:47 2006
@@ -22,7 +22,6 @@
import java.io.IOException;
-import org.apache.directory.shared.ldap.util.StringTools;
import org.apache.directory.shared.ldap.util.unicode.InvalidCharacterException;
import org.apache.directory.shared.ldap.util.unicode.Normalizer;
@@ -108,11 +107,30 @@
* @param str The string to transform
* @return The transformed string
*/
- public static StringBuilder map( String str )
+ public static String map( String str )
{
- StringBuilder sb = new StringBuilder( str.length() );
+ return ( str == null ? null : map( str.toCharArray() ).toString() );
+ }
+
+ /**
+ * Execute the mapping step of the string preparation :
+ * - suppress useless chars
+ * - transform to spaces
+ * - lowercase
+ *
+ * @param array The char array to transform
+ * @return The transformed StringBuilder
+ */
+ public static StringBuilder map( char[] array )
+ {
+ if ( array == null )
+ {
+ return null;
+ }
+
+ StringBuilder sb = new StringBuilder( array.length );
- for ( char c:str.toCharArray() )
+ for ( char c:array )
{
// First, eliminate surrogates, and replace them by FFFD char
if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
@@ -163,107 +181,32 @@
break;
case 0x0041 :
- sb.append( (char)0x0061 );
- break;
-
case 0x0042 :
- sb.append( (char)0x0062 );
- break;
-
case 0x0043 :
- sb.append( (char)0x0063 );
- break;
-
case 0x0044 :
- sb.append( (char)0x0064 );
- break;
-
case 0x0045 :
- sb.append( (char)0x0065 );
- break;
-
case 0x0046 :
- sb.append( (char)0x0066 );
- break;
-
case 0x0047 :
- sb.append( (char)0x0067 );
- break;
-
case 0x0048 :
- sb.append( (char)0x0068 );
- break;
-
case 0x0049 :
- sb.append( (char)0x0069 );
- break;
-
case 0x004A :
- sb.append( (char)0x006A );
- break;
-
case 0x004B :
- sb.append( (char)0x006B );
- break;
-
case 0x004C :
- sb.append( (char)0x006C );
- break;
-
case 0x004D :
- sb.append( (char)0x006D );
- break;
-
case 0x004E :
- sb.append( (char)0x006E );
- break;
-
case 0x004F :
- sb.append( (char)0x006F );
- break;
-
case 0x0050 :
- sb.append( (char)0x0070 );
- break;
-
case 0x0051 :
- sb.append( (char)0x0071 );
- break;
-
case 0x0052 :
- sb.append( (char)0x0072 );
- break;
-
case 0x0053 :
- sb.append( (char)0x0073 );
- break;
-
case 0x0054 :
- sb.append( (char)0x0074 );
- break;
-
case 0x0055 :
- sb.append( (char)0x0075 );
- break;
-
case 0x0056 :
- sb.append( (char)0x0076 );
- break;
-
case 0x0057 :
- sb.append( (char)0x0077 );
- break;
-
case 0x0058 :
- sb.append( (char)0x0078 );
- break;
-
case 0x0059 :
- sb.append( (char)0x0079 );
- break;
-
case 0x005A :
- sb.append( (char)0x007A );
+ sb.append( (char)( c | 0x0020 ) );
break;
case 0x007F:
@@ -319,123 +262,36 @@
break;
case 0x00C0 :
- sb.append( (char)0x00E0 );
- break;
-
case 0x00C1 :
- sb.append( (char)0x00E1 );
- break;
-
case 0x00C2 :
- sb.append( (char)0x00E2 );
- break;
-
case 0x00C3 :
- sb.append( (char)0x00E3 );
- break;
-
case 0x00C4 :
- sb.append( (char)0x00E4 );
- break;
-
case 0x00C5 :
- sb.append( (char)0x00E5 );
- break;
-
case 0x00C6 :
- sb.append( (char)0x00E6 );
- break;
-
case 0x00C7 :
- sb.append( (char)0x00E7 );
- break;
-
case 0x00C8 :
- sb.append( (char)0x00E8 );
- break;
-
case 0x00C9 :
- sb.append( (char)0x00E9 );
- break;
-
case 0x00CA :
- sb.append( (char)0x00EA );
- break;
-
case 0x00CB :
- sb.append( (char)0x00EB );
- break;
-
case 0x00CC :
- sb.append( (char)0x00EC );
- break;
-
case 0x00CD :
- sb.append( (char)0x00ED );
- break;
-
case 0x00CE :
- sb.append( (char)0x00EE );
- break;
-
case 0x00CF :
- sb.append( (char)0x00EF );
- break;
-
case 0x00D0 :
- sb.append( (char)0x00F0 );
- break;
-
case 0x00D1 :
- sb.append( (char)0x00F1 );
- break;
-
case 0x00D2 :
- sb.append( (char)0x00F2 );
- break;
-
case 0x00D3 :
- sb.append( (char)0x00F3 );
- break;
-
case 0x00D4 :
- sb.append( (char)0x00F4 );
- break;
-
case 0x00D5 :
- sb.append( (char)0x00F5 );
- break;
-
case 0x00D6 :
- sb.append( (char)0x00F6 );
- break;
-
case 0x00D8 :
- sb.append( (char)0x00F8 );
- break;
-
case 0x00D9 :
- sb.append( (char)0x00F9 );
- break;
-
case 0x00DA :
- sb.append( (char)0x00FA );
- break;
-
case 0x00DB :
- sb.append( (char)0x00FB );
- break;
-
case 0x00DC :
- sb.append( (char)0x00FC );
- break;
-
case 0x00DD :
- sb.append( (char)0x00FD );
- break;
-
case 0x00DE :
- sb.append( (char)0x00FE );
+ sb.append( (char)( c | 0x0020 ) );
break;
case 0x00DF :
@@ -4067,11 +3923,34 @@
* - Table C.8 of RFC 3454
* - character U-FFFD
*
- * @param array That char array to analyze
+ * @param str The String to analyze
+ * @throws InvalidCharacterException If any character is prohibited
+ */
+ public static void prohibit( String str ) throws InvalidCharacterException
+ {
+ prohibit( str.toCharArray() );
+ }
+
+ /**
+ *
+ * Prohibit characters described in RFC 4518 :
+ * - Table A.1 of RFC 3454
+ * - Table C.3 of RFC 3454
+ * - Table C.4 of RFC 3454
+ * - Table C.5 of RFC 3454
+ * - Table C.8 of RFC 3454
+ * - character U-FFFD
+ *
+ * @param array The char array to analyze
* @throws InvalidCharacterException If any character is prohibited
*/
public static void prohibit( char[] array ) throws InvalidCharacterException
{
+ if ( array == null )
+ {
+ return;
+ }
+
for ( char c:array )
{
// RFC 3454, Table A.1
@@ -4639,13 +4518,30 @@
/**
*
- * TODO bidi.
+ * Remove all bidirectional chars
*
- * @param array
- * @return
+ * @param str The string where bidi chars are to be removed
+ * @return The cleaned string
+ */
+ public static String bidi( String str )
+ {
+ return bidi( str.toCharArray() ).toString();
+ }
+
+ /**
+ *
+ * Remove all bidirectional chars
+ *
+ * @param array The char array where bidi chars are to be removed
+ * @return The cleaned StringBuilder
*/
public static StringBuilder bidi( char[] array )
{
+ if ( array == null )
+ {
+ return null;
+ }
+
StringBuilder sb = new StringBuilder( array.length );
for ( char c:array )
@@ -6170,11 +6066,34 @@
*/
public static String insignifiantCharTelephoneNumber( String str )
{
- StringBuilder sb = new StringBuilder();
+ return insignifiantCharTelephoneNumber( str.toCharArray() ).toString();
+ }
+
+ /**
+ *
+ * Remove all insignifiant chars in a Telephone Number :
+ * Hyphen and spaces.
+ *
+ * For instance, the following telephone number :
+ * "+ (33) 1-123--456 789"
+ * will be transformed to :
+ * "+(33)1123456789"
+ *
+ * @param array The telephone number char array
+ * @return The modified telephone number StringBuilder
+ */
+ public static StringBuilder insignifiantCharTelephoneNumber( char[] array )
+ {
+ if ( array == null )
+ {
+ return null;
+ }
+
+ StringBuilder sb = new StringBuilder( array.length );
boolean isSpaceOrHyphen = false;
char soh = '\0';
- for ( char c:str.toCharArray() )
+ for ( char c:array )
{
switch ( c )
{
@@ -6200,7 +6119,7 @@
}
}
- return sb.toString();
+ return sb;
}
/**
@@ -6216,16 +6135,37 @@
*/
public static String insignifiantCharNumericString( String str )
{
- StringBuilder sb = new StringBuilder();
+ return ( str == null ? null : insignifiantCharNumericString( str.toCharArray() ).toString() );
+ }
+
+ /**
+ *
+ * Remove all insignifiant spaces in a numeric string. For
+ * instance, the following numeric string :
+ * " 123 456 789 "
+ * will be transformed to :
+ * "123456789"
+ *
+ * @param array The numeric char array
+ * @return The modified numeric StringBuilder
+ */
+ public static StringBuilder insignifiantCharNumericString( char[] array )
+ {
+ if ( array == null )
+ {
+ return null;
+ }
+
+ StringBuilder sb = new StringBuilder( array.length );
boolean isSpace = false;
- for ( char c:str.toCharArray() )
+ for ( char c:array )
{
if ( c != 0x20 )
{
if ( isSpace && isCombiningMark( c ) )
{
- sb.append( ' ' );
+ sb.append( ' ' );
isSpace = false;
}
@@ -6237,93 +6177,97 @@
}
}
- return sb.toString();
+ return sb;
}
/**
*
- * TODO State.
- *
- * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
- * @version $Rev$, $Date$
+ * The 6 possible states for the insignifiant state machine
*/
private enum State
{
START,
- FIRST_SPACE,
- ONLY_SPACES,
+ START_SPACE,
+ INNER_START_SPACE,
CHAR,
COMBINING,
- SPACE
+ INNER_SPACE
};
/**
*
- * Remove all insignifiant spaces in a string. We use a state
- * engine with 4 states, 4 endings, 3 startings.
+ * Remove all insignifiant spaces in a string.
+ *
+ * This method uses a finite state machine to parse
+ * the text.
*
* @param str The string
* @return The modified String
*/
- public static String insignifiantSpacesString( String str )
+ public static String insignifiantSpacesString( String str ) throws InvalidCharacterException
{
- StringBuilder sb = new StringBuilder();
-
- if ( StringTools.isEmpty( str ) )
+ if ( str == null )
{
- // Special case : an empty strings is replaced by 2 spaces
return " ";
}
+ else
+ {
+ return insignifiantSpacesString( str.toCharArray() ).toString();
+ }
+ }
+
+ /**
+ *
+ * Remove all insignifiant spaces in a string.
+ *
+ * This method uses a finite state machine to parse
+ * the text.
+ *
+ * @param array The char array representing the string
+ * @return The modified StringBuilder
+ */
+ public static StringBuilder insignifiantSpacesString( char[] array ) throws InvalidCharacterException
+ {
+ if ( ( array == null ) || ( array.length == 0 ) )
+ {
+ // Special case : an empty string is replaced by 2 spaces
+ return new StringBuilder( " " );
+ }
+
+ StringBuilder sb = new StringBuilder( array.length );
// Initialise the starting state
State state = State.START;
- for ( char c:str.toCharArray() )
+ for ( char c:array )
{
switch ( state )
{
case START :
if ( c == ' ' )
{
- state = State.FIRST_SPACE;
+ state = State.START_SPACE;
}
else if ( isCombiningMark( c ) )
{
- state = State.COMBINING;
+ // The first char can't be a combining char
+ throw new InvalidCharacterException( c );
}
else
{
- state = State.CHAR;
- }
-
- sb.append( c );
- break;
-
- case FIRST_SPACE :
- if ( c == ' ' )
- {
- state = State.ONLY_SPACES;
- }
- else if ( isCombiningMark( c ) )
- {
- state = State.COMBINING;
sb.append( ' ' );
sb.append( c );
- }
- else
- {
state = State.CHAR;
- sb.append( ' ' );
- sb.append( c );
}
break;
-
- case ONLY_SPACES :
+
+ case START_SPACE :
if ( isCombiningMark( c ) )
{
state = State.COMBINING;
sb.append( ' ' );
+ sb.append( ' ' );
sb.append( c );
}
else if ( c != ' ' )
@@ -6332,26 +6276,30 @@
sb.append( ' ' );
sb.append( c );
}
-
- break;
+ break;
+
case CHAR :
if ( c == ' ' )
{
- state = State.FIRST_SPACE;
+ state = State.INNER_START_SPACE;
}
else if ( isCombiningMark( c ) )
{
state = State.COMBINING;
+ sb.append( c );
+ }
+ else
+ {
+ sb.append( c );
}
- sb.append( c );
break;
case COMBINING :
if ( c == ' ' )
{
- state = State.FIRST_SPACE;
+ state = State.INNER_START_SPACE;
}
else if ( !isCombiningMark( c ) )
{
@@ -6365,11 +6313,32 @@
break;
- case SPACE :
+ case INNER_START_SPACE :
+ if ( isCombiningMark( c ) )
+ {
+ state = State.COMBINING;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+ else if ( c == ' ' )
+ {
+ state = State.INNER_SPACE;
+ }
+ else
+ {
+ state = State.CHAR;
+ sb.append( ' ' );
+ sb.append( c );
+ }
+
+ break;
+
+ case INNER_SPACE :
if ( isCombiningMark( c ) )
{
state = State.COMBINING;
sb.append( ' ' );
+ sb.append( ' ' );
sb.append( c );
}
else if ( c != ' ' )
@@ -6384,19 +6353,13 @@
}
// Last, add final space if needed
- switch ( state )
+ sb.append( ' ' );
+
+ if ( state == State.START_SPACE )
{
- case FIRST_SPACE :
- case COMBINING :
- case CHAR :
- case ONLY_SPACES :
- sb.append( ' ' );
- break;
-
- default :
- break;
-
+ sb.append( ' ' );
}
- return sb.toString();
+
+ return sb;
}
}