You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2006/12/27 18:25:48 UTC

svn commit: r490543 - /directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java

Author: elecharny
Date: Wed Dec 27 09:25:47 2006
New Revision: 490543

URL: http://svn.apache.org/viewvc?view=rev&rev=490543
Log:
Created a second version of all the methods, to handle String directly

Modified:
    directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java

Modified: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java?view=diff&rev=490543&r1=490542&r2=490543
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java (original)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/schema/PrepareString.java Wed Dec 27 09:25:47 2006
@@ -22,7 +22,6 @@
 
 import java.io.IOException;
 
-import org.apache.directory.shared.ldap.util.StringTools;
 import org.apache.directory.shared.ldap.util.unicode.InvalidCharacterException;
 import org.apache.directory.shared.ldap.util.unicode.Normalizer;
 
@@ -108,11 +107,30 @@
      * @param str The string to transform
      * @return The transformed string
      */
-    public static StringBuilder map( String str )
+    public static String map( String str )
     {
-        StringBuilder sb = new StringBuilder( str.length() );
+        return ( str == null ? null : map( str.toCharArray() ).toString() );
+    }
+    
+    /**
+     * Execute the mapping step of the string preparation :
+     * - suppress useless chars
+     * - transform to spaces
+     * - lowercase
+     * 
+     * @param array The char array to transform
+     * @return The transformed StringBuilder
+     */
+    public static StringBuilder map( char[] array )
+    {
+        if ( array == null )
+        {
+            return null;
+        }
+
+        StringBuilder sb = new StringBuilder( array.length );
         
-        for ( char c:str.toCharArray() )
+        for ( char c:array )
         {
             // First, eliminate surrogates, and replace them by FFFD char
             if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) )
@@ -163,107 +181,32 @@
                     break;
 
                 case 0x0041 : 
-                    sb.append( (char)0x0061 );
-                    break;
-        
                 case 0x0042 : 
-                    sb.append( (char)0x0062 );
-                    break;
-        
                 case 0x0043 : 
-                    sb.append( (char)0x0063 );
-                    break;
-        
                 case 0x0044 : 
-                    sb.append( (char)0x0064 );
-                    break;
-        
                 case 0x0045 : 
-                    sb.append( (char)0x0065 );
-                    break;
-        
                 case 0x0046 : 
-                    sb.append( (char)0x0066 );
-                    break;
-        
                 case 0x0047 : 
-                    sb.append( (char)0x0067 );
-                    break;
-        
                 case 0x0048 : 
-                    sb.append( (char)0x0068 );
-                    break;
-        
                 case 0x0049 : 
-                    sb.append( (char)0x0069 );
-                    break;
-        
                 case 0x004A : 
-                    sb.append( (char)0x006A );
-                    break;
-        
                 case 0x004B : 
-                    sb.append( (char)0x006B );
-                    break;
-        
                 case 0x004C : 
-                    sb.append( (char)0x006C );
-                    break;
-        
                 case 0x004D : 
-                    sb.append( (char)0x006D );
-                    break;
-        
                 case 0x004E : 
-                    sb.append( (char)0x006E );
-                    break;
-        
                 case 0x004F : 
-                    sb.append( (char)0x006F );
-                    break;
-        
                 case 0x0050 : 
-                    sb.append( (char)0x0070 );
-                    break;
-        
                 case 0x0051 : 
-                    sb.append( (char)0x0071 );
-                    break;
-        
                 case 0x0052 : 
-                    sb.append( (char)0x0072 );
-                    break;
-        
                 case 0x0053 : 
-                    sb.append( (char)0x0073 );
-                    break;
-        
                 case 0x0054 : 
-                    sb.append( (char)0x0074 );
-                    break;
-        
                 case 0x0055 : 
-                    sb.append( (char)0x0075 );
-                    break;
-        
                 case 0x0056 : 
-                    sb.append( (char)0x0076 );
-                    break;
-        
                 case 0x0057 : 
-                    sb.append( (char)0x0077 );
-                    break;
-        
                 case 0x0058 : 
-                    sb.append( (char)0x0078 );
-                    break;
-        
                 case 0x0059 : 
-                    sb.append( (char)0x0079 );
-                    break;
-        
                 case 0x005A : 
-                    sb.append( (char)0x007A );
+                    sb.append( (char)( c | 0x0020 ) );
                     break;
         
                 case 0x007F:
@@ -319,123 +262,36 @@
                     break;
         
                 case 0x00C0 : 
-                    sb.append( (char)0x00E0 );
-                    break;
-        
                 case 0x00C1 : 
-                    sb.append( (char)0x00E1 );
-                    break;
-        
                 case 0x00C2 : 
-                    sb.append( (char)0x00E2 );
-                    break;
-        
                 case 0x00C3 : 
-                    sb.append( (char)0x00E3 );
-                    break;
-        
                 case 0x00C4 : 
-                    sb.append( (char)0x00E4 );
-                    break;
-        
                 case 0x00C5 : 
-                    sb.append( (char)0x00E5 );
-                    break;
-        
                 case 0x00C6 : 
-                    sb.append( (char)0x00E6 );
-                    break;
-        
                 case 0x00C7 : 
-                    sb.append( (char)0x00E7 );
-                    break;
-        
                 case 0x00C8 : 
-                    sb.append( (char)0x00E8 );
-                    break;
-        
                 case 0x00C9 : 
-                    sb.append( (char)0x00E9 );
-                    break;
-        
                 case 0x00CA : 
-                    sb.append( (char)0x00EA );
-                    break;
-        
                 case 0x00CB : 
-                    sb.append( (char)0x00EB );
-                    break;
-        
                 case 0x00CC : 
-                    sb.append( (char)0x00EC );
-                    break;
-        
                 case 0x00CD : 
-                    sb.append( (char)0x00ED );
-                    break;
-        
                 case 0x00CE : 
-                    sb.append( (char)0x00EE );
-                    break;
-        
                 case 0x00CF : 
-                    sb.append( (char)0x00EF );
-                    break;
-        
                 case 0x00D0 : 
-                    sb.append( (char)0x00F0 );
-                    break;
-        
                 case 0x00D1 : 
-                    sb.append( (char)0x00F1 );
-                    break;
-        
                 case 0x00D2 : 
-                    sb.append( (char)0x00F2 );
-                    break;
-        
                 case 0x00D3 : 
-                    sb.append( (char)0x00F3 );
-                    break;
-        
                 case 0x00D4 : 
-                    sb.append( (char)0x00F4 );
-                    break;
-        
                 case 0x00D5 : 
-                    sb.append( (char)0x00F5 );
-                    break;
-        
                 case 0x00D6 : 
-                    sb.append( (char)0x00F6 );
-                    break;
-        
                 case 0x00D8 : 
-                    sb.append( (char)0x00F8 );
-                    break;
-        
                 case 0x00D9 : 
-                    sb.append( (char)0x00F9 );
-                    break;
-        
                 case 0x00DA : 
-                    sb.append( (char)0x00FA );
-                    break;
-        
                 case 0x00DB : 
-                    sb.append( (char)0x00FB );
-                    break;
-        
                 case 0x00DC : 
-                    sb.append( (char)0x00FC );
-                    break;
-        
                 case 0x00DD : 
-                    sb.append( (char)0x00FD );
-                    break;
-        
                 case 0x00DE : 
-                    sb.append( (char)0x00FE );
+                    sb.append( (char)( c | 0x0020 ) );
                     break;
         
                 case 0x00DF : 
@@ -4067,11 +3923,34 @@
      *  - Table C.8 of RFC 3454
      *  - character U-FFFD
      *
-     * @param array That char array to analyze
+     * @param str The String to analyze
+     * @throws InvalidCharacterException If any character is prohibited
+     */
+    public static void prohibit( String str ) throws InvalidCharacterException
+    {
+        prohibit( str.toCharArray() );
+    }
+    
+    /**
+     * 
+     * Prohibit characters described in RFC 4518 :
+     *  - Table A.1 of RFC 3454
+     *  - Table C.3 of RFC 3454
+     *  - Table C.4 of RFC 3454
+     *  - Table C.5 of RFC 3454
+     *  - Table C.8 of RFC 3454
+     *  - character U-FFFD
+     *
+     * @param array The char array to analyze
      * @throws InvalidCharacterException If any character is prohibited
      */
     public static void prohibit( char[] array ) throws InvalidCharacterException
     {
+        if ( array == null )
+        {
+            return;
+        }
+
         for ( char c:array )
         {
             // RFC 3454, Table A.1
@@ -4639,13 +4518,30 @@
     
     /**
      * 
-     * TODO bidi.
+     * Remove all bidirectional chars
      *
-     * @param array
-     * @return
+     * @param str The string where bidi chars are to be removed
+     * @return The cleaned string
+     */
+    public static String bidi( String str )
+    {
+        return bidi( str.toCharArray() ).toString();
+    }
+    
+    /**
+     * 
+     * Remove all bidirectional chars
+     *
+     * @param array The char array where bidi chars are to be removed
+     * @return The cleaned StringBuilder
      */
     public static StringBuilder bidi( char[] array )
     {
+        if ( array == null )
+        {
+            return null;
+        }
+
         StringBuilder sb = new StringBuilder( array.length );
         
         for ( char c:array )
@@ -6170,11 +6066,34 @@
      */
     public static String insignifiantCharTelephoneNumber( String str )
     {
-        StringBuilder sb = new StringBuilder();
+        return insignifiantCharTelephoneNumber( str.toCharArray() ).toString();
+    }
+    
+    /**
+     * 
+     * Remove all insignifiant chars in a Telephone Number :
+     * Hyphen and spaces. 
+     * 
+     * For instance, the following telephone number :
+     * "+ (33) 1-123--456  789"
+     * will be transformed to :
+     * "+(33)1123456789"
+     *
+     * @param array The telephone number char array
+     * @return The modified telephone number StringBuilder
+     */
+    public static StringBuilder insignifiantCharTelephoneNumber( char[] array )
+    {
+        if ( array == null )
+        {
+            return null;
+        }
+
+        StringBuilder sb = new StringBuilder( array.length );
         boolean isSpaceOrHyphen = false;
         char soh = '\0';
         
-        for ( char c:str.toCharArray() )
+        for ( char c:array )
         {
             switch ( c )
             {
@@ -6200,7 +6119,7 @@
             }
         }
         
-        return sb.toString();
+        return sb;
     }
 
     /**
@@ -6216,16 +6135,37 @@
      */
     public static String insignifiantCharNumericString( String str )
     {
-        StringBuilder sb = new StringBuilder();
+        return ( str == null ? null : insignifiantCharNumericString( str.toCharArray() ).toString() );
+    }
+    
+    /**
+     * 
+     * Remove all insignifiant spaces in a numeric string. For
+     * instance, the following numeric string :
+     * "  123  456  789  "
+     * will be transformed to :
+     * "123456789"
+     *
+     * @param array The numeric char array
+     * @return The modified numeric StringBuilder
+     */
+    public static StringBuilder insignifiantCharNumericString( char[] array )
+    {
+        if ( array == null )
+        {
+            return null;
+        }
+
+        StringBuilder sb = new StringBuilder( array.length );
         boolean isSpace = false;
         
-        for ( char c:str.toCharArray() )
+        for ( char c:array )
         {
             if ( c != 0x20 )
             {
                 if ( isSpace && isCombiningMark( c ) )
                 {
-                    sb.append(  ' ' );
+                    sb.append( ' ' );
                     isSpace = false;
                 }
                     
@@ -6237,93 +6177,97 @@
             }
         }
         
-        return sb.toString();
+        return sb;
     }
 
     /**
      * 
-     * TODO State.
-     *
-     * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
-     * @version $Rev$, $Date$
+     * The 6 possible states for the insignifiant state machine
      */
     private enum State 
     {
         START,
-        FIRST_SPACE,
-        ONLY_SPACES,
+        START_SPACE,
+        INNER_START_SPACE,
         CHAR,
         COMBINING,
-        SPACE
+        INNER_SPACE
     };
 
     /**
      * 
-     * Remove all insignifiant spaces in a string. We use a state
-     * engine with 4 states, 4 endings, 3 startings.
+     * Remove all insignifiant spaces in a string.
+     * 
+     * This method uses a finite state machine to parse
+     * the text.
      * 
      * @param str The string
      * @return The modified String
      */
-    public static String insignifiantSpacesString( String str )
+    public static String insignifiantSpacesString( String str ) throws InvalidCharacterException
     {
-        StringBuilder sb = new StringBuilder();
-        
-        if ( StringTools.isEmpty( str ) )
+        if ( str == null )
         {
-            // Special case : an empty strings is replaced by 2 spaces
             return "  ";
         }
+        else
+        {
+            return insignifiantSpacesString( str.toCharArray() ).toString();
+        }
+    }
+    
+    /**
+     * 
+     * Remove all insignifiant spaces in a string.
+     * 
+     * This method uses a finite state machine to parse
+     * the text.
+     * 
+     * @param array The char array  representing the string
+     * @return The modified StringBuilder
+     */
+    public static StringBuilder insignifiantSpacesString( char[] array ) throws InvalidCharacterException
+    {
+        if ( ( array == null ) || ( array.length == 0 ) )
+        {
+            // Special case : an empty string is replaced by 2 spaces
+            return new StringBuilder( "  " );
+        }
+        
+        StringBuilder sb = new StringBuilder( array.length );
         
         // Initialise the starting state
         State state = State.START;
         
-        for ( char c:str.toCharArray() )
+        for ( char c:array )
         {
             switch ( state )
             {
                 case START :
                     if ( c == ' ' )
                     {
-                        state = State.FIRST_SPACE;
+                        state = State.START_SPACE;
                     }
                     else if ( isCombiningMark( c ) )
                     {
-                        state = State.COMBINING;
+                        // The first char can't be a combining char
+                        throw new InvalidCharacterException( c );
                     }
                     else
                     {
-                        state = State.CHAR;
-                    }
-
-                    sb.append( c );
-                    break;
-                    
-                case FIRST_SPACE :
-                    if ( c == ' ' )
-                    {
-                        state = State.ONLY_SPACES;
-                    }
-                    else if ( isCombiningMark( c ) )
-                    {
-                        state = State.COMBINING;
                         sb.append( ' ' );
                         sb.append( c );
-                    }
-                    else
-                    {
                         state = State.CHAR;
-                        sb.append( ' ' );
-                        sb.append( c );
                     }
 
                     break;
-                     
-                case ONLY_SPACES :
+                    
+                case START_SPACE :
                     if ( isCombiningMark( c ) )
                     {
                         state = State.COMBINING;
                         sb.append( ' ' );
+                        sb.append( ' ' );
                         sb.append( c );
                     }
                     else if ( c != ' ' )
@@ -6332,26 +6276,30 @@
                         sb.append( ' ' );
                         sb.append( c );
                     }
-                    
-                    break;
 
+                    break;
+                     
                 case CHAR :
                     if ( c == ' ' )
                     {
-                        state = State.FIRST_SPACE;
+                        state = State.INNER_START_SPACE;
                     }
                     else if ( isCombiningMark( c ) )
                     {
                         state = State.COMBINING;
+                        sb.append( c );
+                    }
+                    else
+                    {
+                        sb.append( c );
                     }
                     
-                    sb.append( c );
                     break;
                     
                 case COMBINING :
                     if ( c == ' ' )
                     {
-                        state = State.FIRST_SPACE;
+                        state = State.INNER_START_SPACE;
                     }
                     else if ( !isCombiningMark( c ) )
                     {
@@ -6365,11 +6313,32 @@
                     
                     break;
                     
-                case SPACE :
+                case INNER_START_SPACE :
+                    if ( isCombiningMark( c ) )
+                    {
+                        state = State.COMBINING;
+                        sb.append( ' ' );
+                        sb.append( c );
+                    }
+                    else if ( c == ' ' )
+                    {
+                        state = State.INNER_SPACE;
+                    }
+                    else
+                    {
+                        state = State.CHAR;
+                        sb.append( ' ' );
+                        sb.append( c );
+                    }
+                    
+                    break;
+
+                case INNER_SPACE :
                     if ( isCombiningMark( c ) )
                     {
                         state = State.COMBINING;
                         sb.append( ' ' );
+                        sb.append( ' ' );
                         sb.append( c );
                     }
                     else if ( c != ' ' )
@@ -6384,19 +6353,13 @@
         }
         
         // Last, add final space if needed
-        switch ( state )
+        sb.append( ' ' );
+        
+        if ( state == State.START_SPACE )
         {
-            case FIRST_SPACE :
-            case COMBINING :
-            case CHAR :
-            case ONLY_SPACES :
-                sb.append( ' ' );
-                break;
-                
-            default :
-                break;
-                
+            sb.append( ' ' );
         }
-        return sb.toString();
+        
+        return sb;
     }
 }