You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by to...@apache.org on 2003/02/03 03:19:41 UTC

cvs commit: jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec Encoder.java EncoderComparator.java Metaphone.java RefinedSoundex.java Soundex.java

tobrien     2003/02/02 18:19:41

  Modified:    codec/src/java/org/apache/commons/codec Encoder.java
                        EncoderComparator.java Metaphone.java
                        RefinedSoundex.java Soundex.java
  Log:
  Fixed a number of CR/LF problems
  
  Revision  Changes    Path
  1.3       +34 -2     jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Encoder.java
  
  Index: Encoder.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Encoder.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Encoder.java	18 Nov 2002 12:41:24 -0000	1.2
  +++ Encoder.java	3 Feb 2003 02:19:41 -0000	1.3
  @@ -1,4 +1,17 @@
  -/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2002 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
    *    notice, this list of conditions and the following disclaimer in
    *    the documentation and/or other materials provided with the
    *    distribution.
  @@ -31,4 +44,23 @@
    * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    * SUCH DAMAGE.
  - * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.codec;

/**
 * Encoder is an interface, which is implemented by Soundex,
 * Metaphone, Soundex2, etc.
 *
 * @author tobrien@transolutions.net
 * @version $Revision$ $Date$
 */
public interface Encoder {
    /**
     * Encodes the supplied String.
     *
     * @param str the String to encode
     * @return the encoded form of <code>str</code>
     */
    String encode(String str);
}  
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +package org.apache.commons.codec;
  +
  +/**
  + * Encoder is an interface, which is implemented by Soundex,
  + * Metaphone, Soundex2, etc.
  + *
  + * @author tobrien@transolutions.net
  + * @version $Revision$ $Date$
  + */
  +public interface Encoder {
  +    String encode(String str);
  +}  
  +
  
  
  
  1.3       +89 -2     jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/EncoderComparator.java
  
  Index: EncoderComparator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/EncoderComparator.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- EncoderComparator.java	18 Nov 2002 12:41:24 -0000	1.2
  +++ EncoderComparator.java	3 Feb 2003 02:19:41 -0000	1.3
  @@ -1,2 +1,89 @@
  -/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.codec;
  -import java.util.Comparator;

/**
 * Comparator that orders objects by the encoded form of their
 * <code>toString()</code> value, using an {@link Encoder} to
 * produce the encoding.
 *
 * @author tobrien@transolutions.net
 * @version $Revision$ $Date$
 */
public class EncoderComparator implements Comparator {

    /** Encoder applied to both operands before they are compared. */
    private final Encoder encoder;

    /**
     * Creates a comparator backed by <code>RefinedSoundex.US_ENGLISH</code>.
     */
    public EncoderComparator() {
        this(RefinedSoundex.US_ENGLISH);
    }

    /**
     * Creates a comparator backed by the supplied encoder.
     *
     * @param en the encoder used to encode both operands
     */
    public EncoderComparator(Encoder en) {
        encoder = en;
    }

    /**
     * Encodes the <code>toString()</code> value of each argument and
     * compares the two encodings with <code>String.compareTo</code>.
     *
     * @param o1 the first object to compare
     * @param o2 the second object to compare
     * @return the result of comparing the encoding of <code>o1</code>
     *         to the encoding of <code>o2</code>
     */
    public int compare(Object o1, Object o2) {
        final String left = encoder.encode(o1.toString());
        final String right = encoder.encode(o2.toString());
        return left.compareTo(right);
    }

}
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2002 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +package org.apache.commons.codec;
  +
  +import java.util.Comparator;
  +
  +/**
  + * Compare using an Encoder.
  + *
  + * @author tobrien@transolutions.net
  + * @version $Revision$ $Date$
  + */
  +public class EncoderComparator implements Comparator {
  +
  +    private Encoder encoder;
  +
  +    /**
  +     * Use the default soundex algorithm, US_ENGLISH.
  +     */
  +    public EncoderComparator() {
  +        this(RefinedSoundex.US_ENGLISH);
  +    }
  +
  +    /**
  +     * Use the provided soundex algorithm.
  +     */
  +    public EncoderComparator(Encoder en) {
  +        this.encoder = en;
  +    }
  +
  +    public int compare(Object o1, Object o2) {
  +        String s1 = encoder.encode(o1.toString());
  +        String s2 = encoder.encode(o2.toString());
  +        return s1.compareTo(s2);
  +    }
  +
  +}
  +
  
  
  
  1.5       +306 -2    jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java
  
  Index: Metaphone.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- Metaphone.java	18 Nov 2002 12:41:24 -0000	1.4
  +++ Metaphone.java	3 Feb 2003 02:19:41 -0000	1.5
  @@ -1,2 +1,306 @@
  -/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.codec;

/**
 * A class to generate phonetic code.
 * The initial Java implementation, William B. Brogden.  December, 1997
 * Permission given by wbrogden for code to be used anywhere.
 * 
 * @see "Hanging on the Metaphone" by Lawrence Philips
 *      <i>Computer Language</i> of Dec. 1990, p 39   
 * 
 * @version $Revision$ $Date$
 * @author wbrogden@bga.com
 * @author bayard@generationjava.com
 * @author tobrien@transolutions.net
 */
public class Metaphone implements Encoder {
  -    private String vowels = "AEIOU" ;
    private String frontv = "EIY"   ;
    private String varson = "CSPTG" ;

    private int maxCodeLen = 4 ;

    public Metaphone() {
        super();
    }

    /**
     * Find the metaphone value of a String. This is similar to the
     * soundex algorithm, but better at finding similar sounding words.
     * All input is converted to upper case (using the English locale so
     * the result does not depend on the platform default locale).
     * Limitations: Input format is expected to be a single ASCII word
     * with only characters in the A - Z range, no punctuation or numbers.
     * Characters outside A - Z contribute nothing to the code.
     *
     * @param txt the word to encode; may be <code>null</code>
     * @return the metaphone code, at most <code>maxCodeLen</code>
     *         characters long; the empty String for <code>null</code> or
     *         empty input; the upper-cased input for a single character
     */
    public String metaphone( String txt ){
        if ((txt == null) || (txt.length() == 0)) {
            return "";
        }
        // a single character is its own code
        if (txt.length() == 1) {
            return txt.toUpperCase(java.util.Locale.ENGLISH);
        }

        char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray();

        StringBuffer local = new StringBuffer(40); // working copy of the word
        StringBuffer code = new StringBuffer(10);  // output

        // handle the exceptions for the initial two characters
        // (inwd has at least two characters at this point)
        switch (inwd[0]) {
            case 'K': case 'G': case 'P': // KN, GN, PN -> N
                if (inwd[1] == 'N') {
                    local.append(inwd, 1, inwd.length - 1);
                } else {
                    local.append(inwd);
                }
                break;
            case 'A': // AE -> E
                if (inwd[1] == 'E') {
                    local.append(inwd, 1, inwd.length - 1);
                } else {
                    local.append(inwd);
                }
                break;
            case 'W': // WR -> R, WH -> W
                if (inwd[1] == 'R') {
                    local.append(inwd, 1, inwd.length - 1);
                } else if (inwd[1] == 'H') {
                    local.append(inwd, 1, inwd.length - 1);
                    local.setCharAt(0, 'W');
                } else {
                    local.append(inwd);
                }
                break;
            case 'X': // initial X becomes S
                inwd[0] = 'S';
                local.append(inwd);
                break;
            default:
                local.append(inwd);
        }

        // The working string is not modified past this point, so take a
        // single String snapshot instead of rebuilding it per character.
        String word = local.toString();
        int wdsz = word.length();
        int n = 0;

        // The code length itself is the loop bound; a separate counter
        // can drift when a letter turns out to be silent.
        while ((code.length() < maxCodeLen) && (n < wdsz)) {
            char symb = word.charAt(n);

            // skip duplicate letters, except C
            if ((symb != 'C') && (n > 0) && (word.charAt(n - 1) == symb)) {
                n++;
                continue;
            }

            switch (symb) {
                case 'A': case 'E': case 'I': case 'O': case 'U':
                    // a vowel is only kept when it is the leading character
                    if (n == 0) {
                        code.append(symb);
                    }
                    break;
                case 'B':
                    // B is silent in a terminal MB
                    if ((n > 0) && (n + 1 == wdsz) && (word.charAt(n - 1) == 'M')) {
                        break;
                    }
                    code.append(symb);
                    break;
                case 'C': // lots of C special cases
                    // discard if SCI, SCE or SCY
                    if ((n > 0) && (word.charAt(n - 1) == 'S') && (n + 1 < wdsz)
                            && (frontv.indexOf(word.charAt(n + 1)) >= 0)) {
                        break;
                    }
                    if (word.startsWith("CIA", n)) { // CIA -> X
                        code.append('X');
                        break;
                    }
                    if ((n + 1 < wdsz) && (frontv.indexOf(word.charAt(n + 1)) >= 0)) {
                        code.append('S'); // CI, CE, CY -> S
                        break;
                    }
                    if ((n > 0) && word.startsWith("SCH", n - 1)) { // SCH -> SK
                        code.append('K');
                        break;
                    }
                    if (word.startsWith("CH", n)) {
                        if ((n == 0) && (wdsz >= 3)
                                && (vowels.indexOf(word.charAt(2)) < 0)) {
                            code.append('K'); // initial CH + consonant -> K
                        } else {
                            code.append('X'); // any other CH -> X
                        }
                    } else {
                        code.append('K');
                    }
                    break;
                case 'D':
                    if ((n + 2 < wdsz) && (word.charAt(n + 1) == 'G')
                            && (frontv.indexOf(word.charAt(n + 2)) >= 0)) {
                        code.append('J'); // DGE, DGI, DGY -> J
                        n += 2;           // consume the G and the front vowel
                    } else {
                        code.append('T');
                    }
                    break;
                case 'G':
                    // GH is silent at the end of the word ...
                    if ((n + 2 == wdsz) && (word.charAt(n + 1) == 'H')) {
                        break;
                    }
                    // ... and before a non-vowel
                    if ((n + 2 < wdsz) && (word.charAt(n + 1) == 'H')
                            && (vowels.indexOf(word.charAt(n + 2)) < 0)) {
                        break;
                    }
                    // non-initial G is silent before N (this covers GNED too);
                    // an initial GN was already reduced to N above
                    if ((n > 0) && word.startsWith("GN", n)) {
                        break;
                    }
                    // A G preceded by another G never reaches this point (the
                    // duplicate check above skips it), so no "hard G" test
                    // is needed here.
                    if ((n + 1 < wdsz) && (frontv.indexOf(word.charAt(n + 1)) >= 0)) {
                        code.append('J');
                    } else {
                        code.append('K');
                    }
                    break;
                case 'H':
                    if (n + 1 == wdsz) {
                        break; // terminal H
                    }
                    if ((n > 0) && (varson.indexOf(word.charAt(n - 1)) >= 0)) {
                        break; // H after C, S, P, T or G
                    }
                    if (vowels.indexOf(word.charAt(n + 1)) >= 0) {
                        code.append('H'); // H followed by a vowel
                    }
                    break;
                case 'F': case 'J': case 'L':
                case 'M': case 'N': case 'R':
                    code.append(symb);
                    break;
                case 'K':
                    // K after C is silent (the C already produced a K)
                    if ((n == 0) || (word.charAt(n - 1) != 'C')) {
                        code.append(symb);
                    }
                    break;
                case 'P':
                    if ((n + 1 < wdsz) && (word.charAt(n + 1) == 'H')) {
                        code.append('F'); // PH -> F
                    } else {
                        code.append(symb);
                    }
                    break;
                case 'Q':
                    code.append('K');
                    break;
                case 'S':
                    if (word.startsWith("SH", n)
                            || word.startsWith("SIO", n)
                            || word.startsWith("SIA", n)) {
                        code.append('X');
                    } else {
                        code.append('S');
                    }
                    break;
                case 'T':
                    if (word.startsWith("TIA", n) || word.startsWith("TIO", n)) {
                        code.append('X'); // TIA, TIO -> X
                        break;
                    }
                    if (word.startsWith("TCH", n)) {
                        break; // T is silent in TCH
                    }
                    // substitute numeral 0 for TH (resembles theta after all)
                    if (word.startsWith("TH", n)) {
                        code.append('0');
                    } else {
                        code.append('T');
                    }
                    break;
                case 'V':
                    code.append('F');
                    break;
                case 'W': case 'Y': // silent if not followed by vowel
                    if ((n + 1 < wdsz) && (vowels.indexOf(word.charAt(n + 1)) >= 0)) {
                        code.append(symb);
                    }
                    break;
                case 'X':
                    code.append('K');
                    code.append('S');
                    break;
                case 'Z':
                    code.append('S');
                    break;
            } // end switch
            n++;

            // X appends two characters and may overshoot the limit
            if (code.length() > maxCodeLen) {
                code.setLength(maxCodeLen);
            }
        }
        return code.toString();
    }
    
    /**
     * Encodes a String by delegating to {@link #metaphone(String)}.
     *
     * @param pString the String to encode
     * @return the value returned by <code>metaphone(pString)</code>
     */
    public String encode(String pString) {
        String encoded = metaphone(pString);
        return encoded;
    }

    /**
     * Tests whether two strings produce the same metaphone code.
     *
     * @param str1 the first String
     * @param str2 the second String
     * @return <code>true</code> if <code>metaphone(str1)</code> equals
     *         <code>metaphone(str2)</code>
     */
    public boolean isMetaphoneEqual(String str1, String str2) {
        String first = metaphone(str1);
        String second = metaphone(str2);
        return first.equals(second);
    }

	/**
	 * Returns the current value of <code>maxCodeLen</code>, which
	 * {@link #metaphone(String)} uses as the limit on the code length.
	 *
	 * @return the current maxCodeLen
	 */
	public int getMaxCodeLen() {
		return this.maxCodeLen;
	}

	/**
	 * Stores a new value for <code>maxCodeLen</code>, which
	 * {@link #metaphone(String)} uses as the limit on the code length.
	 *
	 * @param maxCodeLen the new maxCodeLen
	 */
	public void setMaxCodeLen(int maxCodeLen) {
		this.maxCodeLen = maxCodeLen;
	}

}
  \ No newline at end of file
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +package org.apache.commons.codec;
  +
  +/**
  + * A class to generate phonetic code.
  + * The initial Java implementation, William B. Brogden.  December, 1997
  + * Permission given by wbrogden for code to be used anywhere.
  + * 
  + * @see "Hanging on the Metaphone" by Lawrence Philips
  + *      <i>Computer Language</i> of Dec. 1990, p 39   
  + * 
  + * @version $Revision$ $Date$
  + * @author wbrogden@bga.com
  + * @author bayard@generationjava.com
  + * @author tobrien@transolutions.net
  + */
  +public class Metaphone implements Encoder {
  +
  +    private String vowels = "AEIOU" ;
  +    private String frontv = "EIY"   ;
  +    private String varson = "CSPTG" ;
  +
  +    private int maxCodeLen = 4 ;
  +
  +    public Metaphone() {
  +        super();
  +    }
  +
  +    /**
  +     * Find the metaphone value of a String. This is similar to the
  +     * soundex algorithm, but better at finding similar sounding words.
  +     * All input is converted to upper case.
  +     * Limitations: Input format is expected to be a single ASCII word
  +     * with only characters in the A - Z range, no punctuation or numbers.
  +     */
  +    public String metaphone( String txt ){
  +      int mtsz = 0  ;
  +      boolean hard = false ;
  +      if(( txt == null ) ||
  +         ( txt.length() == 0 )) return "" ;
  +      // single character is itself
  +      if( txt.length() == 1 ) return txt.toUpperCase() ;
  +      
  +      char[] inwd = txt.toUpperCase().toCharArray() ;
  +      
  +      String tmpS ;
  +      StringBuffer local = new StringBuffer( 40 ); // manipulate
  +      StringBuffer code = new StringBuffer( 10 ) ; //   output
  +      // handle initial 2 characters exceptions
  +      switch( inwd[0] ){
  +        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
  +          if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
  +          else local.append( inwd );
  +          break;
  +        case 'A': /* looking for AE */
  +          if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 );
  +          else local.append( inwd );
  +          break;
  +        case 'W' : /* looking for WR or WH */
  +          if( inwd[1] == 'R' ){   // WR -> R
  +            local.append(inwd, 1, inwd.length - 1 ); break ;
  +          }
  +          if( inwd[1] == 'H'){
  +            local.append(inwd, 1, inwd.length - 1 );
  +            local.setCharAt( 0,'W'); // WH -> W
  +          }
  +          else local.append( inwd );
  +          break;
  +        case 'X' : /* initial X becomes S */
  +          inwd[0] = 'S' ;local.append( inwd );
  +          break ;
  +        default :
  +          local.append( inwd );
  +      } // now local has working string with initials fixed
  +      int wdsz = local.length();
  +      int n = 0 ;
  +      while((mtsz < maxCodeLen ) && // max code size of 4 works well
  +            (n < wdsz ) ){
  +        char symb = local.charAt(n) ;
  +        // remove duplicate letters except C
  +        if(( symb != 'C' ) &&
  +           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
  +        else{ // not dup
  +          switch( symb ){
  +            case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
  +              if( n == 0 ) { code.append(symb );mtsz++;
  +              }
  +              break ; // only use vowel if leading char
  +            case 'B' :
  +              if( (n > 0 ) &&
  +                  !(n + 1 == wdsz ) && // not MB at end of word
  +                  ( local.charAt(n - 1) == 'M')) {
  +                    code.append(symb);
  +                  }
  +              else code.append(symb);
  +              mtsz++ ;
  +              break ;
  +            case 'C' : // lots of C special cases
  +              /* discard if SCI, SCE or SCY */
  +              if( ( n > 0 ) &&
  +                  ( local.charAt(n-1) == 'S' ) &&
  +                  ( n + 1 < wdsz ) &&
  +                  ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
  +              tmpS = local.toString();
  +              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
  +                 code.append('X' ); mtsz++; break ;
  +              }
  +              if( ( n + 1 < wdsz ) &&
  +                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
  +                 code.append('S');mtsz++; break ; // CI,CE,CY -> S
  +              }
  +              if(( n > 0) &&
  +                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
  +                 code.append('K') ; mtsz++;break ;
  +              }
  +              if( tmpS.indexOf("CH", n ) == n ){ // detect CH
  +                if((n == 0 ) &&
  +                   (wdsz >= 3 ) &&    // CH consonant -> K consonant
  +                   (vowels.indexOf( local.charAt( 2) ) < 0 )){
  +                     code.append('K');
  +                }
  +                else { code.append('X'); // CHvowel -> X
  +                }
  +                mtsz++;
  +              }
  +              else { code.append('K' );mtsz++;
  +              }
  +              break ;
  +            case 'D' :
  +              if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
  +                 ( local.charAt(n+1) == 'G' )&&
  +                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
  +                    code.append('J' ); n += 2 ;
  +              }
  +              else { code.append( 'T' );
  +              }
  +              mtsz++;
  +              break ;
  +            case 'G' : // GH silent at end or before consonant
  +              if(( n + 2 == wdsz )&&
  +                 (local.charAt(n+1) == 'H' )) break ;
  +              if(( n + 2 < wdsz ) &&
  +                 (local.charAt(n+1) == 'H' )&&
  +                 (vowels.indexOf( local.charAt(n+2)) < 0 )) break ;
  +              tmpS = local.toString();
  +              if((n > 0) &&
  +                 ( tmpS.indexOf("GN", n ) == n)||
  +                 ( tmpS.indexOf("GNED",n) == n )) break ; // silent G
  +              if(( n > 0 ) &&
  +                 (local.charAt(n-1) == 'G')) hard = true ;
  +              else hard = false ;
  +              if((n+1 < wdsz) &&
  +                 (frontv.indexOf( local.charAt(n+1) ) >= 0 )&&
  +                 (!hard) ) code.append( 'J' );
  +              else code.append('K');
  +              mtsz++;
  +              break ;
  +            case 'H':
  +              if( n + 1 == wdsz ) break ; // terminal H
  +              if((n > 0) &&
  +                 (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
  +              if( vowels.indexOf( local.charAt(n+1)) >=0 ){
  +                  code.append('H') ; mtsz++;// Hvowel
  +              }
  +              break;
  +            case 'F': case 'J' : case 'L' :
  +            case 'M': case 'N' : case 'R' :
  +              code.append( symb ); mtsz++; break ;
  +            case 'K' :
  +              if( n > 0 ){ // not initial
  +                if( local.charAt( n -1) != 'C' ) {
  +                     code.append(symb );
  +                }
  +              }
  +              else   code.append( symb ); // initial K
  +              mtsz++ ;
  +              break ;
  +            case 'P' :
  +              if((n + 1 < wdsz) &&  // PH -> F
  +                 (local.charAt( n+1) == 'H'))code.append('F');
  +              else code.append( symb );
  +              mtsz++;
  +              break ;
  +            case 'Q' :
  +              code.append('K' );mtsz++; break ;
  +            case 'S' :
  +              tmpS = local.toString();
  +              if((tmpS.indexOf("SH", n )== n) ||
  +                 (tmpS.indexOf("SIO",n )== n) ||
  +                 (tmpS.indexOf("SIA",n )== n)) code.append('X');
  +              else code.append( 'S' );
  +              mtsz++ ;
  +              break ;
  +            case 'T' :
  +              tmpS = local.toString(); // TIA TIO -> X
  +              if((tmpS.indexOf("TIA",n )== n)||
  +                 (tmpS.indexOf("TIO",n )== n) ){
  +                    code.append('X'); mtsz++; break;
  +              }
  +              if( tmpS.indexOf("TCH",n )==n) break;
  +              // substitute numeral 0 for TH (resembles theta after all)
  +              if( tmpS.indexOf("TH", n )==n) code.append('0');
  +              else code.append( 'T' );
  +              mtsz++ ;
  +              break ;
  +            case 'V' :
  +              code.append('F'); mtsz++;break ;
  +            case 'W' : case 'Y' : // silent if not followed by vowel
  +              if((n+1 < wdsz) &&
  +                 (vowels.indexOf( local.charAt(n+1))>=0)){
  +                    code.append( symb );mtsz++;
  +              }
  +              break ;
  +            case 'X' :
  +              code.append('K'); code.append('S');mtsz += 2;
  +              break ;
  +            case 'Z' :
  +              code.append('S'); mtsz++; break ;
  +          } // end switch
  +          n++ ;
  +        } // end else from symb != 'C'
  +        if( mtsz > 4 )code.setLength( 4);
  +      }
  +      return code.toString();
  +    } // end static method metaPhone()
  +    
  +    public String encode(String pString) {
  +        return( metaphone( pString ) );   
  +    }
  +
  +    /**
  +     * Tests whether the metaphones of two strings are the same.
  +     */
  +    public boolean isMetaphoneEqual(String str1, String str2) {
  +        return metaphone(str1).equals(metaphone(str2));
  +    }
  +
  +	/**
  +	 * Returns the maxCodeLen.
  +	 * @return int
  +	 */
  +	public int getMaxCodeLen() {
  +		return maxCodeLen;
  +	}
  +
  +	/**
  +	 * Sets the maxCodeLen.
  +	 * @param maxCodeLen The maxCodeLen to set
  +	 */
  +	public void setMaxCodeLen(int maxCodeLen) {
  +		this.maxCodeLen = maxCodeLen;
  +	}
  +
  +}
  
  
  
  1.4       +130 -2    jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/RefinedSoundex.java
  
  Index: RefinedSoundex.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/RefinedSoundex.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- RefinedSoundex.java	18 Nov 2002 13:00:25 -0000	1.3
  +++ RefinedSoundex.java	3 Feb 2003 02:19:41 -0000	1.4
  @@ -1,3 +1,131 @@
  -/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.codec;
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2002 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +package org.apache.commons.codec;
   
  -/**
 * Encodes a string into a soundex value.  Sounde is an encoding used to
 * relate similar names, but can also be used as a general purpose
 * scheme to find word with similar phonemes. 
 * More information may be found at: http://www.bluepoof.com/Soundex/info2.html
 * 
 * @todo Needs internationalisation in a future release.
 *
 * @author tobrien@transolutions.net
 * @version $Revision$ $Date$
 */
public class RefinedSoundex implements Encoder {

    static public final char[] US_ENGLISH_MAPPING =
        "01360240043788015936020505".toCharArray();

    static public final RefinedSoundex US_ENGLISH = new RefinedSoundex();
    
    private char[] soundexMapping;

    public RefinedSoundex() {
        this(US_ENGLISH_MAPPING);
    }

    public RefinedSoundex(char[] mapping) {
        this.soundexMapping = mapping;
    }

    /**
     * Get the SoundEx value of a string.
     * This implementation is taken from the code-snippers on 
     * http://www.sourceforge.net/
     */
    public String soundex(String str) {
        if(null == str || str.length() == 0) { return str; }
       
        StringBuffer sBuf = new StringBuffer();        
        str = str.toUpperCase();

        sBuf.append( str.charAt(0) );

        char last, mapped, current;
        last = '*';

        for( int i = 0; i < str.length(); i++ ) {

            current = getMappingCode( str.charAt(i) );
            if( current == last ) {
                continue;
            } else if( current != 0 ) {
                sBuf.append( current );   
            }
            
            last = current;             
            
        }
        
        return sBuf.toString();
    }

    public String encode(String pString) {
        return( soundex( pString ) );   
    }

    /**
     * Used internally by the SoundEx algorithm.
     */
    private char getMappingCode(char c) {
        if( !Character.isLetter(c) ) {
            return 0;
        } else {
            return soundexMapping[Character.toUpperCase(c) - 'A'];
        }
    }
}
  \ No newline at end of file
  +
  +/**
  + * Encodes a string into a refined soundex value.  Soundex is an encoding used to
  + * relate similar names, but can also be used as a general purpose
  + * scheme to find words with similar phonemes. 
  + * More information may be found at: http://www.bluepoof.com/Soundex/info2.html
  + * 
  + * @todo Needs internationalisation in a future release.
  + *
  + * @author tobrien@transolutions.net
  + * @version $Revision$ $Date$
  + */
  +public class RefinedSoundex implements Encoder {
  +
  +    static public final char[] US_ENGLISH_MAPPING =
  +        "01360240043788015936020505".toCharArray();
  +
  +    static public final RefinedSoundex US_ENGLISH = new RefinedSoundex();
  +    
  +    private char[] soundexMapping;
  +
  +    public RefinedSoundex() {
  +        this(US_ENGLISH_MAPPING);
  +    }
  +
  +    public RefinedSoundex(char[] mapping) {
  +        this.soundexMapping = mapping;
  +    }
  +
  +    /**
  +     * Get the SoundEx value of a string.
  +     * This implementation is taken from the code snippets on 
  +     * http://www.sourceforge.net/
  +     */
  +    public String soundex(String str) {
  +        if(null == str || str.length() == 0) { return str; }
  +       
  +        StringBuffer sBuf = new StringBuffer();        
  +        str = str.toUpperCase();
  +
  +        sBuf.append( str.charAt(0) );
  +
  +        char last, mapped, current;
  +        last = '*';
  +
  +        for( int i = 0; i < str.length(); i++ ) {
  +
  +            current = getMappingCode( str.charAt(i) );
  +            if( current == last ) {
  +                continue;
  +            } else if( current != 0 ) {
  +                sBuf.append( current );   
  +            }
  +            
  +            last = current;             
  +            
  +        }
  +        
  +        return sBuf.toString();
  +    }
  +
  +    public String encode(String pString) {
  +        return( soundex( pString ) );   
  +    }
  +
  +    /**
  +     * Used internally by the SoundEx algorithm.
  +     */
  +    private char getMappingCode(char c) {
  +        if( !Character.isLetter(c) ) {
  +            return 0;
  +        } else {
  +            return soundexMapping[Character.toUpperCase(c) - 'A'];
  +        }
  +    }
  +}
  
  
  
  1.5       +145 -4    jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java
  
  Index: Soundex.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- Soundex.java	18 Nov 2002 13:00:26 -0000	1.4
  +++ Soundex.java	3 Feb 2003 02:19:41 -0000	1.5
  @@ -1,4 +1,145 @@
  -/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.apache.commons.codec;
  -/**
 * Encodes a string into a refined soundex value.  
 * A refined soundex code is optimized for spell checking word. 
 * "Soundex" method originally developed by Margaret Odell and 
 *          Robert Russell
 * 
 * http://www.bluepoof.com/Soundex/info2.html
 * 
 * @todo Needs internationalisation in a future release.
 *
 * @author bayard@generationjava.com
 * @author tobrien@transolutions.net
 * @version $Revision$ $Date$
 */
public class Soundex implements Encoder {

    static public final char[] US_ENGLISH_MAPPING =
        "01230120022455012623010202".toCharArray();

    static public final Soundex US_ENGLISH = new Soundex();
    
    private char[] soundexMapping;
    private int maxLength = 4;

  -   public Soundex() {
        this(US_ENGLISH_MAPPING);
    }
  -    public Soundex(char[] mapping) {
        this.soundexMapping = mapping;
    }

    /**
     * Get the SoundEx value of a string.
     * This implementation is taken from the code-snippers on 
     * http://www.sourceforge.net/
     */
    public String soundex(String str) {
        if(null == str || str.length() == 0) { return str; }
        
        char out[] = { '0', '0', '0', '0' };
        char last, mapped;
        int incount = 1, count = 1;
        out[0] = Character.toUpperCase( str.charAt(0) );
        last = getMappingCode( str.charAt(0) );
        while( (incount < str.length() ) && 
               (mapped = getMappingCode(str.charAt(incount++))) != 0 &&
               (count < maxLength) )
        {
            if( (mapped != '0') && (mapped != last) ) {
                out[count++] = mapped;
            }
            last = mapped;
        }
        return new String(out);
    }

    public String encode(String pString) {
        return( soundex( pString ) );   
    }

    /**
     * Used internally by the SoundEx algorithm.
     */
    private char getMappingCode(char c) {
        if( !Character.isLetter(c) ) {
            return 0;
        } else {
            return soundexMapping[Character.toUpperCase(c) - 'A'];
        }
    }

	/**
	 * Returns the maxLength.  Standard Soundex
	 * @return int
	 */
	public int getMaxLength() {
		return maxLength;
	}

	/**
	 * Sets the maxLength.
	 * @param maxLength The maxLength to set
	 */
	public void setMaxLength(int maxLength) {
		this.maxLength = maxLength;
	}

}
  \ No newline at end of file
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Commons" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +package org.apache.commons.codec;
  +
  +/**
  + * Encodes a string into a soundex value.  
  + * A refined soundex code (see RefinedSoundex) is optimized for spell checking words. 
  + * "Soundex" method originally developed by Margaret Odell and 
  + *          Robert Russell
  + * 
  + * http://www.bluepoof.com/Soundex/info2.html
  + * 
  + * @todo Needs internationalisation in a future release.
  + *
  + * @author bayard@generationjava.com
  + * @author tobrien@transolutions.net
  + * @version $Revision$ $Date$
  + */
  +public class Soundex implements Encoder {
  +
  +    static public final char[] US_ENGLISH_MAPPING =
  +        "01230120022455012623010202".toCharArray();
  +
  +    static public final Soundex US_ENGLISH = new Soundex();
  +    
  +    private char[] soundexMapping;
  +    private int maxLength = 4;
  +
  +
  +   public Soundex() {
  +        this(US_ENGLISH_MAPPING);
  +    }
  +
  +    public Soundex(char[] mapping) {
  +        this.soundexMapping = mapping;
  +    }
  +
  +    /**
  +     * Get the SoundEx value of a string.
  +     * This implementation is taken from the code snippets on 
  +     * http://www.sourceforge.net/
  +     */
  +    public String soundex(String str) {
  +        if(null == str || str.length() == 0) { return str; }
  +        
  +        char out[] = { '0', '0', '0', '0' };
  +        char last, mapped;
  +        int incount = 1, count = 1;
  +        out[0] = Character.toUpperCase( str.charAt(0) );
  +        last = getMappingCode( str.charAt(0) );
  +        while( (incount < str.length() ) && 
  +               (mapped = getMappingCode(str.charAt(incount++))) != 0 &&
  +               (count < maxLength) )
  +        {
  +            if( (mapped != '0') && (mapped != last) ) {
  +                out[count++] = mapped;
  +            }
  +            last = mapped;
  +        }
  +        return new String(out);
  +    }
  +
  +    public String encode(String pString) {
  +        return( soundex( pString ) );   
  +    }
  +
  +    /**
  +     * Used internally by the SoundEx algorithm.
  +     */
  +    private char getMappingCode(char c) {
  +        if( !Character.isLetter(c) ) {
  +            return 0;
  +        } else {
  +            return soundexMapping[Character.toUpperCase(c) - 'A'];
  +        }
  +    }
  +
  +	/**
  +	 * Returns the maxLength.  Standard Soundex uses a length of 4.
  +	 * @return int
  +	 */
  +	public int getMaxLength() {
  +		return maxLength;
  +	}
  +
  +	/**
  +	 * Sets the maxLength.
  +	 * @param maxLength The maxLength to set
  +	 */
  +	public void setMaxLength(int maxLength) {
  +		this.maxLength = maxLength;
  +	}
  +
  +}
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org