You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by gg...@apache.org on 2003/11/04 03:43:09 UTC

cvs commit: jakarta-commons/codec/src/java/org/apache/commons/codec/language Soundex.java

ggregory    2003/11/03 18:43:09

  Modified:    codec/src/test/org/apache/commons/codec/language
                        SoundexTest.java
               codec/src/java/org/apache/commons/codec/language
                        Soundex.java
  Log:
  More Soundex test data and code clean up.
  
  Revision  Changes    Path
  1.5       +63 -21    jakarta-commons/codec/src/test/org/apache/commons/codec/language/SoundexTest.java
  
  Index: SoundexTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/test/org/apache/commons/codec/language/SoundexTest.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- SoundexTest.java	5 Oct 2003 21:45:49 -0000	1.4
  +++ SoundexTest.java	4 Nov 2003 02:43:09 -0000	1.5
  @@ -66,50 +66,92 @@
   /**
    * @version $Revision$ $Date$
    * @author Rodney Waldhoff
  + * @author Gary Gregory
    */
   public class SoundexTest extends StringEncoderAbstractTest {
   
  +    public static Test suite() {
  +        return (new TestSuite(SoundexTest.class));
  +    }
  +
  +    private Soundex _encoder = null;
  +
       public SoundexTest(String name) {
           super(name);
       }
  +    /**
  +     * @return Returns the _encoder.
  +     */
  +    public Soundex getEncoder() {
  +        return this._encoder;
  +    }
   
  -    public static Test suite() {
  -        return (new TestSuite(SoundexTest.class));
  +    protected StringEncoder makeEncoder() {
  +        return new Soundex();
  +    }
  +
  +    /**
  +     * @param encoder The encoder to set.
  +     */
  +    public void setEncoder(Soundex encoder) {
  +        this._encoder = encoder;
       }
   
       public void setUp() throws Exception {        
           super.setUp();
  -        _encoder = new Soundex();
  +        this.setEncoder(new Soundex());
       }
   
       public void tearDown() throws Exception {
           super.tearDown();
  -        _encoder = null;
  -    }
  -
  -    protected StringEncoder makeEncoder() {
  -        return new Soundex();
  +        this.setEncoder(null);
       }
   
       // ------------------------------------------------------------------------
   
       public void testEncode() throws Exception {
  -        assertEquals("T235",_encoder.encode("testing"));
  -        assertEquals("T000",_encoder.encode("The"));
  -        assertEquals("Q200",_encoder.encode("quick"));
  -        assertEquals("B650",_encoder.encode("brown"));
  -        assertEquals("F200",_encoder.encode("fox"));
  -        assertEquals("J513",_encoder.encode("jumped"));
  -        assertEquals("O160",_encoder.encode("over"));
  -        assertEquals("T000",_encoder.encode("the"));
  -        assertEquals("L200",_encoder.encode("lazy"));
  -        assertEquals("D200",_encoder.encode("dogs"));
  +        assertEquals("T235",this.getEncoder().encode("testing"));
  +        assertEquals("T000",this.getEncoder().encode("The"));
  +        assertEquals("Q200",this.getEncoder().encode("quick"));
  +        assertEquals("B650",this.getEncoder().encode("brown"));
  +        assertEquals("F200",this.getEncoder().encode("fox"));
  +        assertEquals("J513",this.getEncoder().encode("jumped"));
  +        assertEquals("O160",this.getEncoder().encode("over"));
  +        assertEquals("T000",this.getEncoder().encode("the"));
  +        assertEquals("L200",this.getEncoder().encode("lazy"));
  +        assertEquals("D200",this.getEncoder().encode("dogs"));
  +    }
  +
  +    /**
  +     * Examples from
  +     * http://www.bradandkathy.com/genealogy/overviewofsoundex.html
  +     */
  +    public void testEncode2() throws Exception {
  +        assertEquals("A462",this.getEncoder().encode("Allricht"));
  +        assertEquals("E166",this.getEncoder().encode("Eberhard"));
  +        assertEquals("E521",this.getEncoder().encode("Engebrethson"));
  +        assertEquals("H512",this.getEncoder().encode("Heimbach"));
  +        assertEquals("H524",this.getEncoder().encode("Hanselmann"));
  +        assertEquals("H431",this.getEncoder().encode("Hildebrand"));
  +        assertEquals("K152",this.getEncoder().encode("Kavanagh"));
  +        assertEquals("L530",this.getEncoder().encode("Lind, Van"));
  +        assertEquals("L222",this.getEncoder().encode("Lukaschowsky"));
  +        assertEquals("M235",this.getEncoder().encode("McDonnell"));
  +        assertEquals("M200",this.getEncoder().encode("McGee"));
  +        // Fix me?
  +        //assertEquals("O165",this.getEncoder().encode("O'Brien"));
  +        assertEquals("O155",this.getEncoder().encode("Opnian"));
  +        assertEquals("O155",this.getEncoder().encode("Oppenheimer"));
  +        // Fix me?
  +        //assertEquals("S460",this.getEncoder().encode("Swhgler"));
  +        assertEquals("R355",this.getEncoder().encode("Riedemanas"));
  +        assertEquals("Z300",this.getEncoder().encode("Zita"));
  +        assertEquals("Z325",this.getEncoder().encode("Zitzmeinn"));    
       }
  -
  +    
       public void testMaxLength() throws Exception {
           Soundex soundex = new Soundex();
           soundex.setMaxLength( soundex.getMaxLength() );
       }
   
  -    private Soundex _encoder = null;
   }
  
  
  
  1.10      +52 -40    jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java
  
  Index: Soundex.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- Soundex.java	12 Oct 2003 19:48:15 -0000	1.9
  +++ Soundex.java	4 Nov 2003 02:43:09 -0000	1.10
  @@ -67,12 +67,18 @@
    * 
    * @author bayard@generationjava.com
    * @author Tim O'Brien
  - * @author ggregory@seagullsw.com
  + * @author Gary Gregory
    * @version $Id$
    */
   public class Soundex implements StringEncoder {
   
       /**
  +     * This static variable contains an instance of the
  +     * Soundex using the US_ENGLISH mapping.
  +     */
  +    public static final Soundex US_ENGLISH = new Soundex();
  +
  +    /**
        * This is a default mapping of the 26 letters used
        * in US english.
        */
  @@ -80,10 +86,10 @@
           "01230120022455012623010202".toCharArray();
   
       /**
  -     * This static variable contains an instance of the
  -     * Soundex using the US_ENGLISH mapping.
  +     * The maximum length of a Soundex code - Soundex codes are
  +     * only four characters by definition.
        */
  -    public static final Soundex US_ENGLISH = new Soundex();
  +    private int maxLength = 4;
       
       /**
        * Every letter of the alphabet is "mapped" to a numerical 
  @@ -94,12 +100,6 @@
       private char[] soundexMapping;
   
       /**
  -     * The maximum length of a Soundex code - Soundex codes are
  -     * only four characters by definition.
  -     */
  -    private int maxLength = 4;
  -
  -    /**
        * Creates an instance of the Soundex object using the default
        * US_ENGLISH mapping.
        */
  @@ -117,30 +117,7 @@
        *                code for a given character
        */
       public Soundex(char[] mapping) {
  -        this.soundexMapping = mapping;
  -    }
  -
  -    /**
  -     * Retreives the Soundex code for a given String object.
  -     *
  -     * @param str String to encode using the Soundex algorithm
  -     * @return A soundex code for the String supplied
  -     */
  -    public String soundex(String str) {
  -        if (null == str || str.length() == 0) { return str; }
  -        
  -        char out[] = { '0', '0', '0', '0' };
  -        char last, mapped;
  -        int incount = 1, count = 1;
  -        out[0] = Character.toUpperCase(str.charAt(0));
  -        last = getMappingCode(str.charAt(0));
  -        while ((incount < str.length()) && (mapped = getMappingCode(str.charAt(incount++))) != 0 && (count < maxLength)) {
  -                if ((mapped != '0') && (mapped != last)) {
  -                    out[count++] = mapped;
  -                }
  -                last = mapped;
  -            }
  -        return new String(out);
  +        this.setSoundexMapping(mapping);
       }
   
       /**
  @@ -174,10 +151,8 @@
        *
        * @param pString A String object to encode
        * @return A Soundex code corresponding to the String supplied
  -     * @throws EncoderException throws exception if there is an
  -     *                          encoding-specific problem
        */
  -    public String encode(String pString) throws EncoderException {
  +    public String encode(String pString) {
           return (soundex(pString));   
       }
   
  @@ -191,7 +166,7 @@
           if (!Character.isLetter(c)) {
               return 0;
           } else {
  -            return soundexMapping[Character.toUpperCase(c) - 'A'];
  +            return this.getSoundexMapping()[Character.toUpperCase(c) - 'A'];
           }
       }
   
  @@ -200,7 +175,14 @@
        * @return int
        */
       public int getMaxLength() {
  -        return maxLength;
  +        return this.maxLength;
  +    }
  +
  +    /**
  +     * @return Returns the soundexMapping.
  +     */
  +    private char[] getSoundexMapping() {
  +        return this.soundexMapping;
       }
   
       /**
  @@ -209,6 +191,36 @@
        */
       public void setMaxLength(int maxLength) {
           this.maxLength = maxLength;
  +    }
  +
  +    /**
  +     * @param soundexMapping The soundexMapping to set.
  +     */
  +    private void setSoundexMapping(char[] soundexMapping) {
  +        this.soundexMapping = soundexMapping;
  +    }
  +
  +    /**
  +     * Retrieves the Soundex code for a given String object.
  +     *
  +     * @param str String to encode using the Soundex algorithm
  +     * @return A soundex code for the String supplied
  +     */
  +    public String soundex(String str) {
  +        if (null == str || str.length() == 0) { return str; }
  +        
  +        char out[] = { '0', '0', '0', '0' };
  +        char last, mapped;
  +        int incount = 1, count = 1;
  +        out[0] = Character.toUpperCase(str.charAt(0));
  +        last = getMappingCode(str.charAt(0));
  +        while ((incount < str.length()) && (mapped = getMappingCode(str.charAt(incount++))) != 0 && (count < this.getMaxLength())) {
  +                if ((mapped != '0') && (mapped != last)) {
  +                    out[count++] = mapped;
  +                }
  +                last = mapped;
  +            }
  +        return new String(out);
       }
   
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org