You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by gg...@apache.org on 2003/11/04 03:43:09 UTC
cvs commit: jakarta-commons/codec/src/java/org/apache/commons/codec/language Soundex.java
ggregory 2003/11/03 18:43:09
Modified: codec/src/test/org/apache/commons/codec/language
SoundexTest.java
codec/src/java/org/apache/commons/codec/language
Soundex.java
Log:
More Soundex test data and code clean up.
Revision Changes Path
1.5 +63 -21 jakarta-commons/codec/src/test/org/apache/commons/codec/language/SoundexTest.java
Index: SoundexTest.java
===================================================================
RCS file: /home/cvs/jakarta-commons/codec/src/test/org/apache/commons/codec/language/SoundexTest.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- SoundexTest.java 5 Oct 2003 21:45:49 -0000 1.4
+++ SoundexTest.java 4 Nov 2003 02:43:09 -0000 1.5
@@ -66,50 +66,92 @@
/**
* @version $Revision$ $Date$
* @author Rodney Waldhoff
+ * @author Gary Gregory
*/
public class SoundexTest extends StringEncoderAbstractTest {
+ public static Test suite() {
+ return (new TestSuite(SoundexTest.class));
+ }
+
+ private Soundex _encoder = null;
+
public SoundexTest(String name) {
super(name);
}
+ /**
+ * @return Returns the _encoder.
+ */
+ public Soundex getEncoder() {
+ return this._encoder;
+ }
- public static Test suite() {
- return (new TestSuite(SoundexTest.class));
+ protected StringEncoder makeEncoder() {
+ return new Soundex();
+ }
+
+ /**
+ * @param encoder The encoder to set.
+ */
+ public void setEncoder(Soundex encoder) {
+ this._encoder = encoder;
}
public void setUp() throws Exception {
super.setUp();
- _encoder = new Soundex();
+ this.setEncoder(new Soundex());
}
public void tearDown() throws Exception {
super.tearDown();
- _encoder = null;
- }
-
- protected StringEncoder makeEncoder() {
- return new Soundex();
+ this.setEncoder(null);
}
// ------------------------------------------------------------------------
public void testEncode() throws Exception {
- assertEquals("T235",_encoder.encode("testing"));
- assertEquals("T000",_encoder.encode("The"));
- assertEquals("Q200",_encoder.encode("quick"));
- assertEquals("B650",_encoder.encode("brown"));
- assertEquals("F200",_encoder.encode("fox"));
- assertEquals("J513",_encoder.encode("jumped"));
- assertEquals("O160",_encoder.encode("over"));
- assertEquals("T000",_encoder.encode("the"));
- assertEquals("L200",_encoder.encode("lazy"));
- assertEquals("D200",_encoder.encode("dogs"));
+ assertEquals("T235",this.getEncoder().encode("testing"));
+ assertEquals("T000",this.getEncoder().encode("The"));
+ assertEquals("Q200",this.getEncoder().encode("quick"));
+ assertEquals("B650",this.getEncoder().encode("brown"));
+ assertEquals("F200",this.getEncoder().encode("fox"));
+ assertEquals("J513",this.getEncoder().encode("jumped"));
+ assertEquals("O160",this.getEncoder().encode("over"));
+ assertEquals("T000",this.getEncoder().encode("the"));
+ assertEquals("L200",this.getEncoder().encode("lazy"));
+ assertEquals("D200",this.getEncoder().encode("dogs"));
+ }
+
+ /**
+ * Examples from
+ * http://www.bradandkathy.com/genealogy/overviewofsoundex.html
+ */
+ public void testEncode2() throws Exception {
+ assertEquals("A462",this.getEncoder().encode("Allricht"));
+ assertEquals("E166",this.getEncoder().encode("Eberhard"));
+ assertEquals("E521",this.getEncoder().encode("Engebrethson"));
+ assertEquals("H512",this.getEncoder().encode("Heimbach"));
+ assertEquals("H524",this.getEncoder().encode("Hanselmann"));
+ assertEquals("H431",this.getEncoder().encode("Hildebrand"));
+ assertEquals("K152",this.getEncoder().encode("Kavanagh"));
+ assertEquals("L530",this.getEncoder().encode("Lind, Van"));
+ assertEquals("L222",this.getEncoder().encode("Lukaschowsky"));
+ assertEquals("M235",this.getEncoder().encode("McDonnell"));
+ assertEquals("M200",this.getEncoder().encode("McGee"));
+ // Fix me?
+ //assertEquals("O165",this.getEncoder().encode("O'Brien"));
+ assertEquals("O155",this.getEncoder().encode("Opnian"));
+ assertEquals("O155",this.getEncoder().encode("Oppenheimer"));
+ // Fix me?
+ //assertEquals("S460",this.getEncoder().encode("Swhgler"));
+ assertEquals("R355",this.getEncoder().encode("Riedemanas"));
+ assertEquals("Z300",this.getEncoder().encode("Zita"));
+ assertEquals("Z325",this.getEncoder().encode("Zitzmeinn"));
}
-
+
public void testMaxLength() throws Exception {
Soundex soundex = new Soundex();
soundex.setMaxLength( soundex.getMaxLength() );
}
- private Soundex _encoder = null;
}
1.10 +52 -40 jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java
Index: Soundex.java
===================================================================
RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- Soundex.java 12 Oct 2003 19:48:15 -0000 1.9
+++ Soundex.java 4 Nov 2003 02:43:09 -0000 1.10
@@ -67,12 +67,18 @@
*
* @author bayard@generationjava.com
* @author Tim O'Brien
- * @author ggregory@seagullsw.com
+ * @author Gary Gregory
* @version $Id$
*/
public class Soundex implements StringEncoder {
/**
+ * This static variable contains an instance of the
+ * Soundex using the US_ENGLISH mapping.
+ */
+ public static final Soundex US_ENGLISH = new Soundex();
+
+ /**
* This is a default mapping of the 26 letters used
* in US english.
*/
@@ -80,10 +86,10 @@
"01230120022455012623010202".toCharArray();
/**
- * This static variable contains an instance of the
- * Soundex using the US_ENGLISH mapping.
+ * The maximum length of a Soundex code - Soundex codes are
+ * only four characters by definition.
*/
- public static final Soundex US_ENGLISH = new Soundex();
+ private int maxLength = 4;
/**
* Every letter of the alphabet is "mapped" to a numerical
@@ -94,12 +100,6 @@
private char[] soundexMapping;
/**
- * The maximum length of a Soundex code - Soundex codes are
- * only four characters by definition.
- */
- private int maxLength = 4;
-
- /**
* Creates an instance of the Soundex object using the default
* US_ENGLISH mapping.
*/
@@ -117,30 +117,7 @@
* code for a given character
*/
public Soundex(char[] mapping) {
- this.soundexMapping = mapping;
- }
-
- /**
- * Retreives the Soundex code for a given String object.
- *
- * @param str String to encode using the Soundex algorithm
- * @return A soundex code for the String supplied
- */
- public String soundex(String str) {
- if (null == str || str.length() == 0) { return str; }
-
- char out[] = { '0', '0', '0', '0' };
- char last, mapped;
- int incount = 1, count = 1;
- out[0] = Character.toUpperCase(str.charAt(0));
- last = getMappingCode(str.charAt(0));
- while ((incount < str.length()) && (mapped = getMappingCode(str.charAt(incount++))) != 0 && (count < maxLength)) {
- if ((mapped != '0') && (mapped != last)) {
- out[count++] = mapped;
- }
- last = mapped;
- }
- return new String(out);
+ this.setSoundexMapping(mapping);
}
/**
@@ -174,10 +151,8 @@
*
* @param pString A String object to encode
* @return A Soundex code corresponding to the String supplied
- * @throws EncoderException throws exception if there is an
- * encoding-specific problem
*/
- public String encode(String pString) throws EncoderException {
+ public String encode(String pString) {
return (soundex(pString));
}
@@ -191,7 +166,7 @@
if (!Character.isLetter(c)) {
return 0;
} else {
- return soundexMapping[Character.toUpperCase(c) - 'A'];
+ return this.getSoundexMapping()[Character.toUpperCase(c) - 'A'];
}
}
@@ -200,7 +175,14 @@
* @return int
*/
public int getMaxLength() {
- return maxLength;
+ return this.maxLength;
+ }
+
+ /**
+ * @return Returns the soundexMapping.
+ */
+ private char[] getSoundexMapping() {
+ return this.soundexMapping;
}
/**
@@ -209,6 +191,36 @@
*/
public void setMaxLength(int maxLength) {
this.maxLength = maxLength;
+ }
+
+ /**
+ * @param soundexMapping The soundexMapping to set.
+ */
+ private void setSoundexMapping(char[] soundexMapping) {
+ this.soundexMapping = soundexMapping;
+ }
+
+ /**
+ * Retrieves the Soundex code for a given String object.
+ *
+ * @param str String to encode using the Soundex algorithm
+ * @return A soundex code for the String supplied
+ */
+ public String soundex(String str) {
+ if (null == str || str.length() == 0) { return str; }
+
+ char out[] = { '0', '0', '0', '0' };
+ char last, mapped;
+ int incount = 1, count = 1;
+ out[0] = Character.toUpperCase(str.charAt(0));
+ last = getMappingCode(str.charAt(0));
+ while ((incount < str.length()) && (mapped = getMappingCode(str.charAt(incount++))) != 0 && (count < this.getMaxLength())) {
+ if ((mapped != '0') && (mapped != last)) {
+ out[count++] = mapped;
+ }
+ last = mapped;
+ }
+ return new String(out);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org