You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by Gary Gregory <ga...@gmail.com> on 2012/03/08 22:13:19 UTC
Re: svn commit: r1298576 - /commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
There are some typos in the spelling of "violates".
Gary
On Mar 8, 2012, at 15:57, "tn@apache.org" <tn...@apache.org> wrote:
> Author: tn
> Date: Thu Mar 8 20:56:35 2012
> New Revision: 1298576
>
> URL: http://svn.apache.org/viewvc?rev=1298576&view=rev
> Log:
> [CODEC-63] Added explanation for different results to dropby.com, Raised CC to 100/100
>
> Modified:
> commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
>
> Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java?rev=1298576&r1=1298575&r2=1298576&view=diff
> =============================================================================
> --- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java (original)
> +++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java Thu Mar 8 20:56:35 2012
> @@ -49,6 +49,15 @@ public class NysiisTest extends StringEn
> }
>
> @Test
> + public void testTrueVariant() {
> + Nysiis encoder = new Nysiis(true);
> +
> + String encoded = encoder.encode("WESTERLUND");
> + Assert.assertTrue(encoded.length() <= 6);
> + Assert.assertEquals("WASTAR", encoded);
> + }
> +
> + @Test
> public void testBran() throws EncoderException {
> encodeAll(new String[] { "Brian", "Brown", "Brun" }, "BRAN");
> }
> @@ -71,6 +80,17 @@ public class NysiisTest extends StringEn
> }
>
> @Test
> + public void testSpecialBranches() throws EncoderException {
> + this.encodeAll(new String[] { "Kobwick" }, "CABWAC");
> + this.encodeAll(new String[] { "Kocher" }, "CACAR");
> + this.encodeAll(new String[] { "Fesca" }, "FASC");
> + this.encodeAll(new String[] { "Shom" }, "SAN");
> + this.encodeAll(new String[] { "Ohlo" }, "OL");
> + this.encodeAll(new String[] { "Uhu" }, "UH");
> + this.encodeAll(new String[] { "Um" }, "UN");
> + }
> +
> + @Test
> public void testDropBy() throws EncoderException {
> List<String[]> testValues =
> Arrays.asList(
> @@ -112,16 +132,62 @@ public class NysiisTest extends StringEn
> */
> @Test
> public void testDropBy2() throws EncoderException {
> + // Explanation of differences between this implementation and the one at dropby.com.
> + //
> + // Algorithm (taken from www.dropby.com/NYSIIS.html):
> + //
> + // 1. Transcode first characters of name:
> + // MAC » MCC
> + // KN » NN
> + // K » C
> + // PH » FF
> + // PF » FF
> + // SCH » SSS
> + //
> + // 2. Transcode last characters of name:
> + // EE, IE » Y
> + // DT,RT,RD,NT,ND » D
> + //
> + // 3. First character of key = first character of name.
> + //
> + // 4. Transcode remaining characters by following these rules, incrementing by one character each time:
> + // 4a. EV » AF else A,E,I,O,U » A
> + // 4b. Q » G
> + // 4c. Z » S
> + // 4d. M » N
> + // 4e. KN » N else K » C
> + // 4f. SCH » SSS
> + // 4g. PH » FF
> + // 4h. H » If previous or next is nonvowel, previous
> + // 4i. W » If previous is vowel, previous
> + // 4j. Add current to key if current != last key character
> + //
> + // 5. If last character is S, remove it
> + // 6. If last characters are AY, replace with Y
> + // 7. If last character is A, remove it
> + // 8. Collapse all strings of repeated characters
> + // 9. Add original first character of name as first character of key
> +
> List<String[]> testValues =
> Arrays.asList(
> // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
> // 1. Transcode first characters of name
> new String[] { "MACINTOSH", "MCANT" },
> - //new String[] { "KNUTH", "NNATH" }, // Original: NNAT; modified: NATH
> - //new String[] { "KOEHN", "C" },
> - //new String[] { "PHILLIPSON", "FFALAP" },
> - //new String[] { "PFEISTER", "FFASTA" },
> - //new String[] { "SCHOENHOEFT", "SSANAF" },
> + // violates 4j: the second N should not be added, as the first
> + // key char is already a N
> + new String[] { "KNUTH", "NAT" }, // Original: NNAT; modified: NATH
> + // O and E are transcoded to A because of rule 4a
> + // H also to A because of rule 4h
> + // the N gets mysteriously lost, maybe because of a wrongly implemented rule 4h
> + // that skips the next char in such a case?
> + // the remaining A is removed because of rule 7
> + new String[] { "KOEHN", "CAN" }, // Original: C
> + // violates 4j: see also KNUTH
> + new String[] { "PHILLIPSON", "FALAPSAN" }, // Original: FFALAP[SAN]
> + // violates 4j: see also KNUTH
> + new String[] { "PFEISTER", "FASTAR" }, // Original: FFASTA[R]
> + // violoates 4j: see also KNUTH
> + new String[] { "SCHOENHOEFT", "SANAFT" }, // Original: SSANAF[T]
> // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
> // 2.Transcode last characters of name:
> new String[] { "MCKEE", "MCY" },
> @@ -139,14 +205,21 @@ public class NysiisTest extends StringEn
> new String[] { "BOWMAN", "BANAN" },
> new String[] { "MCKNIGHT", "MCNAGT" },
> new String[] { "RICKERT", "RACAD" },
> - //new String[] { "DEUTSCH", "DATS" },
> + // violates 5: the last S is not removed
> + // when comparing to DEUTS, which is phonetically similar
> + // the result it also DAT, which is correct for DEUTSCH too imo
> + new String[] { "DEUTSCH", "DAT" }, // Original: DATS
> new String[] { "WESTPHAL", "WASTFAL" },
> - //new String[] { "SHRIVER", "SHRAVA" },
> - //new String[] { "KUHL", "C" },
> + // violates 4h: the H should be transcoded to S and thus ignored as
> + // the first key character is also S
> + new String[] { "SHRIVER", "SRAVAR" }, // Original: SHRAVA[R]
> + // same as KOEHN, the L gets mysteriously lost, the correct one
> + new String[] { "KUHL", "CAL" }, // Original: C
> new String[] { "RAWSON", "RASAN" },
> // If last character is S, remove it
> new String[] { "JILES", "JAL" },
> - //new String[] { "CARRAWAY", "CARAY" },
> + // violates 6: if the last two characters are AY, remove A
> + new String[] { "CARRAWAY", "CARY" }, // Original: CARAY
> new String[] { "YAMADA", "YANAD" });
>
> for (String[] arr : testValues) {
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org
Re: svn commit: r1298576 - /commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
Posted by Thomas Neidhart <th...@gmail.com>.
On 03/08/2012 10:13 PM, Gary Gregory wrote:
> There are some typos in the spelling of "violates".
oops, I will fix it together with the non-ascii chars.
Thomas
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org