You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by Gary Gregory <ga...@gmail.com> on 2012/03/08 22:13:19 UTC

Re: svn commit: r1298576 - /commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java

There are some typos in the spelling of "violates".

Gary

On Mar 8, 2012, at 15:57, "tn@apache.org" <tn...@apache.org> wrote:

> Author: tn
> Date: Thu Mar  8 20:56:35 2012
> New Revision: 1298576
>
> URL: http://svn.apache.org/viewvc?rev=1298576&view=rev
> Log:
> [CODEC-63] Added explanation for different results to dropby.com, Raised CC to 100/100
>
> Modified:
>    commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
>
> Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java?rev=1298576&r1=1298575&r2=1298576&view=diff
> ==============================================================================
> --- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java (original)
> +++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java Thu Mar  8 20:56:35 2012
> @@ -49,6 +49,15 @@ public class NysiisTest extends StringEn
>     }
>
>     @Test
> +    public void testTrueVariant() {
> +        Nysiis encoder = new Nysiis(true);
> +
> +        String encoded = encoder.encode("WESTERLUND");
> +        Assert.assertTrue(encoded.length() <= 6);
> +        Assert.assertEquals("WASTAR", encoded);
> +    }
> +
> +    @Test
>     public void testBran() throws EncoderException {
>         encodeAll(new String[] { "Brian", "Brown", "Brun" }, "BRAN");
>     }
> @@ -71,6 +80,17 @@ public class NysiisTest extends StringEn
>     }
>
>     @Test
> +    public void testSpecialBranches() throws EncoderException {
> +        this.encodeAll(new String[] { "Kobwick" }, "CABWAC");
> +        this.encodeAll(new String[] { "Kocher" }, "CACAR");
> +        this.encodeAll(new String[] { "Fesca" }, "FASC");
> +        this.encodeAll(new String[] { "Shom" }, "SAN");
> +        this.encodeAll(new String[] { "Ohlo" }, "OL");
> +        this.encodeAll(new String[] { "Uhu" }, "UH");
> +        this.encodeAll(new String[] { "Um" }, "UN");
> +    }
> +
> +    @Test
>     public void testDropBy() throws EncoderException {
>         List<String[]> testValues =
>                 Arrays.asList(
> @@ -112,16 +132,62 @@ public class NysiisTest extends StringEn
>      */
>     @Test
>     public void testDropBy2() throws EncoderException {
> +        // Explanation of differences between this implementation and the one at dropby.com.
> +        //
> +        // Algorithm (taken from www.dropby.com/NYSIIS.html):
> +        //
> +        // 1.  Transcode first characters of name:
> +        //    MAC »   MCC
> +        //    KN  »   NN
> +        //    K   »   C
> +        //    PH  »   FF
> +        //    PF  »   FF
> +        //    SCH »   SSS
> +        //
> +        // 2.  Transcode last characters of name:
> +        //    EE, IE  »   Y
> +        //    DT,RT,RD,NT,ND  »   D
> +        //
> +        // 3.  First character of key = first character of name.
> +        //
> +        // 4.  Transcode remaining characters by following these rules, incrementing by one character each time:
> +        //   4a.   EV  »   AF  else A,E,I,O,U » A
> +        //   4b.   Q   »   G
> +        //   4c.   Z   »   S
> +        //   4d.   M   »   N
> +        //   4e.   KN  »   N   else K » C
> +        //   4f.   SCH     »   SSS
> +        //   4g.   PH  »   FF
> +        //   4h.   H   »   If previous or next is nonvowel, previous
> +        //   4i.   W   »   If previous is vowel, previous
> +        //   4j.   Add current to key if current != last key character
> +        //
> +        // 5.  If last character is S, remove it
> +        // 6.  If last characters are AY, replace with Y
> +        // 7.  If last character is A, remove it
> +        // 8.  Collapse all strings of repeated characters
> +        // 9.  Add original first character of name as first character of key
> +
>         List<String[]> testValues =
>                 Arrays.asList(
>                         // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
>                         // 1. Transcode first characters of name
>                         new String[] { "MACINTOSH", "MCANT" },
> -                        //new String[] { "KNUTH", "NNATH" }, // Original: NNAT; modified: NATH
> -                        //new String[] { "KOEHN", "C" },
> -                        //new String[] { "PHILLIPSON", "FFALAP" },
> -                        //new String[] { "PFEISTER", "FFASTA" },
> -                        //new String[] { "SCHOENHOEFT", "SSANAF" },
> +                        // violates 4j: the second N should not be added, as the first
> +                        //              key char is already a N
> +                        new String[] { "KNUTH", "NAT" }, // Original: NNAT; modified: NATH
> +                        // O and E are transcoded to A because of rule 4a
> +                        // H also to A because of rule 4h
> +                        // the N gets mysteriously lost, maybe because of a wrongly implemented rule 4h
> +                        // that skips the next char in such a case?
> +                        // the remaining A is removed because of rule 7
> +                        new String[] { "KOEHN", "CAN" }, // Original: C
> +                        // violates 4j: see also KNUTH
> +                        new String[] { "PHILLIPSON", "FALAPSAN" }, // Original: FFALAP[SAN]
> +                        // violates 4j: see also KNUTH
> +                        new String[] { "PFEISTER", "FASTAR" }, // Original: FFASTA[R]
> +                        // violoates 4j: see also KNUTH
> +                        new String[] { "SCHOENHOEFT", "SANAFT" }, // Original: SSANAF[T]
>                         // http://www.dropby.com/indexLF.html?content=/NYSIIS.html
>                         // 2.Transcode last characters of name:
>                         new String[] { "MCKEE", "MCY" },
> @@ -139,14 +205,21 @@ public class NysiisTest extends StringEn
>                         new String[] { "BOWMAN", "BANAN" },
>                         new String[] { "MCKNIGHT", "MCNAGT" },
>                         new String[] { "RICKERT", "RACAD" },
> -                        //new String[] { "DEUTSCH", "DATS" },
> +                        // violates 5: the last S is not removed
> +                        // when comparing to DEUTS, which is phonetically similar
> +                        // the result it also DAT, which is correct for DEUTSCH too imo
> +                        new String[] { "DEUTSCH", "DAT" }, // Original: DATS
>                         new String[] { "WESTPHAL", "WASTFAL" },
> -                        //new String[] { "SHRIVER", "SHRAVA" },
> -                        //new String[] { "KUHL", "C" },
> +                        // violates 4h: the H should be transcoded to S and thus ignored as
> +                        // the first key character is also S
> +                        new String[] { "SHRIVER", "SRAVAR" }, // Original: SHRAVA[R]
> +                        // same as KOEHN, the L gets mysteriously lost, the correct one
> +                        new String[] { "KUHL", "CAL" }, // Original: C
>                         new String[] { "RAWSON", "RASAN" },
>                         // If last character is S, remove it
>                         new String[] { "JILES", "JAL" },
> -                        //new String[] { "CARRAWAY", "CARAY" },
> +                        // violates 6: if the last two characters are AY, remove A
> +                        new String[] { "CARRAWAY", "CARY" }, // Original: CARAY
>                         new String[] { "YAMADA", "YANAD" });
>
>         for (String[] arr : testValues) {
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org


Re: svn commit: r1298576 - /commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java

Posted by Thomas Neidhart <th...@gmail.com>.
On 03/08/2012 10:13 PM, Gary Gregory wrote:
> There are some typos in the spelling of "violates".

oops, I will fix it together with the non-ascii chars.

Thomas

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org