You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/06/27 20:33:48 UTC

[03/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
new file mode 100644
index 0000000..18a9e59
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
@@ -0,0 +1,518 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="Metaphone"/>.
+    /// </summary>
+    public class MetaphoneTest : StringEncoderAbstractTest<Metaphone>
+    {
+        /// <summary>
+        /// Asserts that <paramref name="source"/> is Metaphone-equal to every entry of <paramref name="matches"/> and that all entries are Metaphone-equal to one another.
+        /// </summary>
+        public void AssertIsMetaphoneEqual(string source, string[] matches)
+        {
+            // match source to all matches
+            foreach (string match in matches)
+            {
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(source, match),
+                    "Source: " + source + ", should have same Metaphone as: " + match);
+            }
+            // match to each other
+            foreach (string match in matches)
+            {
+                foreach (string other in matches)
+                {
+                    Assert.True(this.StringEncoder.IsMetaphoneEqual(match, other), "Match: " + match + ", should have same Metaphone as: " + other);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Validates the fixture, then asserts that both names of every pair are Metaphone-equal in either argument order.
+        /// </summary>
+        public void AssertMetaphoneEqual(string[][] pairs)
+        {
+            this.ValidateFixture(pairs);
+            foreach (string[] pair in pairs)
+            {
+                string name0 = pair[0];
+                string name1 = pair[1];
+                string failMsg = "Expected match between " + name0 + " and " + name1;
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name0, name1), failMsg);
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name1, name0), failMsg);
+            }
+        }
+
+        /// <summary>Creates the <see cref="Metaphone"/> instance for this fixture.</summary>
+        protected override Metaphone CreateStringEncoder()
+        {
+            return new Metaphone();
+        }
+
+        [Test]
+        public void TestIsMetaphoneEqual1()
+        {
+            this.AssertMetaphoneEqual(new string[][] {
+                new string[] { "Case", "case" },
+                new string[] { "CASE", "Case" },
+                new string[] { "caSe", "cAsE" },
+                new string[] { "quick", "cookie" }
+            });
+        }
+
+        /// <summary>
+        /// Matches computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqual2()
+        {
+            this.AssertMetaphoneEqual(new string[][] {
+                new string[] { "Lawrence", "Lorenza" }, new string[] { "Gary", "Cahra" } });
+        }
+
+        /// <summary>
+        /// Initial AE case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualAero()
+        {
+            this.AssertIsMetaphoneEqual("Aero", new string[] { "Eure" });
+        }
+
+        /// <summary>
+        /// Initial WH case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualWhite()
+        {
+            this.AssertIsMetaphoneEqual("White",
+                new string[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
+        }
+
+        /// <summary>
+        /// Initial A, not followed by an E case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualAlbert()
+        {
+            this.AssertIsMetaphoneEqual("Albert", new string[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualGary()
+        {
+            this.AssertIsMetaphoneEqual("Gary", new string[] {
+                "Cahra",
+                "Cara",
+                "Carey",
+                "Cari",
+                "Caria",
+                "Carie",
+                "Caro",
+                "Carree",
+                "Carri",
+                "Carrie",
+                "Carry",
+                "Cary",
+                "Cora",
+                "Corey",
+                "Cori",
+                "Corie",
+                "Correy",
+                "Corri",
+                "Corrie",
+                "Corry",
+                "Cory",
+                "Gray",
+                "Kara",
+                "Kare",
+                "Karee",
+                "Kari",
+                "Karia",
+                "Karie",
+                "Karrah",
+                "Karrie",
+                "Karry",
+                "Kary",
+                "Keri",
+                "Kerri",
+                "Kerrie",
+                "Kerry",
+                "Kira",
+                "Kiri",
+                "Kora",
+                "Kore",
+                "Kori",
+                "Korie",
+                "Korrie",
+                "Korry" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualJohn()
+        {
+            this.AssertIsMetaphoneEqual("John", new string[] {
+                "Gena",
+                "Gene",
+                "Genia",
+                "Genna",
+                "Genni",
+                "Gennie",
+                "Genny",
+                "Giana",
+                "Gianna",
+                "Gina",
+                "Ginni",
+                "Ginnie",
+                "Ginny",
+                "Jaine",
+                "Jan",
+                "Jana",
+                "Jane",
+                "Janey",
+                "Jania",
+                "Janie",
+                "Janna",
+                "Jany",
+                "Jayne",
+                "Jean",
+                "Jeana",
+                "Jeane",
+                "Jeanie",
+                "Jeanna",
+                "Jeanne",
+                "Jeannie",
+                "Jen",
+                "Jena",
+                "Jeni",
+                "Jenn",
+                "Jenna",
+                "Jennee",
+                "Jenni",
+                "Jennie",
+                "Jenny",
+                "Jinny",
+                "Jo Ann",
+                "Jo-Ann",
+                "Jo-Anne",
+                "Joan",
+                "Joana",
+                "Joane",
+                "Joanie",
+                "Joann",
+                "Joanna",
+                "Joanne",
+                "Joeann",
+                "Johna",
+                "Johnna",
+                "Joni",
+                "Jonie",
+                "Juana",
+                "June",
+                "Junia",
+                "Junie" });
+        }
+
+        /// <summary>
+        /// Initial KN case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualKnight()
+        {
+            this.AssertIsMetaphoneEqual("Knight", new string[] {
+                "Hynda",
+                "Nada",
+                "Nadia",
+                "Nady",
+                "Nat",
+                "Nata",
+                "Natty",
+                "Neda",
+                "Nedda",
+                "Nedi",
+                "Netta",
+                "Netti",
+                "Nettie",
+                "Netty",
+                "Nita",
+                "Nydia" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualMary()
+        {
+            this.AssertIsMetaphoneEqual("Mary", new string[] {
+                "Mair",
+                "Maire",
+                "Mara",
+                "Mareah",
+                "Mari",
+                "Maria",
+                "Marie",
+                "Mary",
+                "Maura",
+                "Maure",
+                "Meara",
+                "Merrie",
+                "Merry",
+                "Mira",
+                "Moira",
+                "Mora",
+                "Moria",
+                "Moyra",
+                "Muire",
+                "Myra",
+                "Myrah" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualParis()
+        {
+            this.AssertIsMetaphoneEqual("Paris", new string[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualPeter()
+        {
+            this.AssertIsMetaphoneEqual("Peter",
+                new string[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualRay()
+        {
+            this.AssertIsMetaphoneEqual("Ray", new string[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualSusan()
+        {
+            this.AssertIsMetaphoneEqual("Susan", new string[] {
+                "Siusan",
+                "Sosanna",
+                "Susan",
+                "Susana",
+                "Susann",
+                "Susanna",
+                "Susannah",
+                "Susanne",
+                "Suzann",
+                "Suzanna",
+                "Suzanne",
+                "Zuzana" });
+        }
+
+        /// <summary>
+        /// Initial WR case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualWright()
+        {
+            this.AssertIsMetaphoneEqual("Wright", new string[] { "Rota", "Rudd", "Ryde" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualXalan()
+        {
+            this.AssertIsMetaphoneEqual("Xalan",
+                new string[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
+        }
+
+        [Test]
+        public void TestMetaphone()
+        {
+            Assert.AreEqual("HL", this.StringEncoder.GetMetaphone("howl"));
+            Assert.AreEqual("TSTN", this.StringEncoder.GetMetaphone("testing"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("The"));
+            Assert.AreEqual("KK", this.StringEncoder.GetMetaphone("quick"));
+            Assert.AreEqual("BRN", this.StringEncoder.GetMetaphone("brown"));
+            Assert.AreEqual("FKS", this.StringEncoder.GetMetaphone("fox"));
+            Assert.AreEqual("JMPT", this.StringEncoder.GetMetaphone("jumped"));
+            Assert.AreEqual("OFR", this.StringEncoder.GetMetaphone("over"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("the"));
+            Assert.AreEqual("LS", this.StringEncoder.GetMetaphone("lazy"));
+            Assert.AreEqual("TKS", this.StringEncoder.GetMetaphone("dogs"));
+        }
+
+        [Test]
+        public void TestWordEndingInMB()
+        {
+            Assert.AreEqual("KM", this.StringEncoder.GetMetaphone("COMB"));
+            Assert.AreEqual("TM", this.StringEncoder.GetMetaphone("TOMB"));
+            Assert.AreEqual("WM", this.StringEncoder.GetMetaphone("WOMB"));
+        }
+
+        [Test]
+        public void TestDiscardOfSCEOrSCIOrSCY()
+        {
+            Assert.AreEqual("SNS", this.StringEncoder.GetMetaphone("SCIENCE"));
+            Assert.AreEqual("SN", this.StringEncoder.GetMetaphone("SCENE"));
+            Assert.AreEqual("S", this.StringEncoder.GetMetaphone("SCY"));
+        }
+
+        /// <summary>
+        /// Tests (CODEC-57) that <c>GetMetaphone</c> returns an empty string when passed the word "why".
+        /// </summary>
+        [Test]
+        public void TestWhy()
+        {
+            // PHP returns "H". The original metaphone returns an empty string.
+            Assert.AreEqual("", this.StringEncoder.GetMetaphone("WHY"));
+        }
+
+        [Test]
+        public void TestWordsWithCIA()
+        {
+            Assert.AreEqual("XP", this.StringEncoder.GetMetaphone("CIAPO"));
+        }
+
+        [Test]
+        public void TestTranslateOfSCHAndCH()
+        {
+            Assert.AreEqual("SKTL", this.StringEncoder.GetMetaphone("SCHEDULE"));
+            Assert.AreEqual("SKMT", this.StringEncoder.GetMetaphone("SCHEMATIC"));
+
+            Assert.AreEqual("KRKT", this.StringEncoder.GetMetaphone("CHARACTER"));
+            Assert.AreEqual("TX", this.StringEncoder.GetMetaphone("TEACH"));
+        }
+
+        [Test]
+        public void TestTranslateToJOfDGEOrDGIOrDGY()
+        {
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGY"));
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGE"));
+            Assert.AreEqual("AJMT", this.StringEncoder.GetMetaphone("ADGIEMTI"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentHAfterG()
+        {
+            Assert.AreEqual("KNT", this.StringEncoder.GetMetaphone("GHENT"));
+            Assert.AreEqual("B", this.StringEncoder.GetMetaphone("BAUGH"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentGN()
+        {
+            // NOTE: This does not test for silent GN, but for starting with GN
+            Assert.AreEqual("N", this.StringEncoder.GetMetaphone("GNU"));
+
+            // NOTE: Trying to test for GNED, but expected code does not appear to execute
+            Assert.AreEqual("SNT", this.StringEncoder.GetMetaphone("SIGNED"));
+        }
+
+        [Test]
+        public void TestPHTOF()
+        {
+            Assert.AreEqual("FX", this.StringEncoder.GetMetaphone("PHISH"));
+        }
+
+        [Test]
+        public void TestSHAndSIOAndSIAToX()
+        {
+            Assert.AreEqual("XT", this.StringEncoder.GetMetaphone("SHOT"));
+            Assert.AreEqual("OTXN", this.StringEncoder.GetMetaphone("ODSIAN"));
+            Assert.AreEqual("PLXN", this.StringEncoder.GetMetaphone("PULSION"));
+        }
+
+        [Test]
+        public void TestTIOAndTIAToX()
+        {
+            Assert.AreEqual("OX", this.StringEncoder.GetMetaphone("OTIA"));
+            Assert.AreEqual("PRXN", this.StringEncoder.GetMetaphone("PORTION"));
+        }
+
+        [Test]
+        public void TestTCH()
+        {
+            Assert.AreEqual("RX", this.StringEncoder.GetMetaphone("RETCH"));
+            Assert.AreEqual("WX", this.StringEncoder.GetMetaphone("WATCH"));
+        }
+
+        [Test]
+        public void TestExceedLength()
+        {
+            // should be AKSKS, but is truncated by Max Code Length
+            Assert.AreEqual("AKSK", this.StringEncoder.GetMetaphone("AXEAXE"));
+        }
+
+        [Test]
+        public void TestSetMaxLengthWithTruncation()
+        {
+            // should be AKSKSKS, but is truncated by the Max Code Length of 6 set below
+            this.StringEncoder.MaxCodeLen = 6;
+            Assert.AreEqual("AKSKSK", this.StringEncoder.GetMetaphone("AXEAXEAXE"));
+        }
+
+        /// <summary>
+        /// Fails the test when the fixture is null or empty, or when any entry is not a pair of exactly two strings.
+        /// </summary>
+        public void ValidateFixture(string[][] pairs)
+        {
+            if (pairs == null || pairs.Length == 0)
+            {
+                Assert.Fail("Test fixture is empty");
+            }
+            for (int i = 0; i < pairs.Length; i++)
+            {
+                if (pairs[i] == null || pairs[i].Length != 2)
+                {
+                    Assert.Fail("Error in test fixture in the data array at index " + i);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
new file mode 100644
index 0000000..d1c04d1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
@@ -0,0 +1,319 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="Nysiis"/>.
+    /// </summary>
+    public class NysiisTest : StringEncoderAbstractTest<Nysiis>
+    {
+        /// <summary>
+        /// Encoder created with <c>new Nysiis(false)</c>; <see cref="AssertEncodings"/> checks against it.
+        /// </summary>
+        private readonly Nysiis fullNysiis = new Nysiis(false);
+
+        /// <summary>
+        /// Takes an array of string pairs where each pair's first element is the input and the second element the
+        /// expected encoding, and asserts that <see cref="fullNysiis"/> encodes each input to that value.
+        /// </summary>
+        /// <param name="testValues">
+        /// An array of string pairs where each pair's first element is the input and the second element the
+        /// expected encoding.
+        /// </param>
+        /// <remarks>Only elements 0 and 1 of each pair are read.</remarks>
+        private void AssertEncodings(params string[][] testValues)
+        {
+            foreach (string[] arr in testValues)
+            {
+                Assert.AreEqual(arr[1], this.fullNysiis.Encode(arr[0]), "Problem with " + arr[0]);
+            }
+        }
+
+        /// <summary>
+        /// Creates the <see cref="Nysiis"/> instance for this fixture, using the parameterless constructor.
+        /// </summary>
+        protected override Nysiis CreateStringEncoder()
+        {
+            return new Nysiis();
+        }
+
+        /// <summary>
+        /// Asserts that every entry of <paramref name="strings"/> encodes to <paramref name="expectedEncoding"/> using the fixture's <c>StringEncoder</c>.
+        /// </summary>
+        private void EncodeAll(string[] strings, string expectedEncoding)
+        {
+            foreach (string str in strings)
+            {
+                Assert.AreEqual(expectedEncoding, this.StringEncoder.Encode(str), "Problem with " + str);
+            }
+        }
+
+        [Test]
+        public void TestBran()
+        {
+            this.EncodeAll(new string[] { "Brian", "Brown", "Brun" }, "BRAN");
+        }
+
+        [Test]
+        public void TestCap()
+        {
+            this.EncodeAll(new string[] { "Capp", "Cope", "Copp", "Kipp" }, "CAP");
+        }
+
+        [Test]
+        public void TestDad()
+        {
+            // Data Quality and Record Linkage Techniques P.121 claims this is DAN,
+            // but it should be DAD, verified also with dropby.com
+            this.EncodeAll(new string[] { "Dent" }, "DAD");
+        }
+
+        [Test]
+        public void TestDan()
+        {
+            this.EncodeAll(new string[] { "Dane", "Dean", "Dionne" }, "DAN");
+        }
+
+        /// <summary>
+        /// Tests data gathered from around the internet.
+        /// </summary>
+        /// <remarks>
+        /// See <a href="http://www.dropby.com/NYSIISTextStrings.html">http://www.dropby.com/NYSIISTextStrings.html</a>
+        /// </remarks>
+        [Test]
+        public void TestDropBy()
+        {
+            // Explanation of differences between this implementation and the one at dropby.com is
+            // prepended to the test string. The referenced rules refer to the outlined steps in the
+            // class description for Nysiis.
+
+            this.AssertEncodings(
+                    // 1. Transcode first characters of name
+                    new string[] { "MACINTOSH", "MCANT" },
+                    // violates 4j: the second N should not be added, as the first
+                    //              key char is already a N
+                    new string[] { "KNUTH", "NAT" },           // Original: NNAT; modified: NATH
+                    // O and E are transcoded to A because of rule 4a
+                    // H also to A because of rule 4h
+                    // the N gets mysteriously lost, maybe because of a wrongly implemented rule 4h
+                    // that skips the next char in such a case?
+                    // the remaining A is removed because of rule 7
+                    new string[] { "KOEHN", "CAN" },           // Original: C
+                    // violates 4j: see also KNUTH
+                    new string[] { "PHILLIPSON", "FALAPSAN" }, // Original: FFALAP[SAN]
+                    // violates 4j: see also KNUTH
+                    new string[] { "PFEISTER", "FASTAR" },     // Original: FFASTA[R]
+                    // violates 4j: see also KNUTH
+                    new string[] { "SCHOENHOEFT", "SANAFT" },  // Original: SSANAF[T]
+                    // 2. Transcode last characters of name:
+                    new string[] { "MCKEE", "MCY" },
+                    new string[] { "MACKIE", "MCY" },
+                    new string[] { "HEITSCHMIDT", "HATSNAD" },
+                    new string[] { "BART", "BAD" },
+                    new string[] { "HURD", "HAD" },
+                    new string[] { "HUNT", "HAD" },
+                    new string[] { "WESTERLUND", "WASTARLAD" },
+                    // 4. Transcode remaining characters by following these rules,
+                    //    incrementing by one character each time:
+                    new string[] { "CASSTEVENS", "CASTAFAN" },
+                    new string[] { "VASQUEZ", "VASG" },
+                    new string[] { "FRAZIER", "FRASAR" },
+                    new string[] { "BOWMAN", "BANAN" },
+                    new string[] { "MCKNIGHT", "MCNAGT" },
+                    new string[] { "RICKERT", "RACAD" },
+                    // violates 5: the last S is not removed
+                    // when comparing to DEUTS, which is phonetically similar
+                    // the result is also DAT, which is correct for DEUTSCH too imo
+                    new string[] { "DEUTSCH", "DAT" },         // Original: DATS
+                    new string[] { "WESTPHAL", "WASTFAL" },
+                    // violates 4h: the H should be transcoded to S and thus ignored as
+                    // the first key character is also S
+                    new string[] { "SHRIVER", "SRAVAR" },      // Original: SHRAVA[R]
+                    // same as KOEHN, the L gets mysteriously lost
+                    new string[] { "KUHL", "CAL" },            // Original: C
+                    new string[] { "RAWSON", "RASAN" },
+                    // If last character is S, remove it
+                    new string[] { "JILES", "JAL" },
+                    // violates 6: if the last two characters are AY, remove A
+                    new string[] { "CARRAWAY", "CARY" },       // Original: CARAY
+                    new string[] { "YAMADA", "YANAD" });
+        }
+
+        [Test]
+        public void TestFal()
+        {
+            this.EncodeAll(new string[] { "Phil" }, "FAL");
+        }
+
+        /// <summary>
+        /// Tests data gathered from around the internet.
+        /// </summary>
+        [Test]
+        public void TestOthers()
+        {
+            this.AssertEncodings(
+                    new string[] { "O'Daniel", "ODANAL" },
+                    new string[] { "O'Donnel", "ODANAL" },
+                    new string[] { "Cory", "CARY" },
+                    new string[] { "Corey", "CARY" },
+                    new string[] { "Kory", "CARY" },
+                    //
+                    new string[] { "FUZZY", "FASY" });
+        }
+
+        /// <summary>
+        /// Tests rule 1: Translate first characters of name: MAC → MCC, KN → N, K → C, PH, PF → FF, SCH → SSS
+        /// </summary>
+        [Test]
+        public void TestRule1()
+        {
+            this.AssertEncodings(
+                    new string[] { "MACX", "MCX" },
+                    new string[] { "KNX", "NX" },
+                    new string[] { "KX", "CX" },
+                    new string[] { "PHX", "FX" },
+                    new string[] { "PFX", "FX" },
+                    new string[] { "SCHX", "SX" });
+        }
+
+        /// <summary>
+        /// Tests rule 2: Translate last characters of name: EE → Y, IE → Y, DT, RT, RD, NT, ND → D
+        /// </summary>
+        [Test]
+        public void TestRule2()
+        {
+            this.AssertEncodings(
+                    new string[] { "XEE", "XY" },
+                    new string[] { "XIE", "XY" },
+                    new string[] { "XDT", "XD" },
+                    new string[] { "XRT", "XD" },
+                    new string[] { "XRD", "XD" },
+                    new string[] { "XNT", "XD" },
+                    new string[] { "XND", "XD" });
+        }
+
+        /// <summary>
+        /// Tests rule 4.1: EV → AF else A, E, I, O, U → A
+        /// </summary>
+        [Test]
+        public void TestRule4Dot1()
+        {
+            this.AssertEncodings(
+                    new string[] { "XEV", "XAF" },
+                    new string[] { "XAX", "XAX" },
+                    new string[] { "XEX", "XAX" },
+                    new string[] { "XIX", "XAX" },
+                    new string[] { "XOX", "XAX" },
+                    new string[] { "XUX", "XAX" });
+        }
+
+        /// <summary>
+        /// Tests rule 4.2: Q → G, Z → S, M → N
+        /// </summary>
+        [Test]
+        public void TestRule4Dot2()
+        {
+            this.AssertEncodings(
+                    new string[] { "XQ", "XG" },
+                    // expected X rather than XS: Z → S, then presumably rule 5 drops the trailing S
+                    new string[] { "XZ", "X" },
+                    new string[] { "XM", "XN" });
+        }
+
+        /// <summary>
+        /// Tests rule 5: If last character is S, remove it.
+        /// </summary>
+        [Test]
+        public void TestRule5()
+        {
+            this.AssertEncodings(
+                    new string[] { "XS", "X" },
+                    new string[] { "XSS", "X" });
+        }
+
+        /// <summary>
+        /// Tests rule 6: If last characters are AY, replace with Y.
+        /// </summary>
+        [Test]
+        public void TestRule6()
+        {
+            this.AssertEncodings(
+                    new string[] { "XAY", "XY" },
+                    new string[] { "XAYS", "XY" }); // Rules 5, 6
+        }
+
+        /// <summary>
+        /// Tests rule 7: If last character is A, remove it.
+        /// </summary>
+        [Test]
+        public void TestRule7()
+        {
+            this.AssertEncodings(
+                    new string[] { "XA", "X" },
+                    new string[] { "XAS", "X" }); // Rules 5, 7
+        }
+
+        [Test]
+        public void TestSnad()
+        {
+            // Data Quality and Record Linkage Techniques P.121 claims this is SNAT,
+            // but it should be SNAD
+            this.EncodeAll(new string[] { "Schmidt" }, "SNAD");
+        }
+
+        [Test]
+        public void TestSnat()
+        {
+            this.EncodeAll(new string[] { "Smith", "Schmit" }, "SNAT");
+        }
+
+        [Test]
+        public void TestSpecialBranches()
+        {
+            this.EncodeAll(new string[] { "Kobwick" }, "CABWAC");
+            this.EncodeAll(new string[] { "Kocher" }, "CACAR");
+            this.EncodeAll(new string[] { "Fesca" }, "FASC");
+            this.EncodeAll(new string[] { "Shom" }, "SAN");
+            this.EncodeAll(new string[] { "Ohlo" }, "OL");
+            this.EncodeAll(new string[] { "Uhu" }, "UH");
+            this.EncodeAll(new string[] { "Um" }, "UN");
+        }
+
+        [Test]
+        public void TestTranan()
+        {
+            this.EncodeAll(new string[] { "Trueman", "Truman" }, "TRANAN");
+        }
+
+        /// <summary>
+        /// Checks the encoder created with <c>new Nysiis(true)</c>: WESTERLUND must encode to WASTAR, at most 6 characters.
+        /// </summary>
+        [Test]
+        public void TestTrueVariant()
+        {
+            Nysiis encoder = new Nysiis(true);
+
+            string encoded = encoder.Encode("WESTERLUND");
+            Assert.True(encoded.Length <= 6, "Encoded: " + encoded);
+            Assert.AreEqual("WASTAR", encoded);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
new file mode 100644
index 0000000..eca1827
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
@@ -0,0 +1,99 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="RefinedSoundex"/>: encoding, the <c>Difference</c> score,
+    /// the mapping code of a non-letter, and the available constructors.
+    /// </summary>
+    public class RefinedSoundexTest : StringEncoderAbstractTest<RefinedSoundex>
+    {
+        /// <summary>
+        /// Supplies the encoder under test, built with the parameterless constructor.
+        /// </summary>
+        protected override RefinedSoundex CreateStringEncoder()
+        {
+            return new RefinedSoundex();
+        }
+
+        /// <summary>
+        /// Checks the value returned by <c>Difference</c> for edge cases and for
+        /// several name pairs.
+        /// </summary>
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(3, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(5, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(8, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(5, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        /// <summary>
+        /// Checks the encoded form of a handful of words, including that
+        /// "testing" and "TESTING" produce the same code.
+        /// </summary>
+        [Test]
+        public void TestEncode()
+        {
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("TESTING"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q503", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B1908", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F205", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J408106", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O0209", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L7050", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D6043", this.StringEncoder.Encode("dogs"));
+
+            // Testing CODEC-56
+            Assert.AreEqual("D6043", RefinedSoundex.US_ENGLISH.Encode("dogs"));
+        }
+
+        /// <summary>
+        /// The mapping code returned for '#' is expected to compare equal to zero.
+        /// </summary>
+        [Test]
+        public void TestGetMappingCodeNonLetter()
+        {
+            char code = this.StringEncoder.GetMappingCode('#');
+            Assert.AreEqual(0, code, "Code does not equals zero");
+        }
+
+        /// <summary>
+        /// Encoder built with the parameterless constructor.
+        /// </summary>
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex().GetSoundex("dogs"));
+        }
+
+        /// <summary>
+        /// Encoder built from the US English mapping supplied as a <c>char[]</c>.
+        /// </summary>
+        [Test]
+        public void TestNewInstance2()
+        {
+            // System.String exposes ToCharArray(); the Java-style toCharArray() is not a .NET member.
+            char[] mapping = RefinedSoundex.US_ENGLISH_MAPPING_STRING.ToCharArray();
+            Assert.AreEqual("D6043", new RefinedSoundex(mapping).GetSoundex("dogs"));
+        }
+
+        /// <summary>
+        /// Encoder built from the US English mapping supplied as a string.
+        /// </summary>
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex(RefinedSoundex.US_ENGLISH_MAPPING_STRING).GetSoundex("dogs"));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
new file mode 100644
index 0000000..5cc01ec
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
@@ -0,0 +1,424 @@
+// commons-codec version compatibility level: 1.10
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="Soundex"/>
+    /// </summary>
+    public class SoundexTest : StringEncoderAbstractTest<Soundex>
+    {
+        /// <summary>
+        /// Supplies the encoder under test, built with the parameterless constructor.
+        /// </summary>
+        protected override Soundex CreateStringEncoder()
+        {
+            return new Soundex();
+        }
+
+        /// <summary>
+        /// Every listed surname is expected to encode to B650.
+        /// </summary>
+        [Test]
+        public void TestB650()
+        {
+            this.CheckEncodingVariations("B650", new string[]{
+                "BARHAM",
+                "BARONE",
+                "BARRON",
+                "BERNA",
+                "BIRNEY",
+                "BIRNIE",
+                "BOOROM",
+                "BOREN",
+                "BORN",
+                "BOURN",
+                "BOURNE",
+                "BOWRON",
+                "BRAIN",
+                "BRAME",
+                "BRANN",
+                "BRAUN",
+                "BREEN",
+                "BRIEN",
+                "BRIM",
+                "BRIMM",
+                "BRINN",
+                "BRION",
+                "BROOM",
+                "BROOME",
+                "BROWN",
+                "BROWNE",
+                "BRUEN",
+                "BRUHN",
+                "BRUIN",
+                "BRUMM",
+                "BRUN",
+                "BRUNO",
+                "BRYAN",
+                "BURIAN",
+                "BURN",
+                "BURNEY",
+                "BYRAM",
+                "BYRNE",
+                "BYRON",
+                "BYRUM"});
+        }
+
+        /// <summary>
+        /// "HOL&gt;MES" is expected to produce the same code as "HOLMES" (H452).
+        /// </summary>
+        [Test]
+        public void TestBadCharacters()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOL>MES"));
+        }
+
+        /// <summary>
+        /// Checks the value returned by <c>Difference</c> for edge cases and for
+        /// several name pairs.
+        /// </summary>
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        /// <summary>
+        /// Checks the encoded form of a handful of common words.
+        /// </summary>
+        [Test]
+        public void TestEncodeBasic()
+        {
+            Assert.AreEqual("T235", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q200", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B650", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F200", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J513", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O160", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L200", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D200", this.StringEncoder.Encode("dogs"));
+        }
+
+        /// <summary>
+        /// Examples from http://www.bradandkathy.com/genealogy/overviewofsoundex.html
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch2()
+        {
+            Assert.AreEqual("A462", this.StringEncoder.Encode("Allricht"));
+            Assert.AreEqual("E166", this.StringEncoder.Encode("Eberhard"));
+            Assert.AreEqual("E521", this.StringEncoder.Encode("Engebrethson"));
+            Assert.AreEqual("H512", this.StringEncoder.Encode("Heimbach"));
+            Assert.AreEqual("H524", this.StringEncoder.Encode("Hanselmann"));
+            Assert.AreEqual("H431", this.StringEncoder.Encode("Hildebrand"));
+            Assert.AreEqual("K152", this.StringEncoder.Encode("Kavanagh"));
+            Assert.AreEqual("L530", this.StringEncoder.Encode("Lind"));
+            Assert.AreEqual("L222", this.StringEncoder.Encode("Lukaschowsky"));
+            Assert.AreEqual("M235", this.StringEncoder.Encode("McDonnell"));
+            Assert.AreEqual("M200", this.StringEncoder.Encode("McGee"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Opnian"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Oppenheimer"));
+            Assert.AreEqual("R355", this.StringEncoder.Encode("Riedemanas"));
+            Assert.AreEqual("Z300", this.StringEncoder.Encode("Zita"));
+            Assert.AreEqual("Z325", this.StringEncoder.Encode("Zitzmeinn"));
+        }
+
+        /// <summary>
+        /// Examples from http://www.archives.gov/research_room/genealogy/census/soundex.html
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch3()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode("Washington"));
+            Assert.AreEqual("L000", this.StringEncoder.Encode("Lee"));
+            Assert.AreEqual("G362", this.StringEncoder.Encode("Gutierrez"));
+            Assert.AreEqual("P236", this.StringEncoder.Encode("Pfister"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("Jackson"));
+            Assert.AreEqual("T522", this.StringEncoder.Encode("Tymczak"));
+            // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
+            // possible.
+            Assert.AreEqual("V532", this.StringEncoder.Encode("VanDeusen"));
+        }
+
+        /// <summary>
+        /// Examples from: http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch4()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOLMES"));
+            Assert.AreEqual("A355", this.StringEncoder.Encode("ADOMOMI"));
+            Assert.AreEqual("V536", this.StringEncoder.Encode("VONDERLEHR"));
+            Assert.AreEqual("B400", this.StringEncoder.Encode("BALL"));
+            Assert.AreEqual("S000", this.StringEncoder.Encode("SHAW"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("JACKSON"));
+            Assert.AreEqual("S545", this.StringEncoder.Encode("SCANLON"));
+            Assert.AreEqual("S532", this.StringEncoder.Encode("SAINTJOHN"));
+        }
+
+        /// <summary>
+        /// An apostrophe at any position in "OBrien" must not change the code (O165).
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreApostrophes()
+        {
+            this.CheckEncodingVariations("O165", new string[]{
+                "OBrien",
+                "'OBrien",
+                "O'Brien",
+                "OB'rien",
+                "OBr'ien",
+                "OBri'en",
+                "OBrie'n",
+                "OBrien'"});
+        }
+
+        /// <summary>
+        /// A hyphen at any position in "KINGSMITH" must not change the code (K525).
+        /// Test data from http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreHyphens()
+        {
+            this.CheckEncodingVariations("K525", new string[]{
+                "KINGSMITH",
+                "-KINGSMITH",
+                "K-INGSMITH",
+                "KI-NGSMITH",
+                "KIN-GSMITH",
+                "KING-SMITH",
+                "KINGS-MITH",
+                "KINGSM-ITH",
+                "KINGSMI-TH",
+                "KINGSMIT-H",
+                "KINGSMITH-"});
+        }
+
+        /// <summary>
+        /// Leading and trailing spaces, tabs, and line breaks must not change the code.
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreTrimmable()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode(" \t\n\r Washington \t\n\r "));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx1()
+        {
+            // From
+            // http://www.archives.gov/research_room/genealogy/census/soundex.html:
+            // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
+            // for the F). It is not coded A-226.
+            Assert.AreEqual("A261", this.StringEncoder.Encode("Ashcraft"));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// Test data from http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx2()
+        {
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTHDAVIS"));
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTH-DAVIS"));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx3()
+        {
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Sgler"));
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Swhgler"));
+            // Also S460:
+            this.CheckEncodingVariations("S460", new string[]{
+                "SAILOR",
+                "SALYER",
+                "SAYLOR",
+                "SCHALLER",
+                "SCHELLER",
+                "SCHILLER",
+                "SCHOOLER",
+                "SCHULER",
+                "SCHUYLER",
+                "SEILER",
+                "SEYLER",
+                "SHOLAR",
+                "SHULER",
+                "SILAR",
+                "SILER",
+                "SILLER"});
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from
+        /// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer1()
+        {
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smith"));
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smythe"));
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from
+        /// http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q100/3/65.asp&amp;NoWebContent=1
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer2()
+        {
+            this.CheckEncodingVariations("E625", new string[] { "Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen" });
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from http://databases.about.com/library/weekly/aa042901a.htm
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer3()
+        {
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Ann"));
+            Assert.AreEqual("A536", this.StringEncoder.Encode("Andrew"));
+            Assert.AreEqual("J530", this.StringEncoder.Encode("Janet"));
+            Assert.AreEqual("M626", this.StringEncoder.Encode("Margaret"));
+            Assert.AreEqual("S315", this.StringEncoder.Encode("Steven"));
+            Assert.AreEqual("M240", this.StringEncoder.Encode("Michael"));
+            Assert.AreEqual("R163", this.StringEncoder.Encode("Robert"));
+            Assert.AreEqual("L600", this.StringEncoder.Encode("Laura"));
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Anne"));
+        }
+
+        /// <summary>
+        /// Encoder built with the parameterless constructor.
+        /// https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+        /// </summary>
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("W452", new Soundex().GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// Encoder built from the US English mapping supplied as a <c>char[]</c>.
+        /// </summary>
+        [Test]
+        public void TestNewInstance2()
+        {
+            // System.String exposes ToCharArray(); the Java-style toCharArray() is not a .NET member.
+            char[] mapping = Soundex.US_ENGLISH_MAPPING_STRING.ToCharArray();
+            Assert.AreEqual("W452", new Soundex(mapping).GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// Encoder built from the US English mapping supplied as a string.
+        /// </summary>
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// <c>SoundexUtils</c> can be instantiated.
+        /// </summary>
+        [Test]
+        public void TestSoundexUtilsConstructable()
+        {
+            new SoundexUtils();
+        }
+
+        /// <summary>
+        /// Null and empty inputs to <c>SoundexUtils.Clean</c> and
+        /// <c>SoundexUtils.DifferenceEncoded</c>.
+        /// </summary>
+        [Test]
+        public void TestSoundexUtilsNullBehaviour()
+        {
+            Assert.AreEqual(null, SoundexUtils.Clean(null));
+            Assert.AreEqual("", SoundexUtils.Clean(""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded(null, ""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded("", null));
+        }
+
+        /// <summary>
+        /// Encoding through the static <c>Soundex.US_ENGLISH</c> instance.
+        /// https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+        /// </summary>
+        [Test]
+        public void TestUsEnglishStatic()
+        {
+            Assert.AreEqual("W452", Soundex.US_ENGLISH.GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// Fancy characters are not mapped by the default US mapping.
+        /// http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+        /// </summary>
+        [Test]
+        public void TestUsMappingEWithAcute()
+        {
+            Assert.AreEqual("E000", this.StringEncoder.Encode("e"));
+            if (char.IsLetter('\u00e9'))
+            { // e-acute
+                try
+                {
+                    //         uppercase E-acute
+                    Assert.AreEqual("\u00c9000", this.StringEncoder.Encode("\u00e9"));
+                    Assert.Fail("Expected IllegalArgumentException not thrown");
+                }
+                catch (ArgumentException)
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00e9"));
+            }
+        }
+
+        /// <summary>
+        /// Fancy characters are not mapped by the default US mapping.
+        /// http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+        /// </summary>
+        [Test]
+        public void TestUsMappingOWithDiaeresis()
+        {
+            Assert.AreEqual("O000", this.StringEncoder.Encode("o"));
+            if (char.IsLetter('\u00f6'))
+            { // o-umlaut
+                try
+                {
+                    //         uppercase O-umlaut
+                    Assert.AreEqual("\u00d6000", this.StringEncoder.Encode("\u00f6"));
+                    Assert.Fail("Expected IllegalArgumentException not thrown");
+                }
+                catch (ArgumentException)
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00f6"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
new file mode 100644
index 0000000..8fd8b7f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
@@ -0,0 +1,164 @@
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Shared base class for the string encoder tests. The <see cref="SetUp"/>
+    /// method stores an encoder obtained from <see cref="CreateStringEncoder"/>,
+    /// which the tests reach through <see cref="StringEncoder"/>.
+    /// </summary>
+    /// <typeparam name="T">The encoder type under test.</typeparam>
+    public abstract class StringEncoderAbstractTest<T>
+        where T : IStringEncoder
+    {
+        protected T stringEncoder;
+
+        /// <summary>
+        /// Creates the encoder instance that the tests run against.
+        /// </summary>
+        protected abstract T CreateStringEncoder();
+
+        /// <summary>
+        /// Gets the encoder stored by <see cref="SetUp"/>.
+        /// </summary>
+        public virtual T StringEncoder
+        {
+            get { return stringEncoder; }
+        }
+
+        /// <summary>
+        /// Stores a newly created encoder in <see cref="stringEncoder"/>.
+        /// </summary>
+        [SetUp]
+        public void SetUp()
+        {
+            this.stringEncoder = CreateStringEncoder();
+        }
+
+        /// <summary>
+        /// Asserts that encoding <paramref name="source"/> yields <paramref name="expected"/>.
+        /// </summary>
+        /// <param name="expected">The expected encoded value.</param>
+        /// <param name="source">The text to encode; also reported in the failure message.</param>
+        public virtual void CheckEncoding(string expected, string source)
+        {
+            string actual = StringEncoder.Encode(source);
+            Assert.AreEqual(expected, actual, "Source: " + source);
+        }
+
+        /// <summary>
+        /// Runs <see cref="CheckEncoding"/> for each row of <paramref name="data"/>.
+        /// </summary>
+        /// <param name="data">Rows where index 0 is the source and index 1 the expected encoding.</param>
+        protected virtual void CheckEncodings(string[][] data)
+        {
+            for (int row = 0; row < data.Length; row++)
+            {
+                string[] pair = data[row];
+                CheckEncoding(pair[1], pair[0]);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that every entry of <paramref name="data"/> encodes to <paramref name="expected"/>.
+        /// </summary>
+        /// <param name="expected">The encoding shared by all entries.</param>
+        /// <param name="data">The source strings to encode.</param>
+        protected virtual void CheckEncodingVariations(string expected, string[] data)
+        {
+            for (int i = 0; i < data.Length; i++)
+            {
+                CheckEncoding(expected, data[i]);
+            }
+        }
+
+        /// <summary>
+        /// Encodes an empty string, a single space, and a tab; no result is asserted.
+        /// </summary>
+        [Test]
+        public virtual void TestEncodeEmpty()
+        {
+            IStringEncoder target = StringEncoder;
+            string[] inputs = { "", " ", "\t" };
+            foreach (string input in inputs)
+            {
+                target.Encode(input);
+            }
+        }
+
+        /// <summary>
+        /// Encodes <c>null</c>. Any exception is caught and ignored, so the test
+        /// passes whether or not one is thrown.
+        /// </summary>
+        [Test]
+        public virtual void TestEncodeNull()
+        {
+            IStringEncoder target = StringEncoder;
+            try
+            {
+                target.Encode(null);
+            }
+            catch (/*Encoder*/Exception)
+            {
+                // An exception should be thrown
+            }
+        }
+
+        // LUCENENET specific - since strings are sealed in .NET, there
+        // is no point in implementing IEncoder or running these tests.
+        // Our version only accepts strings
+        //[Test]
+        //public virtual void TestEncodeWithInvalidObject()
+        //{
+        //    bool exceptionThrown = false;
+        //    try
+        //    {
+        //        IStringEncoder encoder = this.StringEncoder;
+        //        encoder.Encode(3.4f);
+        //    }
+        //    catch (Exception e)
+        //    {
+        //        exceptionThrown = true;
+        //    }
+        //    Assert.True(exceptionThrown, "An exception was not thrown when we tried to encode " + "a Float object");
+        //}
+
+        /// <summary>
+        /// Encodes "I" and "i" under the "en" culture, the "tr" culture, and the
+        /// culture that was current on entry, and requires the later results to
+        /// equal the first one. The entry culture is restored afterwards.
+        /// </summary>
+        [Test]
+        public virtual void TestLocaleIndependence()
+        {
+            IStringEncoder target = StringEncoder;
+            string[] inputs = { "I", "i", };
+
+            CultureInfo originalCulture = CultureInfo.CurrentCulture;
+            CultureInfo[] cultures = { new CultureInfo("en"), new CultureInfo("tr"), CultureInfo.CurrentCulture };
+
+            try
+            {
+                foreach (string input in inputs)
+                {
+                    // The first culture provides the reference value; an exception
+                    // here is not caught and fails the test directly.
+                    SetCurrentCulture(cultures[0]);
+                    string reference = target.Encode(input);
+
+                    for (int k = 1; k < cultures.Length; k++)
+                    {
+                        SetCurrentCulture(cultures[k]);
+
+                        string current = null;
+                        try
+                        {
+                            current = target.Encode(input);
+                        }
+                        catch (Exception ex)
+                        {
+                            Assert.Fail(CultureInfo.CurrentCulture.ToString() + ": " + ex.Message);
+                        }
+                        Assert.AreEqual(reference, current, CultureInfo.CurrentCulture.ToString() + ": ");
+                    }
+                }
+            }
+            finally
+            {
+                SetCurrentCulture(originalCulture);
+            }
+        }
+
+        /// <summary>
+        /// Makes <paramref name="culture"/> the current culture, using whichever
+        /// setter the NETSTANDARD compilation symbol selects.
+        /// </summary>
+        private static void SetCurrentCulture(CultureInfo culture)
+        {
+#if NETSTANDARD
+            CultureInfo.CurrentCulture = culture;
+#else
+            Thread.CurrentThread.CurrentCulture = culture;
+#endif
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
new file mode 100644
index 0000000..5c38e1f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{A2867797-0A5D-4878-8F59-58C399C9A4E4}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis.Phonetic</RootNamespace>
+    <AssemblyName>Lucene.Net.Tests.Analysis.Phonetic</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="DoubleMetaphoneFilterTest.cs" />
+    <Compile Include="Language\Bm\BeiderMorseEncoderTest.cs" />
+    <Compile Include="Language\Bm\CacheSubSequencePerformanceTest.cs" />
+    <Compile Include="Language\Bm\LanguageGuessingTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEnginePerformanceTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEngineRegressionTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEngineTest.cs" />
+    <Compile Include="Language\Bm\RuleTest.cs" />
+    <Compile Include="Language\Caverphone1Test.cs" />
+    <Compile Include="Language\Caverphone2Test .cs" />
+    <Compile Include="Language\ColognePhoneticTest.cs" />
+    <Compile Include="Language\DaitchMokotoffSoundexTest.cs" />
+    <Compile Include="Language\DoubleMetaphone2Test.cs" />
+    <Compile Include="Language\DoubleMetaphoneTest.cs" />
+    <Compile Include="Language\MatchRatingApproachEncoderTest.cs" />
+    <Compile Include="Language\MetaphoneTest.cs" />
+    <Compile Include="Language\NysiisTest.cs" />
+    <Compile Include="Language\RefinedSoundexTest.cs" />
+    <Compile Include="Language\SoundexTest.cs" />
+    <Compile Include="Language\StringEncoderAbstractTest.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="TestBeiderMorseFilter.cs" />
+    <Compile Include="TestBeiderMorseFilterFactory.cs" />
+    <Compile Include="TestDoubleMetaphoneFilterFactory.cs" />
+    <Compile Include="TestPhoneticFilter.cs" />
+    <Compile Include="TestPhoneticFilterFactory.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.csproj">
+      <Project>{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}</Project>
+      <Name>Lucene.Net.Analysis.Phonetic</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+      <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+      <Name>Lucene.Net.TestFramework</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.Tests.Analysis.Phonetic.project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
new file mode 100644
index 0000000..8c631ab
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
@@ -0,0 +1,11 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "dependencies": {
+    "NUnit": "3.5.0"
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
new file mode 100644
index 0000000..16b7fef
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>1fe12ef7-4c89-4d49-bdd1-e49dc285f21b</ProjectGuid>
+    <RootNamespace>Lucene.Net.Tests.Analysis.Phonetic</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
+  </ItemGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..14e5b1c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
@@ -0,0 +1,42 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Phonetic")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("a2867797-0a5d-4878-8f59-58c399c9a4e4")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
new file mode 100644
index 0000000..cc0e897
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
@@ -0,0 +1,132 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.TokenAttributes;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="BeiderMorseFilter"/>.
+    /// </summary>
+    public class TestBeiderMorseFilter : BaseTokenStreamTestCase
+    {
+        // Shared analyzer: whitespace MockTokenizer feeding a BeiderMorseFilter (GENERIC names, EXACT rules).
+        private readonly Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            return new TokenStreamComponents(tokenizer,
+                new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+        });
+
+        /// <summary>Generic, "exact" configuration.</summary>
+        [Test]
+        public void TestBasicUsage()
+        {
+            AssertAnalyzesTo(analyzer, "Angelo",
+                new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 6, 6, 6, 6, 6, 6 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+
+            AssertAnalyzesTo(analyzer, "D'Angelo",
+                new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo",
+                  "danZelo", "dandZelo", "dangelo", "danhelo", "danjelo", "danxelo" },
+                new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+        }
+
+        /// <summary>Restricts the output to a set of possible origin languages.</summary>
+        [Test]
+        public void TestLanguageSet()
+        {
+            LanguageSet languages = LanguageSet.From(new HashSet<String>() { "italian", "greek", "spanish" });
+            Analyzer languageAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer,
+                    new BeiderMorseFilter(tokenizer,
+                        new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
+            });
+
+            AssertAnalyzesTo(languageAnalyzer, "Angelo",
+                new String[] { "andZelo", "angelo", "anxelo" },
+                new int[] { 0, 0, 0, },
+                new int[] { 6, 6, 6, },
+                new int[] { 1, 0, 0, });
+        }
+
+        /// <summary>For convenience, input that yields no phonetic output is passed through as-is.</summary>
+        [Test]
+        public void TestNumbers()
+        {
+            AssertAnalyzesTo(analyzer, "1234",
+                new String[] { "1234" },
+                new int[] { 0 },
+                new int[] { 4 },
+                new int[] { 1 });
+        }
+
+        /// <summary>Runs <c>CheckRandomData</c> against the shared analyzer.</summary>
+        [Test]
+        public void TestRandom()
+        {
+            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>An empty term produced by a <c>KeywordTokenizer</c> must come out unchanged.</summary>
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+            });
+            CheckOneTerm(a, "", "");
+        }
+
+        /// <summary>All 12 tokens emitted for "D'Angelo" must keep the keyword flag set by the upstream marker filter.</summary>
+        [Test]
+        public void TestCustomAttribute()
+        {
+            TokenStream source = new PatternKeywordMarkerFilter(new KeywordTokenizer(new StringReader("D'Angelo")), new Regex(".*"));
+            // The using block disposes the stream even when one of the assertions below throws.
+            using (TokenStream stream = new BeiderMorseFilter(source, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)))
+            {
+                IKeywordAttribute keyAtt = stream.AddAttribute<IKeywordAttribute>();
+                stream.Reset();
+                int count = 0;
+                while (stream.IncrementToken())
+                {
+                    assertTrue(keyAtt.IsKeyword);
+                    count++;
+                }
+                assertEquals(12, count);
+                stream.End();
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
new file mode 100644
index 0000000..5bdf1b7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
@@ -0,0 +1,89 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="BeiderMorseFilterFactory"/>.
+    /// </summary>
+    public class TestBeiderMorseFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>Builds a factory from <paramref name="args"/> and applies it to the whitespace-tokenized input "Weinberg".</summary>
+        private static TokenStream FilterWeinberg(IDictionary<string, string> args)
+        {
+            BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
+            return factory.Create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+        }
+
+        /// <summary>Factory created without any arguments.</summary>
+        [Test]
+        public void TestBasics()
+        {
+            AssertTokenStreamContents(FilterWeinberg(new Dictionary<String, String>()),
+                new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        /// <summary>Factory created with the "languageSet" argument set to "polish".</summary>
+        [Test]
+        public void TestLanguageSet()
+        {
+            IDictionary<String, String> args = new Dictionary<string, string> { { "languageSet", "polish" } };
+            AssertTokenStreamContents(FilterWeinberg(args),
+                new String[] { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        /// <summary>Factory created with explicit "nameType" and "ruleType" arguments.</summary>
+        [Test]
+        public void TestOptions()
+        {
+            IDictionary<String, String> args = new Dictionary<string, string> { { "nameType", "ASHKENAZI" }, { "ruleType", "EXACT" } };
+            AssertTokenStreamContents(FilterWeinberg(args),
+                new String[] { "vajnberk" },
+                new int[] { 0 },
+                new int[] { 8 },
+                new int[] { 1 });
+        }
+
+        /// <summary>Test that bogus arguments result in exception.</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new BeiderMorseFilterFactory(new Dictionary<String, String>() { { "bogusArg", "bogusValue" } });
+                // Reaching this line means the unknown argument was accepted without complaint.
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
new file mode 100644
index 0000000..5ba337b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
@@ -0,0 +1,70 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="DoubleMetaphoneFilterFactory"/>.
+    /// </summary>
+    public class TestDoubleMetaphoneFilterFactory : BaseTokenStreamTestCase
+    {
+        /// <summary>With no arguments, both the original token and its code ("ANTR") are expected.</summary>
+        [Test]
+        public void TestDefaults()
+        {
+            DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new Dictionary<String, String>());
+            TokenStream filtered = factory.Create(new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false));
+            assertEquals(typeof(DoubleMetaphoneFilter), filtered.GetType());
+            AssertTokenStreamContents(filtered, new String[] { "international", "ANTR" });
+        }
+
+        /// <summary>With inject=false and maxCodeLength=8, only the 8-character code is expected.</summary>
+        [Test]
+        public void TestSettingSizeAndInject()
+        {
+            IDictionary<string, string> parameters = new Dictionary<string, string>
+            {
+                { "inject", "false" },
+                { "maxCodeLength", "8" }
+            };
+            DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(parameters);
+            TokenStream filtered = factory.Create(new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false));
+            assertEquals(typeof(DoubleMetaphoneFilter), filtered.GetType());
+            AssertTokenStreamContents(filtered, new String[] { "ANTRNXNL" });
+        }
+
+        /// <summary>Test that bogus arguments result in exception.</summary>
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new DoubleMetaphoneFilterFactory(new Dictionary<String, String>() { { "bogusArg", "bogusValue" } });
+                fail();
+            }
+            catch (ArgumentException e)
+            {
+                assertTrue(e.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
new file mode 100644
index 0000000..387765f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
@@ -0,0 +1,122 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Phonetic.Language;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="PhoneticFilter"/>.
+    /// </summary>
+    public class TestPhoneticFilter : BaseTokenStreamTestCase
+    {
+        /// <summary>
+        /// Checks the exact output of each encoder, both with and without injection of the original token.
+        /// </summary>
+        [Test]
+        public void TestAlgorithms()
+        {
+            AssertAlgorithm(new Metaphone(), true, "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+            AssertAlgorithm(new Metaphone(), false, "aaa bbb ccc easgasg",
+                new String[] { "A", "B", "KKK", "ESKS" });
+
+            AssertAlgorithm(new DoubleMetaphone(), true, "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+            AssertAlgorithm(new DoubleMetaphone(), false, "aaa bbb ccc easgasg",
+                new String[] { "A", "PP", "KK", "ASKS" });
+
+            AssertAlgorithm(new Soundex(), true, "aaa bbb ccc easgasg",
+                new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+            AssertAlgorithm(new Soundex(), false, "aaa bbb ccc easgasg",
+                new String[] { "A000", "B000", "C000", "E220" });
+
+            AssertAlgorithm(new RefinedSoundex(), true, "aaa bbb ccc easgasg",
+                new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+            AssertAlgorithm(new RefinedSoundex(), false, "aaa bbb ccc easgasg",
+                new String[] { "A0", "B1", "C3", "E034034" });
+
+            AssertAlgorithm(new Caverphone2(), true, "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
+                    "TTA1111111", "Datha", "KLN1111111", "Carlene" });
+            AssertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+        }
+
+        /// <summary>
+        /// Whitespace-tokenizes <paramref name="input"/>, runs it through a <see cref="PhoneticFilter"/> built from
+        /// <paramref name="encoder"/> and <paramref name="inject"/>, and asserts the terms equal <paramref name="expected"/>.
+        /// </summary>
+        private static void AssertAlgorithm(IStringEncoder encoder, bool inject, String input, String[] expected)
+        {
+            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+            PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
+            AssertTokenStreamContents(filter, expected);
+        }
+
+        /// <summary>Creates the encoders shared by <see cref="TestRandomStrings"/> and <see cref="TestEmptyTerm"/>.</summary>
+        private static IStringEncoder[] CreateEncoders()
+        {
+            // NOTE(review): RefinedSoundex is covered by TestAlgorithms but was left out of this list; confirm why before adding it.
+            return new IStringEncoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(), new Caverphone2() };
+        }
+
+        /// <summary>
+        /// Blasts some random strings through the analyzer, with injection both disabled and enabled.
+        /// </summary>
+        [Test]
+        public void TestRandomStrings()
+        {
+            foreach (IStringEncoder encoder in CreateEncoders())
+            {
+                // Exercise both filter modes; with inject == true the unmodified token is emitted as well.
+                foreach (bool inject in new[] { false, true })
+                {
+                    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+                    {
+                        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, encoder, inject));
+                    });
+
+                    CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+                }
+            }
+        }
+
+        /// <summary>
+        /// An empty term must pass through every encoder unchanged, whether or not injection is enabled.
+        /// </summary>
+        [Test]
+        public void TestEmptyTerm()
+        {
+            foreach (IStringEncoder encoder in CreateEncoders())
+            {
+                Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+                {
+                    Tokenizer tokenizer = new KeywordTokenizer(reader);
+                    return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, encoder, Random().nextBoolean()));
+                });
+
+                CheckOneTerm(a, "", "");
+            }
+        }
+    }
+}