You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/06/27 20:33:48 UTC
[03/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic +
tests. Rather than porting over the entire commons-codec library,
only the language features were ported and added to this library.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
new file mode 100644
index 0000000..18a9e59
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
@@ -0,0 +1,518 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Tests <see cref="Metaphone"/>: equality of Metaphone codes between names, the
+    /// codes produced for individual words, and truncation at the maximum code length.
+    /// </summary>
+    public class MetaphoneTest : StringEncoderAbstractTest<Metaphone>
+    {
+        /// <summary>
+        /// Asserts that <paramref name="source"/> is Metaphone-equal to every entry of
+        /// <paramref name="matches"/>, and that every entry of <paramref name="matches"/>
+        /// is Metaphone-equal to every other entry (including itself).
+        /// </summary>
+        /// <param name="source">The reference word.</param>
+        /// <param name="matches">Words expected to share the reference word's Metaphone code.</param>
+        public void AssertIsMetaphoneEqual(string source, string[] matches)
+        {
+            // match source to all matches
+            foreach (string match in matches)
+            {
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(source, match),
+                    "Source: " + source + ", should have same Metaphone as: " + match);
+            }
+            // match to each other
+            foreach (string first in matches)
+            {
+                foreach (string second in matches)
+                {
+                    // The message names the offending pair so a failure is diagnosable.
+                    Assert.True(this.StringEncoder.IsMetaphoneEqual(first, second),
+                        "Expected match between " + first + " and " + second);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Validates the fixture with <see cref="ValidateFixture"/> and then asserts that the
+        /// two names of every pair are Metaphone-equal in both argument orders.
+        /// </summary>
+        /// <param name="pairs">An array of two-element arrays of names.</param>
+        public void AssertMetaphoneEqual(string[][] pairs)
+        {
+            this.ValidateFixture(pairs);
+            foreach (string[] pair in pairs)
+            {
+                string name0 = pair[0];
+                string name1 = pair[1];
+                string failMsg = "Expected match between " + name0 + " and " + name1;
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name0, name1), failMsg);
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name1, name0), failMsg);
+            }
+        }
+
+        /// <summary>
+        /// Creates the encoder under test: a <see cref="Metaphone"/> built with its
+        /// parameterless constructor.
+        /// </summary>
+        protected override Metaphone CreateStringEncoder()
+        {
+            return new Metaphone();
+        }
+
+        [Test]
+        public void TestIsMetaphoneEqual1()
+        {
+            this.AssertMetaphoneEqual(new string[][] {
+                new string[] { "Case", "case" },
+                new string[] { "CASE", "Case" },
+                new string[] { "caSe", "cAsE" },
+                new string[] { "quick", "cookie" }
+            });
+        }
+
+        /// <summary>
+        /// Matches computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqual2()
+        {
+            this.AssertMetaphoneEqual(new string[][] {
+                new string[] { "Lawrence", "Lorenza" },
+                new string[] { "Gary", "Cahra" },
+            });
+        }
+
+        /// <summary>
+        /// Initial AE case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualAero()
+        {
+            this.AssertIsMetaphoneEqual("Aero", new string[] { "Eure" });
+        }
+
+        /// <summary>
+        /// Initial WH case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualWhite()
+        {
+            this.AssertIsMetaphoneEqual(
+                "White",
+                new string[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
+        }
+
+        /// <summary>
+        /// Initial A, not followed by an E case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualAlbert()
+        {
+            this.AssertIsMetaphoneEqual("Albert", new string[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualGary()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Gary",
+                new string[] {
+                    "Cahra", "Cara", "Carey", "Cari", "Caria", "Carie", "Caro", "Carree",
+                    "Carri", "Carrie", "Carry", "Cary", "Cora", "Corey", "Cori", "Corie",
+                    "Correy", "Corri", "Corrie", "Corry", "Cory", "Gray", "Kara", "Kare",
+                    "Karee", "Kari", "Karia", "Karie", "Karrah", "Karrie", "Karry", "Kary",
+                    "Keri", "Kerri", "Kerrie", "Kerry", "Kira", "Kiri", "Kora", "Kore",
+                    "Kori", "Korie", "Korrie", "Korry" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualJohn()
+        {
+            this.AssertIsMetaphoneEqual(
+                "John",
+                new string[] {
+                    "Gena", "Gene", "Genia", "Genna", "Genni", "Gennie", "Genny", "Giana",
+                    "Gianna", "Gina", "Ginni", "Ginnie", "Ginny", "Jaine", "Jan", "Jana",
+                    "Jane", "Janey", "Jania", "Janie", "Janna", "Jany", "Jayne", "Jean",
+                    "Jeana", "Jeane", "Jeanie", "Jeanna", "Jeanne", "Jeannie", "Jen", "Jena",
+                    "Jeni", "Jenn", "Jenna", "Jennee", "Jenni", "Jennie", "Jenny", "Jinny",
+                    "Jo Ann", "Jo-Ann", "Jo-Anne", "Joan", "Joana", "Joane", "Joanie", "Joann",
+                    "Joanna", "Joanne", "Joeann", "Johna", "Johnna", "Joni", "Jonie", "Juana",
+                    "June", "Junia", "Junie" });
+        }
+
+        /// <summary>
+        /// Initial KN case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualKnight()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Knight",
+                new string[] {
+                    "Hynda", "Nada", "Nadia", "Nady", "Nat", "Nata", "Natty", "Neda",
+                    "Nedda", "Nedi", "Netta", "Netti", "Nettie", "Netty", "Nita", "Nydia" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualMary()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Mary",
+                new string[] {
+                    "Mair", "Maire", "Mara", "Mareah", "Mari", "Maria", "Marie", "Mary",
+                    "Maura", "Maure", "Meara", "Merrie", "Merry", "Mira", "Moira", "Mora",
+                    "Moria", "Moyra", "Muire", "Myra", "Myrah" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualParis()
+        {
+            this.AssertIsMetaphoneEqual("Paris", new string[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualPeter()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Peter",
+                new string[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualRay()
+        {
+            this.AssertIsMetaphoneEqual("Ray", new string[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualSusan()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Susan",
+                new string[] {
+                    "Siusan", "Sosanna", "Susan", "Susana", "Susann", "Susanna",
+                    "Susannah", "Susanne", "Suzann", "Suzanna", "Suzanne", "Zuzana" });
+        }
+
+        /// <summary>
+        /// Initial WR case.
+        /// <para/>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualWright()
+        {
+            this.AssertIsMetaphoneEqual("Wright", new string[] { "Rota", "Rudd", "Ryde" });
+        }
+
+        /// <summary>
+        /// Match data computed from http://www.lanw.com/java/phonetic/default.htm
+        /// </summary>
+        [Test]
+        public void TestIsMetaphoneEqualXalan()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Xalan",
+                new string[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
+        }
+
+        [Test]
+        public void TestMetaphone()
+        {
+            Assert.AreEqual("HL", this.StringEncoder.GetMetaphone("howl"));
+            Assert.AreEqual("TSTN", this.StringEncoder.GetMetaphone("testing"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("The"));
+            Assert.AreEqual("KK", this.StringEncoder.GetMetaphone("quick"));
+            Assert.AreEqual("BRN", this.StringEncoder.GetMetaphone("brown"));
+            Assert.AreEqual("FKS", this.StringEncoder.GetMetaphone("fox"));
+            Assert.AreEqual("JMPT", this.StringEncoder.GetMetaphone("jumped"));
+            Assert.AreEqual("OFR", this.StringEncoder.GetMetaphone("over"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("the"));
+            Assert.AreEqual("LS", this.StringEncoder.GetMetaphone("lazy"));
+            Assert.AreEqual("TKS", this.StringEncoder.GetMetaphone("dogs"));
+        }
+
+        [Test]
+        public void TestWordEndingInMB()
+        {
+            Assert.AreEqual("KM", this.StringEncoder.GetMetaphone("COMB"));
+            Assert.AreEqual("TM", this.StringEncoder.GetMetaphone("TOMB"));
+            Assert.AreEqual("WM", this.StringEncoder.GetMetaphone("WOMB"));
+        }
+
+        [Test]
+        public void TestDiscardOfSCEOrSCIOrSCY()
+        {
+            Assert.AreEqual("SNS", this.StringEncoder.GetMetaphone("SCIENCE"));
+            Assert.AreEqual("SN", this.StringEncoder.GetMetaphone("SCENE"));
+            Assert.AreEqual("S", this.StringEncoder.GetMetaphone("SCY"));
+        }
+
+        /// <summary>
+        /// Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
+        /// </summary>
+        [Test]
+        public void TestWhy()
+        {
+            // PHP returns "H". The original metaphone returns an empty string.
+            Assert.AreEqual("", this.StringEncoder.GetMetaphone("WHY"));
+        }
+
+        [Test]
+        public void TestWordsWithCIA()
+        {
+            Assert.AreEqual("XP", this.StringEncoder.GetMetaphone("CIAPO"));
+        }
+
+        [Test]
+        public void TestTranslateOfSCHAndCH()
+        {
+            Assert.AreEqual("SKTL", this.StringEncoder.GetMetaphone("SCHEDULE"));
+            Assert.AreEqual("SKMT", this.StringEncoder.GetMetaphone("SCHEMATIC"));
+
+            Assert.AreEqual("KRKT", this.StringEncoder.GetMetaphone("CHARACTER"));
+            Assert.AreEqual("TX", this.StringEncoder.GetMetaphone("TEACH"));
+        }
+
+        [Test]
+        public void TestTranslateToJOfDGEOrDGIOrDGY()
+        {
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGY"));
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGE"));
+            Assert.AreEqual("AJMT", this.StringEncoder.GetMetaphone("ADGIEMTI"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentHAfterG()
+        {
+            Assert.AreEqual("KNT", this.StringEncoder.GetMetaphone("GHENT"));
+            Assert.AreEqual("B", this.StringEncoder.GetMetaphone("BAUGH"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentGN()
+        {
+            // NOTE: This does not test for silent GN, but for starting with GN
+            Assert.AreEqual("N", this.StringEncoder.GetMetaphone("GNU"));
+
+            // NOTE: Trying to test for GNED, but expected code does not appear to execute
+            Assert.AreEqual("SNT", this.StringEncoder.GetMetaphone("SIGNED"));
+        }
+
+        [Test]
+        public void TestPHTOF()
+        {
+            Assert.AreEqual("FX", this.StringEncoder.GetMetaphone("PHISH"));
+        }
+
+        [Test]
+        public void TestSHAndSIOAndSIAToX()
+        {
+            Assert.AreEqual("XT", this.StringEncoder.GetMetaphone("SHOT"));
+            Assert.AreEqual("OTXN", this.StringEncoder.GetMetaphone("ODSIAN"));
+            Assert.AreEqual("PLXN", this.StringEncoder.GetMetaphone("PULSION"));
+        }
+
+        [Test]
+        public void TestTIOAndTIAToX()
+        {
+            Assert.AreEqual("OX", this.StringEncoder.GetMetaphone("OTIA"));
+            Assert.AreEqual("PRXN", this.StringEncoder.GetMetaphone("PORTION"));
+        }
+
+        [Test]
+        public void TestTCH()
+        {
+            Assert.AreEqual("RX", this.StringEncoder.GetMetaphone("RETCH"));
+            Assert.AreEqual("WX", this.StringEncoder.GetMetaphone("WATCH"));
+        }
+
+        [Test]
+        public void TestExceedLength()
+        {
+            // should be AKSKS, but is truncated by Max Code Length
+            Assert.AreEqual("AKSK", this.StringEncoder.GetMetaphone("AXEAXE"));
+        }
+
+        [Test]
+        public void TestSetMaxLengthWithTruncation()
+        {
+            // Configure a dedicated encoder instead of mutating the fixture's one: if the
+            // fixture encoder is shared between test methods, a raised MaxCodeLen would leak
+            // into tests that rely on the default limit (e.g. the "SKMT"/"AKSK" expectations).
+            Metaphone encoder = this.CreateStringEncoder();
+            encoder.MaxCodeLen = 6;
+
+            // should be AKSKSKS, but is truncated by the Max Code Length of 6
+            Assert.AreEqual("AKSKSK", encoder.GetMetaphone("AXEAXEAXE"));
+        }
+
+        /// <summary>
+        /// Fails the current test when <paramref name="pairs"/> is empty or when any of its
+        /// entries does not contain exactly two elements.
+        /// </summary>
+        /// <param name="pairs">The fixture data to validate.</param>
+        public void ValidateFixture(string[][] pairs)
+        {
+            if (pairs.Length == 0)
+            {
+                Assert.Fail("Test fixture is empty");
+            }
+            for (int i = 0; i < pairs.Length; i++)
+            {
+                if (pairs[i].Length != 2)
+                {
+                    Assert.Fail("Error in test fixture in the data array at index " + i);
+                }
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
new file mode 100644
index 0000000..d1c04d1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
@@ -0,0 +1,319 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Tests <see cref="Nysiis"/>. Name-group tests run against the encoder supplied by
+    /// <see cref="CreateStringEncoder"/>; rule-level tests run against an encoder
+    /// constructed with <c>false</c>.
+    /// </summary>
+    public class NysiisTest : StringEncoderAbstractTest<Nysiis>
+    {
+        // Encoder constructed with 'false'; used by AssertEncodings.
+        private readonly Nysiis fullNysiis = new Nysiis(false);
+
+        /// <summary>
+        /// Checks a list of (input, expected encoding) pairs against <c>fullNysiis</c>.
+        /// </summary>
+        /// <param name="testValues">
+        /// Pairs whose first element is the input and whose second element is the
+        /// expected encoding.
+        /// </param>
+        private void AssertEncodings(params string[][] testValues)
+        {
+            foreach (string[] pair in testValues)
+            {
+                string expected = pair[1];
+                string input = pair[0];
+                Assert.AreEqual(expected, this.fullNysiis.Encode(input), "Problem with " + input);
+            }
+        }
+
+        /// <summary>
+        /// Creates the encoder under test with the parameterless constructor.
+        /// </summary>
+        protected override Nysiis CreateStringEncoder()
+        {
+            Nysiis encoder = new Nysiis();
+            return encoder;
+        }
+
+        /// <summary>
+        /// Checks that every entry of <paramref name="strings"/> is encoded to
+        /// <paramref name="expectedEncoding"/> by the fixture's encoder.
+        /// </summary>
+        private void EncodeAll(string[] strings, string expectedEncoding)
+        {
+            foreach (string name in strings)
+            {
+                Assert.AreEqual(expectedEncoding, this.StringEncoder.Encode(name), "Problem with " + name);
+            }
+        }
+
+        [Test]
+        public void TestBran()
+        {
+            this.EncodeAll(new[] { "Brian", "Brown", "Brun" }, "BRAN");
+        }
+
+        [Test]
+        public void TestCap()
+        {
+            EncodeAll(new[] { "Capp", "Cope", "Copp", "Kipp" }, "CAP");
+        }
+
+        [Test]
+        public void TestDad()
+        {
+            // "Data Quality and Record Linkage Techniques", p. 121, gives DAN here;
+            // DAD is the right value and was also verified with dropby.com.
+            EncodeAll(new[] { "Dent" }, "DAD");
+        }
+
+        [Test]
+        public void TestDan()
+        {
+            EncodeAll(new[] { "Dane", "Dean", "Dionne" }, "DAN");
+        }
+
+        /// <summary>
+        /// Tests data gathered from around the internet.
+        /// <para/>
+        /// See http://www.dropby.com/NYSIISTextStrings.html
+        /// </summary>
+        [Test]
+        public void TestDropBy()
+        {
+            // Where this implementation differs from the one at dropby.com, the reason is
+            // given in the comment preceding the pair. Rule numbers refer to the steps
+            // outlined in the class description of Nysiis.
+
+            AssertEncodings(
+                // 1. Transcode first characters of name
+                new[] { "MACINTOSH", "MCANT" },
+                // violates 4j: no second N may be added because the first key
+                // character already is an N
+                new[] { "KNUTH", "NAT" }, // Original: NNAT; modified: NATH
+                // rule 4a transcodes O and E to A, rule 4h does the same for H;
+                // the N vanishes mysteriously - perhaps a wrongly implemented rule 4h
+                // skipping the next char in that case?
+                // rule 7 removes the remaining A
+                new[] { "KOEHN", "CAN" }, // Original: C
+                // violates 4j: see also KNUTH
+                new[] { "PHILLIPSON", "FALAPSAN" }, // Original: FFALAP[SAN]
+                // violates 4j: see also KNUTH
+                new[] { "PFEISTER", "FASTAR" }, // Original: FFASTA[R]
+                // violates 4j: see also KNUTH
+                new[] { "SCHOENHOEFT", "SANAFT" }, // Original: SSANAF[T]
+                // 2. Transcode last characters of name:
+                new[] { "MCKEE", "MCY" },
+                new[] { "MACKIE", "MCY" },
+                new[] { "HEITSCHMIDT", "HATSNAD" },
+                new[] { "BART", "BAD" },
+                new[] { "HURD", "HAD" },
+                new[] { "HUNT", "HAD" },
+                new[] { "WESTERLUND", "WASTARLAD" },
+                // 4. Transcode remaining characters by following these rules,
+                // incrementing by one character each time:
+                new[] { "CASSTEVENS", "CASTAFAN" },
+                new[] { "VASQUEZ", "VASG" },
+                new[] { "FRAZIER", "FRASAR" },
+                new[] { "BOWMAN", "BANAN" },
+                new[] { "MCKNIGHT", "MCNAGT" },
+                new[] { "RICKERT", "RACAD" },
+                // violates 5: the trailing S is not removed;
+                // the phonetically similar DEUTS also yields DAT,
+                // which arguably is the correct result for DEUTSCH as well
+                new[] { "DEUTSCH", "DAT" }, // Original: DATS
+                new[] { "WESTPHAL", "WASTFAL" },
+                // violates 4h: the H should be transcoded to S and therefore be
+                // ignored, since the first key character is an S too
+                new[] { "SHRIVER", "SRAVAR" }, // Original: SHRAVA[R]
+                // like KOEHN: the L vanishes mysteriously
+                new[] { "KUHL", "CAL" }, // Original: C
+                new[] { "RAWSON", "RASAN" },
+                // If last character is S, remove it
+                new[] { "JILES", "JAL" },
+                // violates 6: if the last two characters are AY, remove A
+                new[] { "CARRAWAY", "CARY" }, // Original: CARAY
+                new[] { "YAMADA", "YANAD" });
+        }
+
+        [Test]
+        public void TestFal()
+        {
+            EncodeAll(new[] { "Phil" }, "FAL");
+        }
+
+        /// <summary>
+        /// Tests further data gathered from around the internet.
+        /// </summary>
+        [Test]
+        public void TestOthers()
+        {
+            AssertEncodings(
+                new[] { "O'Daniel", "ODANAL" },
+                new[] { "O'Donnel", "ODANAL" },
+                new[] { "Cory", "CARY" },
+                new[] { "Corey", "CARY" },
+                new[] { "Kory", "CARY" },
+                //
+                new[] { "FUZZY", "FASY" });
+        }
+
+        /// <summary>
+        /// Tests rule 1: Translate first characters of name: MAC → MCC, KN → N, K → C, PH, PF → FF, SCH → SSS
+        /// </summary>
+        [Test]
+        public void TestRule1()
+        {
+            AssertEncodings(
+                new[] { "MACX", "MCX" },
+                new[] { "KNX", "NX" },
+                new[] { "KX", "CX" },
+                new[] { "PHX", "FX" },
+                new[] { "PFX", "FX" },
+                new[] { "SCHX", "SX" });
+        }
+
+        /// <summary>
+        /// Tests rule 2: Translate last characters of name: EE → Y, IE → Y, DT, RT, RD, NT, ND → D
+        /// </summary>
+        [Test]
+        public void TestRule2()
+        {
+            AssertEncodings(
+                new[] { "XEE", "XY" },
+                new[] { "XIE", "XY" },
+                new[] { "XDT", "XD" },
+                new[] { "XRT", "XD" },
+                new[] { "XRD", "XD" },
+                new[] { "XNT", "XD" },
+                new[] { "XND", "XD" });
+        }
+
+        /// <summary>
+        /// Tests rule 4.1: EV → AF else A, E, I, O, U → A
+        /// </summary>
+        [Test]
+        public void TestRule4Dot1()
+        {
+            AssertEncodings(
+                new[] { "XEV", "XAF" },
+                new[] { "XAX", "XAX" },
+                new[] { "XEX", "XAX" },
+                new[] { "XIX", "XAX" },
+                new[] { "XOX", "XAX" },
+                new[] { "XUX", "XAX" });
+        }
+
+        /// <summary>
+        /// Tests rule 4.2: Q → G, Z → S, M → N
+        /// </summary>
+        [Test]
+        public void TestRule4Dot2()
+        {
+            AssertEncodings(
+                new[] { "XQ", "XG" },
+                new[] { "XZ", "X" },
+                new[] { "XM", "XN" });
+        }
+
+        /// <summary>
+        /// Tests rule 5: If last character is S, remove it.
+        /// </summary>
+        [Test]
+        public void TestRule5()
+        {
+            AssertEncodings(
+                new[] { "XS", "X" },
+                new[] { "XSS", "X" });
+        }
+
+        /// <summary>
+        /// Tests rule 6: If last characters are AY, replace with Y.
+        /// </summary>
+        [Test]
+        public void TestRule6()
+        {
+            AssertEncodings(
+                new[] { "XAY", "XY" },
+                new[] { "XAYS", "XY" }); // Rules 5, 6
+        }
+
+        /// <summary>
+        /// Tests rule 7: If last character is A, remove it.
+        /// </summary>
+        [Test]
+        public void TestRule7()
+        {
+            AssertEncodings(
+                new[] { "XA", "X" },
+                new[] { "XAS", "X" }); // Rules 5, 7
+        }
+
+        [Test]
+        public void TestSnad()
+        {
+            // "Data Quality and Record Linkage Techniques", p. 121, gives SNAT here;
+            // SNAD is the right value.
+            EncodeAll(new[] { "Schmidt" }, "SNAD");
+        }
+
+        [Test]
+        public void TestSnat()
+        {
+            EncodeAll(new[] { "Smith", "Schmit" }, "SNAT");
+        }
+
+        [Test]
+        public void TestSpecialBranches()
+        {
+            EncodeAll(new[] { "Kobwick" }, "CABWAC");
+            EncodeAll(new[] { "Kocher" }, "CACAR");
+            EncodeAll(new[] { "Fesca" }, "FASC");
+            EncodeAll(new[] { "Shom" }, "SAN");
+            EncodeAll(new[] { "Ohlo" }, "OL");
+            EncodeAll(new[] { "Uhu" }, "UH");
+            EncodeAll(new[] { "Um" }, "UN");
+        }
+
+        [Test]
+        public void TestTranan()
+        {
+            EncodeAll(new[] { "Trueman", "Truman" }, "TRANAN");
+        }
+
+        /// <summary>
+        /// Checks an encoder constructed with <c>true</c>: the code for WESTERLUND is
+        /// at most six characters long and equals WASTAR.
+        /// </summary>
+        [Test]
+        public void TestTrueVariant()
+        {
+            Nysiis strictEncoder = new Nysiis(true);
+            string code = strictEncoder.Encode("WESTERLUND");
+
+            Assert.True(code.Length <= 6);
+            Assert.AreEqual("WASTAR", code);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
new file mode 100644
index 0000000..eca1827
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
@@ -0,0 +1,99 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests RefinedSoundex.
+ /// </summary>
+    public class RefinedSoundexTest : StringEncoderAbstractTest<RefinedSoundex>
+    {
+        /// <summary>
+        /// Creates the encoder under test with the parameterless constructor.
+        /// </summary>
+        protected override RefinedSoundex CreateStringEncoder()
+        {
+            return new RefinedSoundex();
+        }
+
+        /// <summary>
+        /// Checks the value returned by <c>Difference</c> for null, empty and blank
+        /// inputs and for a set of name pairs.
+        /// </summary>
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(3, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(5, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(8, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(5, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        /// <summary>
+        /// Checks the codes produced by <c>Encode</c> for a set of words, including the
+        /// same word in different letter case.
+        /// </summary>
+        [Test]
+        public void TestEncode()
+        {
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("TESTING"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q503", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B1908", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F205", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J408106", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O0209", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L7050", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D6043", this.StringEncoder.Encode("dogs"));
+
+            // Testing CODEC-56
+            Assert.AreEqual("D6043", RefinedSoundex.US_ENGLISH.Encode("dogs"));
+        }
+
+        /// <summary>
+        /// Checks that the mapping code returned for the non-letter '#' compares equal to zero.
+        /// </summary>
+        [Test]
+        public void TestGetMappingCodeNonLetter()
+        {
+            char code = this.StringEncoder.GetMappingCode('#');
+            Assert.AreEqual(0, code, "Code does not equals zero");
+        }
+
+        /// <summary>
+        /// Checks an encoder built with the parameterless constructor.
+        /// </summary>
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex().GetSoundex("dogs"));
+        }
+
+        /// <summary>
+        /// Checks an encoder built from the US English mapping passed as a char array.
+        /// </summary>
+        [Test]
+        public void TestNewInstance2()
+        {
+            // String.ToCharArray() is the .NET spelling; the Java-cased toCharArray() does not exist.
+            Assert.AreEqual("D6043", new RefinedSoundex(RefinedSoundex.US_ENGLISH_MAPPING_STRING.ToCharArray()).GetSoundex("dogs"));
+        }
+
+        /// <summary>
+        /// Checks an encoder built from the US English mapping passed as a string.
+        /// </summary>
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex(RefinedSoundex.US_ENGLISH_MAPPING_STRING).GetSoundex("dogs"));
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
new file mode 100644
index 0000000..5cc01ec
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
@@ -0,0 +1,424 @@
+// commons-codec version compatibility level: 1.10
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Tests <see cref="Soundex"/>
+    /// </summary>
+    public class SoundexTest : StringEncoderAbstractTest<Soundex>
+    {
+        /// <summary>
+        /// Supplies the encoder instance used by the inherited and local tests.
+        /// </summary>
+        protected override Soundex CreateStringEncoder()
+        {
+            return new Soundex();
+        }
+
+        /// <summary>
+        /// Every name in the list is expected to encode to "B650".
+        /// </summary>
+        [Test]
+        public void TestB650()
+        {
+            this.CheckEncodingVariations("B650", new string[]
+            {
+                "BARHAM", "BARONE", "BARRON", "BERNA", "BIRNEY",
+                "BIRNIE", "BOOROM", "BOREN", "BORN", "BOURN",
+                "BOURNE", "BOWRON", "BRAIN", "BRAME", "BRANN",
+                "BRAUN", "BREEN", "BRIEN", "BRIM", "BRIMM",
+                "BRINN", "BRION", "BROOM", "BROOME", "BROWN",
+                "BROWNE", "BRUEN", "BRUHN", "BRUIN", "BRUMM",
+                "BRUN", "BRUNO", "BRYAN", "BURIAN", "BURN",
+                "BURNEY", "BYRAM", "BYRNE", "BYRON", "BYRUM"
+            });
+        }
+
+        /// <summary>
+        /// A non-letter character embedded in the input does not change the expected code.
+        /// </summary>
+        [Test]
+        public void TestBadCharacters()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOL>MES"));
+        }
+
+        /// <summary>
+        /// Checks the values returned by <c>Difference</c> for edge cases and sample name pairs.
+        /// </summary>
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        /// <summary>
+        /// Checks the codes of a handful of common words.
+        /// </summary>
+        [Test]
+        public void TestEncodeBasic()
+        {
+            Assert.AreEqual("T235", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q200", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B650", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F200", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J513", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O160", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L200", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D200", this.StringEncoder.Encode("dogs"));
+        }
+
+        /// <summary>
+        /// Examples from http://www.bradandkathy.com/genealogy/overviewofsoundex.html
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch2()
+        {
+            Assert.AreEqual("A462", this.StringEncoder.Encode("Allricht"));
+            Assert.AreEqual("E166", this.StringEncoder.Encode("Eberhard"));
+            Assert.AreEqual("E521", this.StringEncoder.Encode("Engebrethson"));
+            Assert.AreEqual("H512", this.StringEncoder.Encode("Heimbach"));
+            Assert.AreEqual("H524", this.StringEncoder.Encode("Hanselmann"));
+            Assert.AreEqual("H431", this.StringEncoder.Encode("Hildebrand"));
+            Assert.AreEqual("K152", this.StringEncoder.Encode("Kavanagh"));
+            Assert.AreEqual("L530", this.StringEncoder.Encode("Lind"));
+            Assert.AreEqual("L222", this.StringEncoder.Encode("Lukaschowsky"));
+            Assert.AreEqual("M235", this.StringEncoder.Encode("McDonnell"));
+            Assert.AreEqual("M200", this.StringEncoder.Encode("McGee"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Opnian"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Oppenheimer"));
+            Assert.AreEqual("R355", this.StringEncoder.Encode("Riedemanas"));
+            Assert.AreEqual("Z300", this.StringEncoder.Encode("Zita"));
+            Assert.AreEqual("Z325", this.StringEncoder.Encode("Zitzmeinn"));
+        }
+
+        /// <summary>
+        /// Examples from http://www.archives.gov/research_room/genealogy/census/soundex.html
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch3()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode("Washington"));
+            Assert.AreEqual("L000", this.StringEncoder.Encode("Lee"));
+            Assert.AreEqual("G362", this.StringEncoder.Encode("Gutierrez"));
+            Assert.AreEqual("P236", this.StringEncoder.Encode("Pfister"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("Jackson"));
+            Assert.AreEqual("T522", this.StringEncoder.Encode("Tymczak"));
+            // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
+            // possible.
+            Assert.AreEqual("V532", this.StringEncoder.Encode("VanDeusen"));
+        }
+
+        /// <summary>
+        /// Examples from: http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestEncodeBatch4()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOLMES"));
+            Assert.AreEqual("A355", this.StringEncoder.Encode("ADOMOMI"));
+            Assert.AreEqual("V536", this.StringEncoder.Encode("VONDERLEHR"));
+            Assert.AreEqual("B400", this.StringEncoder.Encode("BALL"));
+            Assert.AreEqual("S000", this.StringEncoder.Encode("SHAW"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("JACKSON"));
+            Assert.AreEqual("S545", this.StringEncoder.Encode("SCANLON"));
+            Assert.AreEqual("S532", this.StringEncoder.Encode("SAINTJOHN"));
+        }
+
+        /// <summary>
+        /// An apostrophe at any position leaves the expected code unchanged.
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreApostrophes()
+        {
+            this.CheckEncodingVariations("O165", new string[]
+            {
+                "OBrien",
+                "'OBrien",
+                "O'Brien",
+                "OB'rien",
+                "OBr'ien",
+                "OBri'en",
+                "OBrie'n",
+                "OBrien'"
+            });
+        }
+
+        /// <summary>
+        /// A hyphen at any position leaves the expected code unchanged.
+        /// Test data from http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreHyphens()
+        {
+            this.CheckEncodingVariations("K525", new string[]
+            {
+                "KINGSMITH",
+                "-KINGSMITH",
+                "K-INGSMITH",
+                "KI-NGSMITH",
+                "KIN-GSMITH",
+                "KING-SMITH",
+                "KINGS-MITH",
+                "KINGSM-ITH",
+                "KINGSMI-TH",
+                "KINGSMIT-H",
+                "KINGSMITH-"
+            });
+        }
+
+        /// <summary>
+        /// Leading and trailing whitespace around the input does not change the expected code.
+        /// </summary>
+        [Test]
+        public void TestEncodeIgnoreTrimmable()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode(" \t\n\r Washington \t\n\r "));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx1()
+        {
+            // From
+            // http://www.archives.gov/research_room/genealogy/census/soundex.html:
+            // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
+            // for the F). It is not coded A-226.
+            Assert.AreEqual("A261", this.StringEncoder.Encode("Ashcraft"));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// Test data from http://www.myatt.demon.co.uk/sxalg.htm
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx2()
+        {
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTHDAVIS"));
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTH-DAVIS"));
+        }
+
+        /// <summary>
+        /// Consonants from the same code group separated by W or H are treated as one.
+        /// </summary>
+        [Test]
+        public void TestHWRuleEx3()
+        {
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Sgler"));
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Swhgler"));
+            // Also S460:
+            this.CheckEncodingVariations("S460", new string[]
+            {
+                "SAILOR", "SALYER", "SAYLOR", "SCHALLER",
+                "SCHELLER", "SCHILLER", "SCHOOLER", "SCHULER",
+                "SCHUYLER", "SEILER", "SEYLER", "SHOLAR",
+                "SHULER", "SILAR", "SILER", "SILLER"
+            });
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from
+        /// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer1()
+        {
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smith"));
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smythe"));
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from
+        /// http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support/kb/articles/Q100/3/65.asp&amp;NoWebContent=1
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer2()
+        {
+            this.CheckEncodingVariations("E625", new string[] { "Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen" });
+        }
+
+        /// <summary>
+        /// Examples for MS SQLServer from http://databases.about.com/library/weekly/aa042901a.htm
+        /// </summary>
+        [Test]
+        public void TestMsSqlServer3()
+        {
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Ann"));
+            Assert.AreEqual("A536", this.StringEncoder.Encode("Andrew"));
+            Assert.AreEqual("J530", this.StringEncoder.Encode("Janet"));
+            Assert.AreEqual("M626", this.StringEncoder.Encode("Margaret"));
+            Assert.AreEqual("S315", this.StringEncoder.Encode("Steven"));
+            Assert.AreEqual("M240", this.StringEncoder.Encode("Michael"));
+            Assert.AreEqual("R163", this.StringEncoder.Encode("Robert"));
+            Assert.AreEqual("L600", this.StringEncoder.Encode("Laura"));
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Anne"));
+        }
+
+        /// <summary>
+        /// https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+        /// </summary>
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("W452", new Soundex().GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// A <see cref="Soundex"/> built from the US English mapping, passed as a <c>char[]</c>.
+        /// </summary>
+        [Test]
+        public void TestNewInstance2()
+        {
+            // Use the BCL String.ToCharArray() rather than a Java-style toCharArray() shim.
+            char[] mapping = Soundex.US_ENGLISH_MAPPING_STRING.ToCharArray();
+            Assert.AreEqual("W452", new Soundex(mapping).GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// A <see cref="Soundex"/> built from the US English mapping string.
+        /// </summary>
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// Only verifies that <c>SoundexUtils</c> can be instantiated.
+        /// </summary>
+        [Test]
+        public void TestSoundexUtilsConstructable()
+        {
+            new SoundexUtils();
+        }
+
+        /// <summary>
+        /// Checks how <c>SoundexUtils</c> handles null and empty arguments.
+        /// </summary>
+        [Test]
+        public void TestSoundexUtilsNullBehaviour()
+        {
+            Assert.AreEqual(null, SoundexUtils.Clean(null));
+            Assert.AreEqual("", SoundexUtils.Clean(""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded(null, ""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded("", null));
+        }
+
+        /// <summary>
+        /// https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+        /// </summary>
+        [Test]
+        public void TestUsEnglishStatic()
+        {
+            Assert.AreEqual("W452", Soundex.US_ENGLISH.GetSoundex("Williams"));
+        }
+
+        /// <summary>
+        /// Fancy characters are not mapped by the default US mapping.
+        /// http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+        /// </summary>
+        [Test]
+        public void TestUsMappingEWithAcute()
+        {
+            Assert.AreEqual("E000", this.StringEncoder.Encode("e"));
+            if (char.IsLetter('\u00e9'))
+            { // e-acute
+                try
+                {
+                    // uppercase E-acute
+                    Assert.AreEqual("\u00c9000", this.StringEncoder.Encode("\u00e9"));
+                    Assert.Fail("Expected ArgumentException not thrown");
+                }
+                catch (ArgumentException)
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00e9"));
+            }
+        }
+
+        /// <summary>
+        /// Fancy characters are not mapped by the default US mapping.
+        /// http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+        /// </summary>
+        [Test]
+        public void TestUsMappingOWithDiaeresis()
+        {
+            Assert.AreEqual("O000", this.StringEncoder.Encode("o"));
+            if (char.IsLetter('\u00f6'))
+            { // o-umlaut
+                try
+                {
+                    // uppercase O-umlaut
+                    Assert.AreEqual("\u00d6000", this.StringEncoder.Encode("\u00f6"));
+                    Assert.Fail("Expected ArgumentException not thrown");
+                }
+                catch (ArgumentException)
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00f6"));
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
new file mode 100644
index 0000000..8fd8b7f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
@@ -0,0 +1,164 @@
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Shared base for encoder tests. A subclass provides the encoder through
+    /// <see cref="CreateStringEncoder"/>; <see cref="SetUp"/> stores the created
+    /// instance, which the tests read through <see cref="StringEncoder"/>.
+    /// </summary>
+    /// <typeparam name="T">The encoder type under test.</typeparam>
+    public abstract class StringEncoderAbstractTest<T>
+        where T : IStringEncoder
+    {
+        protected T stringEncoder;
+
+        /// <summary>
+        /// Creates the encoder instance via <see cref="CreateStringEncoder"/>.
+        /// </summary>
+        [SetUp]
+        public void SetUp()
+        {
+            stringEncoder = this.CreateStringEncoder();
+        }
+
+        /// <summary>
+        /// Asserts that encoding <paramref name="source"/> yields <paramref name="expected"/>.
+        /// The source text is included in the failure message.
+        /// </summary>
+        public virtual void CheckEncoding(string expected, string source)
+        {
+            Assert.AreEqual(expected, this.StringEncoder.Encode(source), "Source: " + source);
+        }
+
+        /// <summary>
+        /// Checks a table of pairs; in each row, element 0 is the source and element 1
+        /// is the expected encoding.
+        /// </summary>
+        protected virtual void CheckEncodings(string[][] data)
+        {
+            foreach (string[] element in data)
+            {
+                this.CheckEncoding(element[1], element[0]);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that every string in <paramref name="data"/> encodes to <paramref name="expected"/>.
+        /// </summary>
+        protected virtual void CheckEncodingVariations(string expected, string[] data)
+        {
+            foreach (string element in data)
+            {
+                this.CheckEncoding(expected, element);
+            }
+        }
+
+        /// <summary>
+        /// Factory hook: returns the encoder to be tested.
+        /// </summary>
+        protected abstract T CreateStringEncoder();
+
+        /// <summary>
+        /// The encoder stored by <see cref="SetUp"/>.
+        /// </summary>
+        public virtual T StringEncoder
+        {
+            get { return this.stringEncoder; }
+        }
+
+        /// <summary>
+        /// Encodes empty and whitespace-only input; the test passes as long as
+        /// no exception escapes (results are not inspected).
+        /// </summary>
+        [Test]
+        public virtual void TestEncodeEmpty()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+            encoder.Encode("");
+            encoder.Encode(" ");
+            encoder.Encode("\t");
+        }
+
+        /// <summary>
+        /// Passes <c>null</c> to the encoder. No assertion is made: the call may
+        /// return normally, and any exception it throws is caught and ignored.
+        /// </summary>
+        [Test]
+        public virtual void TestEncodeNull()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+            try
+            {
+                encoder.Encode(null);
+            }
+            catch (/*Encoder*/Exception)
+            {
+                // An exception should be thrown
+            }
+        }
+
+        // LUCENENET specific - since strings are sealed in .NET, there
+        // is no point in implementing IEncoder or running these tests.
+        // Our version only accepts strings
+        //[Test]
+        //public virtual void TestEncodeWithInvalidObject()
+        //{
+        //    bool exceptionThrown = false;
+        //    try
+        //    {
+        //        IStringEncoder encoder = this.StringEncoder;
+        //        encoder.Encode(3.4f);
+        //    }
+        //    catch (Exception e)
+        //    {
+        //        exceptionThrown = true;
+        //    }
+        //    Assert.True(exceptionThrown, "An exception was not thrown when we tried to encode " + "a Float object");
+        //}
+
+        /// <summary>
+        /// Encodes "I" and "i" under several current cultures ("en", "tr" and the
+        /// culture active when the test started) and asserts that each result equals
+        /// the one obtained under the first culture. The original culture is restored
+        /// in a <c>finally</c> block.
+        /// </summary>
+        [Test]
+        public virtual void TestLocaleIndependence()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+
+            string[] data = { "I", "i", };
+
+            CultureInfo orig = CultureInfo.CurrentCulture;
+            CultureInfo[] locales = { new CultureInfo("en"), new CultureInfo("tr"), CultureInfo.CurrentCulture };
+
+            try
+            {
+                foreach (string element in data)
+                {
+                    // Encoding produced under locales[0]; later cultures are compared with it.
+                    string @ref = null;
+                    for (int j = 0; j < locales.Length; j++)
+                    {
+                        //Locale.setDefault(locales[j]);
+#if NETSTANDARD
+                        CultureInfo.CurrentCulture = locales[j];
+#else
+                        Thread.CurrentThread.CurrentCulture = locales[j];
+#endif
+                        if (j <= 0)
+                        {
+                            @ref = encoder.Encode(element);
+                        }
+                        else
+                        {
+                            string cur = null;
+                            try
+                            {
+                                cur = encoder.Encode(element);
+                            }
+                            catch (Exception e)
+                            {
+                                // Report which culture was active when encoding failed.
+                                Assert.Fail(CultureInfo.CurrentCulture.ToString() + ": " + e.Message);
+                            }
+                            Assert.AreEqual(@ref, cur, CultureInfo.CurrentCulture.ToString() + ": ");
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                //Locale.setDefault(orig);
+#if NETSTANDARD
+                CultureInfo.CurrentCulture = orig;
+#else
+                Thread.CurrentThread.CurrentCulture = orig;
+#endif
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
new file mode 100644
index 0000000..5c38e1f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{A2867797-0A5D-4878-8F59-58C399C9A4E4}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Analysis.Phonetic</RootNamespace>
+ <AssemblyName>Lucene.Net.Tests.Analysis.Phonetic</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Net.Http" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="DoubleMetaphoneFilterTest.cs" />
+ <Compile Include="Language\Bm\BeiderMorseEncoderTest.cs" />
+ <Compile Include="Language\Bm\CacheSubSequencePerformanceTest.cs" />
+ <Compile Include="Language\Bm\LanguageGuessingTest.cs" />
+ <Compile Include="Language\Bm\PhoneticEnginePerformanceTest.cs" />
+ <Compile Include="Language\Bm\PhoneticEngineRegressionTest.cs" />
+ <Compile Include="Language\Bm\PhoneticEngineTest.cs" />
+ <Compile Include="Language\Bm\RuleTest.cs" />
+ <Compile Include="Language\Caverphone1Test.cs" />
+ <Compile Include="Language\Caverphone2Test .cs" />
+ <Compile Include="Language\ColognePhoneticTest.cs" />
+ <Compile Include="Language\DaitchMokotoffSoundexTest.cs" />
+ <Compile Include="Language\DoubleMetaphone2Test.cs" />
+ <Compile Include="Language\DoubleMetaphoneTest.cs" />
+ <Compile Include="Language\MatchRatingApproachEncoderTest.cs" />
+ <Compile Include="Language\MetaphoneTest.cs" />
+ <Compile Include="Language\NysiisTest.cs" />
+ <Compile Include="Language\RefinedSoundexTest.cs" />
+ <Compile Include="Language\SoundexTest.cs" />
+ <Compile Include="Language\StringEncoderAbstractTest.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TestBeiderMorseFilter.cs" />
+ <Compile Include="TestBeiderMorseFilterFactory.cs" />
+ <Compile Include="TestDoubleMetaphoneFilterFactory.cs" />
+ <Compile Include="TestPhoneticFilter.cs" />
+ <Compile Include="TestPhoneticFilterFactory.cs" />
+ <Compile Include="..\CommonAssemblyInfo.cs">
+ <Link>Properties\CommonAssemblyInfo.cs</Link>
+ </Compile>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+ <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+ <Name>Lucene.Net.Analysis.Common</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.csproj">
+ <Project>{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}</Project>
+ <Name>Lucene.Net.Analysis.Phonetic</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+ <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+ <Name>Lucene.Net.TestFramework</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="Lucene.Net.Tests.Analysis.Phonetic.project.json" />
+ </ItemGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
new file mode 100644
index 0000000..8c631ab
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
@@ -0,0 +1,11 @@
+{
+ "runtimes": {
+ "win": {}
+ },
+ "dependencies": {
+ "NUnit": "3.5.0"
+ },
+ "frameworks": {
+ "net451": {}
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
new file mode 100644
index 0000000..16b7fef
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup>
+ <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+ <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+ </PropertyGroup>
+ <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>1fe12ef7-4c89-4d49-bdd1-e49dc285f21b</ProjectGuid>
+ <RootNamespace>Lucene.Net.Tests.Analysis.Phonetic</RootNamespace>
+ <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+ <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+ </PropertyGroup>
+ <PropertyGroup>
+ <SchemaVersion>2.0</SchemaVersion>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
+ </ItemGroup>
+ <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..14e5b1c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
@@ -0,0 +1,42 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Phonetic")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("a2867797-0a5d-4878-8f59-58c399c9a4e4")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
new file mode 100644
index 0000000..cc0e897
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
@@ -0,0 +1,132 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.TokenAttributes;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Tests <see cref="BeiderMorseFilter"/>
+    /// </summary>
+    public class TestBeiderMorseFilter : BaseTokenStreamTestCase
+    {
+        // Shared analyzer: whitespace MockTokenizer followed by a BeiderMorseFilter
+        // using the GENERIC name type and EXACT rule type.
+        private readonly Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            return new TokenStreamComponents(tokenizer,
+                new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+        });
+
+        /// <summary>generic, "exact" configuration</summary>
+        [Test]
+        public void TestBasicUsage()
+        {
+            AssertAnalyzesTo(analyzer, "Angelo",
+                new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 6, 6, 6, 6, 6, 6 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+
+            AssertAnalyzesTo(analyzer, "D'Angelo",
+                new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo",
+                    "danZelo", "dandZelo", "dangelo", "danhelo", "danjelo", "danxelo" },
+                new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+        }
+
+        /// <summary>restrict the output to a set of possible origin languages</summary>
+        [Test]
+        public void TestLanguageSet()
+        {
+            LanguageSet languages = LanguageSet.From(new HashSet<String>() {
+                "italian", "greek", "spanish"
+            });
+            // Named differently from the class-level analyzer field to avoid shadowing it.
+            Analyzer restrictedAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer,
+                    new BeiderMorseFilter(tokenizer,
+                        new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
+            });
+
+            AssertAnalyzesTo(restrictedAnalyzer, "Angelo",
+                new String[] { "andZelo", "angelo", "anxelo" },
+                new int[] { 0, 0, 0, },
+                new int[] { 6, 6, 6, },
+                new int[] { 1, 0, 0, });
+        }
+
+        /// <summary>for convenience, if the input yields no output, we pass it thru as-is</summary>
+        [Test]
+        public void TestNumbers()
+        {
+            AssertAnalyzesTo(analyzer, "1234",
+                new String[] { "1234" },
+                new int[] { 0 },
+                new int[] { 4 },
+                new int[] { 1 });
+        }
+
+        /// <summary>
+        /// Runs the shared analyzer through <c>CheckRandomData</c>.
+        /// </summary>
+        [Test]
+        public void TestRandom()
+        {
+            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        /// <summary>
+        /// An empty term fed through a KeywordTokenizer and the filter comes out as an empty term.
+        /// </summary>
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+            });
+
+            CheckOneTerm(a, "", "");
+        }
+
+        /// <summary>
+        /// Every token emitted for "D'Angelo" must still carry the keyword flag set by
+        /// the upstream <c>PatternKeywordMarkerFilter</c>, and exactly 12 tokens are expected.
+        /// </summary>
+        [Test]
+        public void TestCustomAttribute()
+        {
+            TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+            stream = new PatternKeywordMarkerFilter(stream, new Regex(".*"));
+            stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
+            try
+            {
+                IKeywordAttribute keyAtt = stream.AddAttribute<IKeywordAttribute>();
+                stream.Reset();
+                int i = 0;
+                while (stream.IncrementToken())
+                {
+                    assertTrue(keyAtt.IsKeyword);
+                    i++;
+                }
+                assertEquals(12, i);
+                stream.End();
+            }
+            finally
+            {
+                // Release the stream even when one of the assertions above fails.
+                stream.Dispose();
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
new file mode 100644
index 0000000..5bdf1b7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
@@ -0,0 +1,89 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Basic checks of the token streams created by <see cref="BeiderMorseFilterFactory"/>
+ /// </summary>
+ public class TestBeiderMorseFilterFactory : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// A factory built from an empty argument map turns "Weinberg" into six tokens
+ /// that share the same offsets and position.
+ /// </summary>
+ [Test]
+ public void TestBasics()
+ {
+ var factory = new BeiderMorseFilterFactory(new Dictionary<string, string>());
+ TokenStream source = new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false);
+ TokenStream filtered = factory.Create(source);
+
+ string[] expectedTerms = { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" };
+ int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0 };
+ int[] expectedEndOffsets = { 8, 8, 8, 8, 8, 8 };
+ int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 0 };
+ AssertTokenStreamContents(filtered, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
+ }
+
+ /// <summary>
+ /// Setting "languageSet" to "polish" changes the encodings produced for "Weinberg".
+ /// </summary>
+ [Test]
+ public void TestLanguageSet()
+ {
+ var factoryArgs = new Dictionary<string, string>();
+ factoryArgs["languageSet"] = "polish";
+ var factory = new BeiderMorseFilterFactory(factoryArgs);
+ TokenStream source = new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false);
+ TokenStream filtered = factory.Create(source);
+
+ string[] expectedTerms = { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" };
+ int[] expectedStartOffsets = { 0, 0, 0, 0, 0, 0 };
+ int[] expectedEndOffsets = { 8, 8, 8, 8, 8, 8 };
+ int[] expectedPositionIncrements = { 1, 0, 0, 0, 0, 0 };
+ AssertTokenStreamContents(filtered, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
+ }
+
+ /// <summary>
+ /// With "nameType" = "ASHKENAZI" and "ruleType" = "EXACT", "Weinberg" yields the single token "vajnberk".
+ /// </summary>
+ [Test]
+ public void TestOptions()
+ {
+ var factoryArgs = new Dictionary<string, string>();
+ factoryArgs["nameType"] = "ASHKENAZI";
+ factoryArgs["ruleType"] = "EXACT";
+ var factory = new BeiderMorseFilterFactory(factoryArgs);
+ TokenStream source = new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false);
+ TokenStream filtered = factory.Create(source);
+
+ string[] expectedTerms = { "vajnberk" };
+ int[] expectedStartOffsets = { 0 };
+ int[] expectedEndOffsets = { 8 };
+ int[] expectedPositionIncrements = { 1 };
+ AssertTokenStreamContents(filtered, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPositionIncrements);
+ }
+
+ /// <summary>
+ /// An unrecognised argument must make the constructor throw an <see cref="ArgumentException"/>
+ /// whose message contains "Unknown parameters".
+ /// </summary>
+ [Test]
+ public void TestBogusArguments()
+ {
+ var bogusArgs = new Dictionary<string, string>();
+ bogusArgs["bogusArg"] = "bogusValue";
+ try
+ {
+ new BeiderMorseFilterFactory(bogusArgs);
+ fail();
+ }
+ catch (ArgumentException expected)
+ {
+ bool mentionsUnknownParameters = expected.Message.Contains("Unknown parameters");
+ assertTrue(mentionsUnknownParameters);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
new file mode 100644
index 0000000..5ba337b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
@@ -0,0 +1,70 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Checks the filters created by <see cref="DoubleMetaphoneFilterFactory"/>
+ /// </summary>
+ public class TestDoubleMetaphoneFilterFactory : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Builds a factory from <paramref name="args"/> and applies it to a whitespace
+ /// <see cref="MockTokenizer"/> reading the word "international".
+ /// </summary>
+ private static TokenStream FilterInternational(IDictionary<string, string> args)
+ {
+ var factory = new DoubleMetaphoneFilterFactory(args);
+ TokenStream source = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
+ return factory.Create(source);
+ }
+
+ /// <summary>
+ /// With no arguments the factory creates a <see cref="DoubleMetaphoneFilter"/> that emits
+ /// the original term followed by the code "ANTR".
+ /// </summary>
+ [Test]
+ public void TestDefaults()
+ {
+ TokenStream filtered = FilterInternational(new Dictionary<string, string>());
+
+ assertEquals(typeof(DoubleMetaphoneFilter), filtered.GetType());
+ AssertTokenStreamContents(filtered, new[] { "international", "ANTR" });
+ }
+
+ /// <summary>
+ /// With "inject" = "false" and "maxCodeLength" = "8" only the code "ANTRNXNL" is emitted.
+ /// </summary>
+ [Test]
+ public void TestSettingSizeAndInject()
+ {
+ var settings = new Dictionary<string, string>
+ {
+ { "inject", "false" },
+ { "maxCodeLength", "8" }
+ };
+
+ TokenStream filtered = FilterInternational(settings);
+
+ assertEquals(typeof(DoubleMetaphoneFilter), filtered.GetType());
+ AssertTokenStreamContents(filtered, new[] { "ANTRNXNL" });
+ }
+
+ /// <summary>
+ /// An unrecognised argument must make the constructor throw an <see cref="ArgumentException"/>
+ /// whose message contains "Unknown parameters".
+ /// </summary>
+ [Test]
+ public void TestBogusArguments()
+ {
+ var bogusArgs = new Dictionary<string, string>();
+ bogusArgs["bogusArg"] = "bogusValue";
+ try
+ {
+ new DoubleMetaphoneFilterFactory(bogusArgs);
+ fail();
+ }
+ catch (ArgumentException expected)
+ {
+ bool mentionsUnknownParameters = expected.Message.Contains("Unknown parameters");
+ assertTrue(mentionsUnknownParameters);
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
new file mode 100644
index 0000000..387765f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
@@ -0,0 +1,122 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Phonetic.Language;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Exercises <see cref="PhoneticFilter"/> with several <see cref="IStringEncoder"/> implementations
+ /// </summary>
+ public class TestPhoneticFilter : BaseTokenStreamTestCase
+ {
+ /// <summary>
+ /// Checks the exact terms produced for each encoder, once with <c>inject</c> set
+ /// (each code is followed by the original term) and once without (codes only).
+ /// </summary>
+ [Test]
+ public void TestAlgorithms()
+ {
+ const string words = "aaa bbb ccc easgasg";
+ const string names = "Darda Karleen Datha Carlene";
+
+ AssertAlgorithm(new Metaphone(), true, words,
+ new[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+ AssertAlgorithm(new Metaphone(), false, words,
+ new[] { "A", "B", "KKK", "ESKS" });
+
+ AssertAlgorithm(new DoubleMetaphone(), true, words,
+ new[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+ AssertAlgorithm(new DoubleMetaphone(), false, words,
+ new[] { "A", "PP", "KK", "ASKS" });
+
+ AssertAlgorithm(new Soundex(), true, words,
+ new[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+ AssertAlgorithm(new Soundex(), false, words,
+ new[] { "A000", "B000", "C000", "E220" });
+
+ AssertAlgorithm(new RefinedSoundex(), true, words,
+ new[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+ AssertAlgorithm(new RefinedSoundex(), false, words,
+ new[] { "A0", "B1", "C3", "E034034" });
+
+ AssertAlgorithm(new Caverphone2(), true, names,
+ new[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
+ "TTA1111111", "Datha", "KLN1111111", "Carlene" });
+ AssertAlgorithm(new Caverphone2(), false, names,
+ new[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+ }
+
+ /// <summary>
+ /// Splits <paramref name="input"/> with a <see cref="WhitespaceTokenizer"/>, wraps it in a
+ /// <see cref="PhoneticFilter"/> built from <paramref name="encoder"/> and <paramref name="inject"/>,
+ /// and asserts that the resulting terms equal <paramref name="expected"/>.
+ /// </summary>
+ private static void AssertAlgorithm(IStringEncoder encoder, bool inject, string input, string[] expected)
+ {
+ var reader = new StringReader(input);
+ Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ AssertTokenStreamContents(new PhoneticFilter(source, encoder, inject), expected);
+ }
+
+ /// <summary>
+ /// Creates a fresh set of the encoders used by the random-data and empty-term tests.
+ /// </summary>
+ private static IStringEncoder[] NewEncoders()
+ {
+ // NOTE(review): RefinedSoundex was commented out of this list in the original code;
+ // the reason is not visible here, so it stays excluded.
+ return new IStringEncoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(), new Caverphone2() };
+ }
+
+ /// <summary>
+ /// Builds an analyzer that tokenizes on whitespace with a <see cref="MockTokenizer"/> and applies a
+ /// non-injecting <see cref="PhoneticFilter"/> using <paramref name="encoder"/>.
+ /// </summary>
+ private static Analyzer NewNonInjectingAnalyzer(IStringEncoder encoder)
+ {
+ return Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+ {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(source, new PhoneticFilter(source, encoder, false));
+ });
+ }
+
+ /// <summary>
+ /// Blasts some random strings through the analyzer, twice per encoder.
+ /// </summary>
+ [Test]
+ public void TestRandomStrings()
+ {
+ foreach (IStringEncoder encoder in NewEncoders())
+ {
+ // NOTE(review): both passes use inject = false, exactly as the original code did;
+ // the second pass may have been meant to cover inject = true - confirm before changing.
+ for (int pass = 0; pass < 2; pass++)
+ {
+ Analyzer phoneticAnalyzer = NewNonInjectingAnalyzer(encoder);
+ CheckRandomData(Random(), phoneticAnalyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Checks, for each encoder, that the empty string tokenized by a <see cref="KeywordTokenizer"/>
+ /// comes out of the <see cref="PhoneticFilter"/> as the single term "".
+ /// </summary>
+ [Test]
+ public void TestEmptyTerm()
+ {
+ foreach (IStringEncoder encoder in NewEncoders())
+ {
+ Analyzer emptyTermAnalyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+ {
+ Tokenizer source = new KeywordTokenizer(reader);
+ // The inject flag is drawn inside the callback, i.e. each time components are created.
+ return new TokenStreamComponents(source, new PhoneticFilter(source, encoder, Random().nextBoolean()));
+ });
+
+ CheckOneTerm(emptyTermAnalyzer, "", "");
+ }
+ }
+ }
+}