Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/06/27 20:33:46 UTC

[01/15] lucenenet git commit: API: Lucene.Net.Analysis.Common.Analysis.Util.AbstractAnalysisFactory: Changed return type of GetSet from ICollection to ISet.

Repository: lucenenet
Updated Branches:
  refs/heads/master 5f1b2e0cf -> c99735650


API: Lucene.Net.Analysis.Common.Analysis.Util.AbstractAnalysisFactory: Changed return type of GetSet from ICollection to ISet.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/368424f6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/368424f6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/368424f6

Branch: refs/heads/master
Commit: 368424f6678469109482f7eaf498329edaa061e9
Parents: 5f1b2e0
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Jun 28 02:32:46 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Jun 28 02:32:46 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Util/AbstractAnalysisFactory.cs                       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/368424f6/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index e4b5133..ec93dc4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -280,7 +280,7 @@ namespace Lucene.Net.Analysis.Util
 
         /// <summary>
         /// Returns whitespace- and/or comma-separated set of values, or null if none are found </summary>
-        public virtual ICollection<string> GetSet(IDictionary<string, string> args, string name)
+        public virtual ISet<string> GetSet(IDictionary<string, string> args, string name)
         {
             string s;
             if (args.TryGetValue(name, out s))


[14/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1ee3a9cc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1ee3a9cc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1ee3a9cc

Branch: refs/heads/master
Commit: 1ee3a9ccad8b8da64d54d19eac5d40beb7e48ca3
Parents: 368424f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Jun 28 02:34:21 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Jun 28 02:54:54 2017 +0700

----------------------------------------------------------------------
 CONTRIBUTING.md                                 |    2 +-
 Lucene.Net.Portable.sln                         |   36 +-
 Lucene.Net.sln                                  |   52 +
 .../BeiderMorseFilter.cs                        |  137 ++
 .../BeiderMorseFilterFactory.cs                 |   71 +
 .../DoubleMetaphoneFilter.cs                    |  126 ++
 .../DoubleMetaphoneFilterFactory.cs             |   67 +
 .../Language/AbstractCaverphone .cs             |   78 ++
 .../Language/Bm/BeiderMorseEncoder.cs           |  163 +++
 .../Language/Bm/Lang.cs                         |  276 ++++
 .../Language/Bm/Languages.cs                    |  324 +++++
 .../Language/Bm/NameType.cs                     |   69 +
 .../Language/Bm/PhoneticEngine.cs               |  578 ++++++++
 .../Language/Bm/ResourceConstants.cs            |   37 +
 .../Language/Bm/Rule.cs                         | 1069 +++++++++++++++
 .../Language/Bm/RuleType.cs                     |   68 +
 .../Language/Bm/ash_approx_any.txt              |  153 +++
 .../Language/Bm/ash_approx_common.txt           |  219 +++
 .../Language/Bm/ash_approx_cyrillic.txt         |   18 +
 .../Language/Bm/ash_approx_english.txt          |   47 +
 .../Language/Bm/ash_approx_french.txt           |   40 +
 .../Language/Bm/ash_approx_german.txt           |   72 +
 .../Language/Bm/ash_approx_hebrew.txt           |   18 +
 .../Language/Bm/ash_approx_hungarian.txt        |   18 +
 .../Language/Bm/ash_approx_polish.txt           |   83 ++
 .../Language/Bm/ash_approx_romanian.txt         |   18 +
 .../Language/Bm/ash_approx_russian.txt          |   47 +
 .../Language/Bm/ash_approx_spanish.txt          |   18 +
 .../Language/Bm/ash_exact_any.txt               |   52 +
 .../Language/Bm/ash_exact_approx_common.txt     |   82 ++
 .../Language/Bm/ash_exact_common.txt            |   33 +
 .../Language/Bm/ash_exact_cyrillic.txt          |   18 +
 .../Language/Bm/ash_exact_english.txt           |   18 +
 .../Language/Bm/ash_exact_french.txt            |   18 +
 .../Language/Bm/ash_exact_german.txt            |   18 +
 .../Language/Bm/ash_exact_hebrew.txt            |   18 +
 .../Language/Bm/ash_exact_hungarian.txt         |   18 +
 .../Language/Bm/ash_exact_polish.txt            |   23 +
 .../Language/Bm/ash_exact_romanian.txt          |   18 +
 .../Language/Bm/ash_exact_russian.txt           |   19 +
 .../Language/Bm/ash_exact_spanish.txt           |   18 +
 .../Language/Bm/ash_hebrew_common.txt           |  118 ++
 .../Language/Bm/ash_languages.txt               |   28 +
 .../Language/Bm/ash_rules_any.txt               |  332 +++++
 .../Language/Bm/ash_rules_cyrillic.txt          |  100 ++
 .../Language/Bm/ash_rules_english.txt           |  107 ++
 .../Language/Bm/ash_rules_french.txt            |   91 ++
 .../Language/Bm/ash_rules_german.txt            |  128 ++
 .../Language/Bm/ash_rules_hebrew.txt            |   62 +
 .../Language/Bm/ash_rules_hungarian.txt         |   84 ++
 .../Language/Bm/ash_rules_polish.txt            |  185 +++
 .../Language/Bm/ash_rules_romanian.txt          |   66 +
 .../Language/Bm/ash_rules_russian.txt           |  164 +++
 .../Language/Bm/ash_rules_spanish.txt           |   77 ++
 .../Language/Bm/gen_approx_any.txt              |  124 ++
 .../Language/Bm/gen_approx_arabic.txt           |   23 +
 .../Language/Bm/gen_approx_common.txt           |  223 +++
 .../Language/Bm/gen_approx_cyrillic.txt         |   18 +
 .../Language/Bm/gen_approx_czech.txt            |   18 +
 .../Language/Bm/gen_approx_dutch.txt            |   18 +
 .../Language/Bm/gen_approx_english.txt          |   47 +
 .../Language/Bm/gen_approx_french.txt           |   25 +
 .../Language/Bm/gen_approx_german.txt           |   73 +
 .../Language/Bm/gen_approx_greek.txt            |   18 +
 .../Language/Bm/gen_approx_greeklatin.txt       |   20 +
 .../Language/Bm/gen_approx_hebrew.txt           |   18 +
 .../Language/Bm/gen_approx_hungarian.txt        |   18 +
 .../Language/Bm/gen_approx_italian.txt          |   18 +
 .../Language/Bm/gen_approx_polish.txt           |   84 ++
 .../Language/Bm/gen_approx_portuguese.txt       |   18 +
 .../Language/Bm/gen_approx_romanian.txt         |   18 +
 .../Language/Bm/gen_approx_russian.txt          |   48 +
 .../Language/Bm/gen_approx_spanish.txt          |   21 +
 .../Language/Bm/gen_approx_turkish.txt          |   18 +
 .../Language/Bm/gen_exact_any.txt               |   33 +
 .../Language/Bm/gen_exact_approx_common.txt     |   79 ++
 .../Language/Bm/gen_exact_arabic.txt            |   18 +
 .../Language/Bm/gen_exact_common.txt            |   32 +
 .../Language/Bm/gen_exact_cyrillic.txt          |   18 +
 .../Language/Bm/gen_exact_czech.txt             |   18 +
 .../Language/Bm/gen_exact_dutch.txt             |   18 +
 .../Language/Bm/gen_exact_english.txt           |   18 +
 .../Language/Bm/gen_exact_french.txt            |   18 +
 .../Language/Bm/gen_exact_german.txt            |   18 +
 .../Language/Bm/gen_exact_greek.txt             |   18 +
 .../Language/Bm/gen_exact_greeklatin.txt        |   18 +
 .../Language/Bm/gen_exact_hebrew.txt            |   18 +
 .../Language/Bm/gen_exact_hungarian.txt         |   18 +
 .../Language/Bm/gen_exact_italian.txt           |   18 +
 .../Language/Bm/gen_exact_polish.txt            |   23 +
 .../Language/Bm/gen_exact_portuguese.txt        |   18 +
 .../Language/Bm/gen_exact_romanian.txt          |   18 +
 .../Language/Bm/gen_exact_russian.txt           |   19 +
 .../Language/Bm/gen_exact_spanish.txt           |   19 +
 .../Language/Bm/gen_exact_turkish.txt           |   18 +
 .../Language/Bm/gen_hebrew_common.txt           |  107 ++
 .../Language/Bm/gen_languages.txt               |   36 +
 .../Language/Bm/gen_rules_any.txt               |  367 +++++
 .../Language/Bm/gen_rules_arabic.txt            |   74 +
 .../Language/Bm/gen_rules_cyrillic.txt          |   99 ++
 .../Language/Bm/gen_rules_czech.txt             |   67 +
 .../Language/Bm/gen_rules_dutch.txt             |   78 ++
 .../Language/Bm/gen_rules_english.txt           |  113 ++
 .../Language/Bm/gen_rules_french.txt            |  114 ++
 .../Language/Bm/gen_rules_german.txt            |  129 ++
 .../Language/Bm/gen_rules_greek.txt             |   97 ++
 .../Language/Bm/gen_rules_greeklatin.txt        |  118 ++
 .../Language/Bm/gen_rules_hebrew.txt            |   62 +
 .../Language/Bm/gen_rules_hungarian.txt         |   83 ++
 .../Language/Bm/gen_rules_italian.txt           |   77 ++
 .../Language/Bm/gen_rules_polish.txt            |  185 +++
 .../Language/Bm/gen_rules_portuguese.txt        |  105 ++
 .../Language/Bm/gen_rules_romanian.txt          |   64 +
 .../Language/Bm/gen_rules_russian.txt           |  142 ++
 .../Language/Bm/gen_rules_spanish.txt           |   85 ++
 .../Language/Bm/gen_rules_turkish.txt           |   50 +
 .../Language/Bm/lang.txt                        |  293 ++++
 .../Language/Bm/sep_approx_any.txt              |   20 +
 .../Language/Bm/sep_approx_common.txt           |  115 ++
 .../Language/Bm/sep_approx_french.txt           |   18 +
 .../Language/Bm/sep_approx_hebrew.txt           |   18 +
 .../Language/Bm/sep_approx_italian.txt          |   18 +
 .../Language/Bm/sep_approx_portuguese.txt       |   18 +
 .../Language/Bm/sep_approx_spanish.txt          |   18 +
 .../Language/Bm/sep_exact_any.txt               |   18 +
 .../Language/Bm/sep_exact_approx_common.txt     |   79 ++
 .../Language/Bm/sep_exact_common.txt            |   32 +
 .../Language/Bm/sep_exact_french.txt            |   18 +
 .../Language/Bm/sep_exact_hebrew.txt            |   18 +
 .../Language/Bm/sep_exact_italian.txt           |   18 +
 .../Language/Bm/sep_exact_portuguese.txt        |   18 +
 .../Language/Bm/sep_exact_spanish.txt           |   18 +
 .../Language/Bm/sep_hebrew_common.txt           |   86 ++
 .../Language/Bm/sep_languages.txt               |   23 +
 .../Language/Bm/sep_rules_any.txt               |  155 +++
 .../Language/Bm/sep_rules_french.txt            |   91 ++
 .../Language/Bm/sep_rules_hebrew.txt            |   62 +
 .../Language/Bm/sep_rules_italian.txt           |   76 ++
 .../Language/Bm/sep_rules_portuguese.txt        |  104 ++
 .../Language/Bm/sep_rules_spanish.txt           |   95 ++
 .../Language/Caverphone1.cs                     |  131 ++
 .../Language/Caverphone2.cs                     |  133 ++
 .../Language/ColognePhonetic.cs                 |  501 +++++++
 .../Language/DaitchMokotoffSoundex.cs           |  620 +++++++++
 .../Language/DoubleMetaphone.cs                 | 1280 +++++++++++++++++
 .../Language/MatchRatingApproachEncoder.cs      |  425 ++++++
 .../Language/Metaphone.cs                       |  494 +++++++
 .../Language/Nysiis.cs                          |  370 +++++
 .../Language/RefinedSoundex.cs                  |  202 +++
 .../Language/Soundex.cs                         |  318 +++++
 .../Language/SoundexUtils.cs                    |  123 ++
 .../Language/StringEncoder.cs                   |   35 +
 .../Language/dmrules.txt                        |  200 +++
 .../Lucene.Net.Analysis.Phonetic.csproj         |  225 +++
 .../Lucene.Net.Analysis.Phonetic.project.json   |    8 +
 .../Lucene.Net.Analysis.Phonetic.xproj          |   19 +
 .../PhoneticFilter.cs                           |  109 ++
 .../PhoneticFilterFactory.cs                    |  187 +++
 .../Properties/AssemblyInfo.cs                  |   48 +
 src/Lucene.Net.Analysis.Phonetic/project.json   |   54 +
 .../DoubleMetaphoneFilterTest.cs                |  111 ++
 .../Language/Bm/BeiderMorseEncoderTest.cs       |  255 ++++
 .../Bm/CacheSubSequencePerformanceTest.cs       |  138 ++
 .../Language/Bm/LanguageGuessingTest.cs         |   84 ++
 .../Bm/PhoneticEnginePerformanceTest.cs         |  141 ++
 .../Language/Bm/PhoneticEngineRegressionTest.cs |  234 ++++
 .../Language/Bm/PhoneticEngineTest.cs           |   89 ++
 .../Language/Bm/RuleTest.cs                     |  163 +++
 .../Language/Caverphone1Test.cs                 |  109 ++
 .../Language/Caverphone2Test .cs                |  375 +++++
 .../Language/ColognePhoneticTest.cs             |  171 +++
 .../Language/DaitchMokotoffSoundexTest.cs       |  176 +++
 .../Language/DoubleMetaphone2Test.cs            | 1291 ++++++++++++++++++
 .../Language/DoubleMetaphoneTest.cs             | 1266 +++++++++++++++++
 .../Language/MatchRatingApproachEncoderTest.cs  |  609 +++++++++
 .../Language/MetaphoneTest.cs                   |  518 +++++++
 .../Language/NysiisTest.cs                      |  319 +++++
 .../Language/RefinedSoundexTest.cs              |   99 ++
 .../Language/SoundexTest.cs                     |  424 ++++++
 .../Language/StringEncoderAbstractTest.cs       |  164 +++
 .../Lucene.Net.Tests.Analysis.Phonetic.csproj   |  108 ++
 ...ene.Net.Tests.Analysis.Phonetic.project.json |   11 +
 .../Lucene.Net.Tests.Analysis.Phonetic.xproj    |   21 +
 .../Properties/AssemblyInfo.cs                  |   42 +
 .../TestBeiderMorseFilter.cs                    |  132 ++
 .../TestBeiderMorseFilterFactory.cs             |   89 ++
 .../TestDoubleMetaphoneFilterFactory.cs         |   70 +
 .../TestPhoneticFilter.cs                       |  122 ++
 .../TestPhoneticFilterFactory.cs                |  228 ++++
 .../project.json                                |   45 +
 190 files changed, 23900 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4e00086..95019c5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -55,7 +55,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid
 * [Lucene.Net.Analysis.ICU](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu) (note that we will be putting this functionality into the Lucene.Net.ICU package)
 * [Lucene.Net.Analysis.Kuromoji](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/kuromoji)
 
-There are a few other specialized Analysis packages ([Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/morfologik), [Phonetic](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/phonetic), [UIMA](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/uima)) that have dependencies that would also need to be ported if they don't exist in .NET yet.
+There are a few other specialized packages ([Analysis.Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/morfologik), [Analysis.UIMA](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/uima), [Benchmark](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/benchmark)) that have dependencies that would also need to be ported if they don't exist in .NET yet.
 
 There are several command-line utilities for tasks such as maintaining indexes that just need to be put into a console application and "usage" documentation updated for them to be useful (which might be helpful for those who don't want to install Java to run such utilities from the Lucene project). See the [JIRA Issues](https://issues.apache.org/jira/issues/?jql=project%20%3D%20LUCENENET%20AND%20status%20%3D%20Open%20AND%20text%20~%20%22CLI%22) for the current list.
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index d3678ee..0b7f53e 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -89,6 +89,10 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Analysis.SmartCn
 EndProject
 Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Analysis.SmartCn", "src\Lucene.Net.Tests.Analysis.SmartCn\Lucene.Net.Tests.Analysis.SmartCn.xproj", "{2870FB52-1239-493F-A0BE-951660194A66}"
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Analysis.Phonetic", "src\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.xproj", "{56B2FFB7-6870-4420-8BC7-187ADF5341D9}"
+EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Analysis.Phonetic", "src\Lucene.Net.Tests.Analysis.Phonetic\Lucene.Net.Tests.Analysis.Phonetic.xproj", "{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -393,14 +397,6 @@ Global
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|Any CPU.Build.0 = Release|Any CPU
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.ActiveCfg = Release|Any CPU
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.Build.0 = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.ActiveCfg = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.Build.0 = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.Build.0 = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.ActiveCfg = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.Build.0 = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|x86.ActiveCfg = Debug|Any CPU
@@ -409,6 +405,30 @@ Global
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|Any CPU.Build.0 = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|x86.ActiveCfg = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|x86.Build.0 = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.Build.0 = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.Build.0 = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.ActiveCfg = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.Build.0 = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|x86.Build.0 = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|Any CPU.Build.0 = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|x86.ActiveCfg = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|x86.Build.0 = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|x86.Build.0 = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|x86.ActiveCfg = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index be5b2b9..74a64a3 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -98,6 +98,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.SmartCn
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.SmartCn", "src\Lucene.Net.Tests.Analysis.SmartCn\Lucene.Net.Tests.Analysis.SmartCn.csproj", "{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.Phonetic", "src\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.csproj", "{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.Phonetic", "src\Lucene.Net.Tests.Analysis.Phonetic\Lucene.Net.Tests.Analysis.Phonetic.csproj", "{A2867797-0A5D-4878-8F59-58C399C9A4E4}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -955,6 +959,54 @@ Global
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|x86.ActiveCfg = Release|Any CPU
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|x86.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|x86.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|x86.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Any CPU.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|x86.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|x86.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|x86.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|x86.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|x86.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|x86.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Any CPU.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|x86.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|x86.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|x86.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
new file mode 100644
index 0000000..b343994
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
@@ -0,0 +1,137 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Diagnostics;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// TokenFilter for Beider-Morse phonetic encoding.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    /// <seealso cref="BeiderMorseEncoder"/>
+    public sealed class BeiderMorseFilter : TokenFilter
+    {
+        private readonly PhoneticEngine engine;
+        private readonly LanguageSet languages;
+
+        // output is a string such as ab|ac|...
+        // in complex cases like d'angelo it's (anZelo|andZelo|...)-(danZelo|...)
+        // if there are multiple 's, it starts to nest...
+        private static readonly Regex pattern = new Regex("([^()|-]+)", RegexOptions.Compiled);
+
+        private bool isReset = false;
+        // matcher over any buffered output
+        private Match matcher = pattern.Match("");
+        // encoded representation
+        private string encoded;
+        // preserves all attributes for any buffered outputs
+        private State state;
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+
+        /// <summary>
+        /// Calls <see cref="BeiderMorseFilter(TokenStream, PhoneticEngine, LanguageSet)"/>
+        /// </summary>
+        /// <param name="input"><see cref="TokenStream"/> to filter</param>
+        /// <param name="engine">Configured <see cref="PhoneticEngine"/> with BM settings.</param>
+        public BeiderMorseFilter(TokenStream input, PhoneticEngine engine)
+            : this(input, engine, null)
+        {
+        }
+
+        /// <summary>
+        /// Create a new <see cref="BeiderMorseFilter"/>
+        /// </summary>
+        /// <param name="input"><see cref="TokenStream"/> to filter</param>
+        /// <param name="engine">Configured <see cref="PhoneticEngine"/> with BM settings.</param>
+        /// <param name="languages">Optional Set of original languages. Can be <c>null</c> (which means it will be guessed).</param>
+        public BeiderMorseFilter(TokenStream input, PhoneticEngine engine, LanguageSet languages)
+            : base(input)
+        {
+            this.engine = engine;
+            this.languages = languages;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (!isReset)
+            {
+                matcher = matcher.NextMatch();
+            }
+            isReset = false;
+
+            if (matcher.Success)
+            {
+                Debug.Assert(state != null && encoded != null);
+                RestoreState(state);
+
+                int start = matcher.Index;
+                int end = start + matcher.Length;
+                termAtt.SetEmpty().Append(encoded, start, end);
+                posIncAtt.PositionIncrement = 0;
+                return true;
+            }
+
+            if (m_input.IncrementToken())
+            {
+                encoded = (languages == null)
+                    ? engine.Encode(termAtt.ToString())
+                    : engine.Encode(termAtt.ToString(), languages);
+                state = CaptureState();
+
+                matcher = pattern.Match(encoded);
+                if (matcher.Success)
+                {
+                    int start = matcher.Index;
+                    int end = start + matcher.Length;
+                    termAtt.SetEmpty().Append(encoded, start, end);
+                }
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+
+            // LUCENENET: Since we need to "reset" the Match
+            // object, we also need an "isReset" flag to indicate
+            // whether we are at the head of the match and to 
+            // take the appropriate measures to ensure we don't 
+            // overwrite our matcher variable with 
+            // matcher = matcher.NextMatch();
+            // before it is time. A string could potentially
+            // match on index 0, so we need another variable to
+            // manage this state.
+            matcher = pattern.Match("");
+            isReset = true;
+        }
+    }
+}
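
As an illustration only (not part of the commit), a minimal Analyzer wiring the new filter; the analyzer name and the choice of WhitespaceTokenizer are assumptions:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Phonetic;
    using Lucene.Net.Analysis.Phonetic.Language.Bm;
    using Lucene.Net.Util;

    public sealed class BeiderMorseAnalyzer : Analyzer
    {
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            // GENERIC + APPROX + concat are the commons-codec defaults noted in the factory below.
            var engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
            // Passing no LanguageSet lets the engine guess the source language.
            TokenStream result = new BeiderMorseFilter(source, engine);
            return new TokenStreamComponents(source, result);
        }
    }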

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
new file mode 100644
index 0000000..d4331bb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
@@ -0,0 +1,71 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="BeiderMorseFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_bm" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.BeiderMorseFilterFactory"
+    ///        nameType="GENERIC" ruleType="APPROX" 
+    ///        concat="true" languageSet="auto"
+    ///     &lt;/filter&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class BeiderMorseFilterFactory : TokenFilterFactory
+    {
+        private readonly PhoneticEngine engine;
+        private readonly LanguageSet languageSet;
+
+        /// <summary>Creates a new <see cref="BeiderMorseFilterFactory"/></summary>
+        public BeiderMorseFilterFactory(IDictionary<string, string> args)
+                  : base(args)
+        {
+            // PhoneticEngine = NameType + RuleType + concat
+            // we use commons-codec's defaults: GENERIC + APPROX + true
+            NameType nameType = (NameType)Enum.Parse(typeof(NameType), Get(args, "nameType", NameType.GENERIC.ToString()), true);
+            RuleType ruleType = (RuleType)Enum.Parse(typeof(RuleType), Get(args, "ruleType", RuleType.APPROX.ToString()), true);
+
+            bool concat = GetBoolean(args, "concat", true);
+            engine = new PhoneticEngine(nameType, ruleType, concat);
+
+            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
+            ISet<string> langs = GetSet(args, "languageSet");
+            languageSet = (null == langs || (1 == langs.Count && langs.Contains("auto"))) ? null : LanguageSet.From(langs);
+            if (!(args.Count == 0))
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new BeiderMorseFilter(input, engine, languageSet);
+        }
+    }
+}
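
A hedged, code-level equivalent of the XML configuration in the factory's summary; the argument values are illustrative and the wrapper class is hypothetical:

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Phonetic;

    internal static class BeiderMorseFactoryExample
    {
        // "input" is an assumed, already-constructed TokenStream.
        public static TokenStream Wrap(TokenStream input)
        {
            var args = new Dictionary<string, string>
            {
                { "nameType", "GENERIC" },   // default
                { "ruleType", "APPROX" },    // default
                { "concat", "true" },        // default
                { "languageSet", "auto" }    // "auto" means the language is guessed
            };
            var factory = new BeiderMorseFilterFactory(args); // consumes the args
            return factory.Create(input);
        }
    }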

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
new file mode 100644
index 0000000..8ee37fa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
@@ -0,0 +1,126 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language;
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Filter for DoubleMetaphone (supporting secondary codes)
+    /// </summary>
+    public sealed class DoubleMetaphoneFilter : TokenFilter
+    {
+        //private static readonly string TOKEN_TYPE = "DoubleMetaphone"; // LUCENENET: Not used
+
+        private readonly LinkedList<State> remainingTokens = new LinkedList<State>();
+        private readonly DoubleMetaphone encoder = new DoubleMetaphone();
+        private readonly bool inject;
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posAtt;
+
+        /// <summary>
+        /// Creates a <see cref="DoubleMetaphoneFilter"/> with the specified maximum code length, 
+        /// and either adding encoded forms as synonyms (<c>inject=true</c>) or
+        /// replacing them.
+        /// </summary>
+        public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, bool inject)
+            : base(input)
+        {
+            this.encoder.MaxCodeLen = maxCodeLength;
+            this.inject = inject;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.posAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            for (;;)
+            {
+                if (!(remainingTokens.Count == 0))
+                {
+                    // clearAttributes();  // not currently necessary
+                    var first = remainingTokens.First;
+                    remainingTokens.Remove(first);
+                    RestoreState(first.Value);
+                    return true;
+                }
+
+                if (!m_input.IncrementToken()) return false;
+
+                int len = termAtt.Length;
+                if (len == 0) return true; // pass through zero length terms
+
+                int firstAlternativeIncrement = inject ? 0 : posAtt.PositionIncrement;
+
+                string v = termAtt.ToString();
+                string primaryPhoneticValue = encoder.GetDoubleMetaphone(v);
+                string alternatePhoneticValue = encoder.GetDoubleMetaphone(v, true);
+
+                // a flag to lazily save state if needed... this avoids a save/restore when only
+                // one token will be generated.
+                bool saveState = inject;
+
+                if (primaryPhoneticValue != null && primaryPhoneticValue.Length > 0 && !primaryPhoneticValue.Equals(v))
+                {
+                    if (saveState)
+                    {
+                        remainingTokens.AddLast(CaptureState());
+                    }
+                    posAtt.PositionIncrement = firstAlternativeIncrement;
+                    firstAlternativeIncrement = 0;
+                    termAtt.SetEmpty().Append(primaryPhoneticValue);
+                    saveState = true;
+                }
+
+                if (alternatePhoneticValue != null && alternatePhoneticValue.Length > 0
+                        && !alternatePhoneticValue.Equals(primaryPhoneticValue)
+                        && !primaryPhoneticValue.Equals(v))
+                {
+                    if (saveState)
+                    {
+                        remainingTokens.AddLast(CaptureState());
+                        saveState = false;
+                    }
+                    posAtt.PositionIncrement = firstAlternativeIncrement;
+                    termAtt.SetEmpty().Append(alternatePhoneticValue);
+                    saveState = true;
+                }
+
+                // Just one token to return, so no need to capture/restore
+                // any state, simply return it.
+                if (remainingTokens.Count == 0)
+                {
+                    return true;
+                }
+
+                if (saveState)
+                {
+                    remainingTokens.AddLast(CaptureState());
+                }
+            }
+        }
+
+        public override void Reset()
+        {
+            m_input.Reset();
+            remainingTokens.Clear();
+        }
+    }
+}
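
A short usage sketch (not part of the commit) showing the inject semantics described in the constructor doc; the tokenizer choice is an assumption:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Phonetic;
    using Lucene.Net.Util;

    internal static class DoubleMetaphoneExample
    {
        public static TokenStream Build(TextReader reader)
        {
            Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            // inject=true keeps the original term and adds the (up to 4 character)
            // Double Metaphone code(s) as synonyms at the same position;
            // inject=false would replace the original term instead.
            return new DoubleMetaphoneFilter(source, maxCodeLength: 4, inject: true);
        }
    }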

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
new file mode 100644
index 0000000..d70fd41
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
@@ -0,0 +1,67 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="DoubleMetaphoneFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class DoubleMetaphoneFilterFactory : TokenFilterFactory
+    {
+        /// <summary>parameter name: true if encoded tokens should be added as synonyms</summary>
+        public static readonly string INJECT = "inject";
+        /// <summary>parameter name: restricts the length of the phonetic code</summary>
+        public static readonly string MAX_CODE_LENGTH = "maxCodeLength";
+        /// <summary>default maxCodeLength if not specified</summary>
+        public static readonly int DEFAULT_MAX_CODE_LENGTH = 4;
+
+        private readonly bool inject;
+        private readonly int maxCodeLength;
+
+        /// <summary>
+        /// Creates a new <see cref="DoubleMetaphoneFilterFactory"/>
+        /// </summary>
+        public DoubleMetaphoneFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            inject = GetBoolean(args, INJECT, true);
+            maxCodeLength = GetInt32(args, MAX_CODE_LENGTH, DEFAULT_MAX_CODE_LENGTH);
+            if (!(args.Count == 0))
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new DoubleMetaphoneFilter(input, maxCodeLength, inject);
+        }
+    }
+}
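
For comparison with the XML above, a brief hedged sketch: constructing the factory with empty args falls back to the documented defaults (inject=true, maxCodeLength=4):

    using System.Collections.Generic;
    using Lucene.Net.Analysis.Phonetic;

    // Illustrative only; omitted parameters take the defaults defined above.
    var factory = new DoubleMetaphoneFilterFactory(new Dictionary<string, string>());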

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs b/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs
new file mode 100644
index 0000000..b1db7fa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs	
@@ -0,0 +1,78 @@
+// commons-codec version compatibility level: 1.9
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Caverphone value.
+    /// <para/>
+    /// This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
+    /// algorithm.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// See <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
+    /// </summary>
+    public abstract class AbstractCaverphone : IStringEncoder
+    {
+        /// <summary>
+        /// Creates an instance of the Caverphone encoder
+        /// </summary>
+        public AbstractCaverphone()
+            : base()
+        {
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of
+        //     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
+        //     *
+        //     * @param source
+        //     *            Object to encode
+        //     * @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String
+        //     *         supplied.
+        //     * @throws EncoderException
+        //     *             if the parameter supplied is not of type java.lang.String
+        //     */
+        //    @Override
+        //public Object encode(final Object source) throws EncoderException
+        //    {
+        //    if (!(source instanceof String)) {
+        //            throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
+        //        }
+        //    return this.encode((String) source);
+        //    }
+
+        // LUCENENET specific - must provide implementation for IStringEncoder
+        public abstract string Encode(string source);
+
+        /// <summary>
+        /// Tests if the encodings of two strings are equal.
+        /// <para/>
+        /// This method might be promoted to a new AbstractStringEncoder superclass.
+        /// </summary>
+        /// <param name="str1">First of two strings to compare.</param>
+        /// <param name="str2">Second of two strings to compare.</param>
+        /// <returns><c>true</c> if the encodings of these strings are identical, <c>false</c> otherwise.</returns>
+        public virtual bool IsEncodeEqual(string str1, string str2) 
+        {
+            return this.Encode(str1).Equals(this.Encode(str2));
+        }
+    }
+}
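
A brief sketch of IsEncodeEqual in use, assuming the Caverphone2 subclass added elsewhere in this commit; the sample surnames are illustrative, not taken from the tests:

    using Lucene.Net.Analysis.Phonetic.Language;

    internal static class CaverphoneExample
    {
        public static void Demo()
        {
            AbstractCaverphone encoder = new Caverphone2();
            string code = encoder.Encode("Thompson");                 // fixed-length Caverphone 2.0 code
            bool same = encoder.IsEncodeEqual("Thompson", "Tomson");  // true only if both encode identically
        }
    }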

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
new file mode 100644
index 0000000..bb1f683
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
@@ -0,0 +1,163 @@
+// commons-codec version compatibility level: 1.9
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes strings into their Beider-Morse phonetic encoding.
+    /// </summary>
+    /// <remarks>
+    /// Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range
+    /// of words.
+    /// <para/>
+    /// This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it
+    /// may not be thread-safe. If you require a guaranteed thread-safe encoding then use
+    /// <see cref="PhoneticEngine"/> directly.
+    /// <para/>
+    /// <b>Encoding overview</b>
+    /// <para/>
+    /// Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what
+    /// language the word comes from. For example, if it ends in "<c>ault</c>" then it infers that the word is French.
+    /// Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some
+    /// runs of letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up
+    /// into phonemes at different places, so this stage results in a set of possible language-specific phonetic
+    /// representations. Lastly, this language-specific phonetic representation is processed by a table of rules that
+    /// re-writes it phonetically taking into account systematic pronunciation differences between languages, to move
+    /// it towards a pan-indo-european phonetic representation. Again, sometimes there are multiple ways this could be
+    /// done and sometimes things that can be pronounced in several ways in the source language have only one way to
+    /// represent them in this average phonetic language, so the result is again a set of phonetic spellings.
+    /// <para/>
+    /// Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated.
+    /// In this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final
+    /// encoding. Secondly, some names have standard prefixes, for example, "<c>Mac/Mc</c>" in Scottish (English)
+    /// names. As sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word
+    /// is encoded once with the prefix and once without it. The resulting encoding contains one and then the other result.
+    /// <para/>
+    /// <b>Encoding format</b>
+    /// <para/>
+    /// Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where
+    /// there are multiple possible phonetic representations, these are joined with a pipe (<c>|</c>) character.
+    /// If multiple hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed
+    /// in parentheses and these blocks are then joined with hyphens. For example, "<c>d'ortley</c>" has a possible
+    /// prefix. The form without prefix encodes to <c>ortlaj|ortlej</c>, while the form with prefix encodes to
+    /// <c>dortlaj|dortlej</c>. Thus, the full, combined encoding is <c>(ortlaj|ortlej)-(dortlaj|dortlej)</c>.
+    /// <para/>
+    /// The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many
+    /// potential phonetic interpretations. For example, <c>Renault</c> encodes to
+    /// <c>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</c>. The <see cref="RuleType.APPROX"/> rules will tend to produce larger
+    /// encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word.
+    /// Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
+    /// splitting on pipe (<c>|</c>) and indexing under each of these alternatives.
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class BeiderMorseEncoder : IStringEncoder
+    {
+        // Implementation note: This class is a spring-friendly facade to PhoneticEngine. It allows read/write configuration
+        // of an immutable PhoneticEngine instance that will be delegated to for the actual encoding.
+
+        // a cached object
+        private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //public object Encode(object source)
+        //{
+        //    if (!(source is string))
+        //    {
+        //        throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String");
+        //    }
+        //    return encode((string)source);
+        //}
+
+        public virtual string Encode(string source)
+        {
+            if (source == null)
+            {
+                return null;
+            }
+            return this.engine.Encode(source);
+        }
+
+        /// <summary>
+        /// Gets or Sets the name type currently in operation. Use <see cref="NameType.GENERIC"/> unless you specifically want phonetic encodings
+        /// optimized for Ashkenazi or Sephardic Jewish family names.
+        /// </summary>
+        // LUCENENET NOTE: Made setter into property because
+        // its behavior is similar to what would happen when
+        // setting a property, even though it is actually
+        // replacing a related instance.
+        public virtual NameType NameType
+        {
+            get { return this.engine.NameType; }
+            set
+            {
+                this.engine = new PhoneticEngine(value,
+                                             this.engine.RuleType,
+                                             this.engine.IsConcat,
+                                             this.engine.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Gets or Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered.
+        /// <see cref="RuleType.APPROX"/> or <see cref="RuleType.EXACT"/> for approximate or exact phonetic matches.
+        /// </summary>
+        public virtual RuleType RuleType
+        {
+            get { return this.engine.RuleType; }
+            set
+            {
+                this.engine = new PhoneticEngine(this.engine.NameType,
+                                             value,
+                                             this.engine.IsConcat,
+                                             this.engine.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Gets or Sets how multiple possible phonetic encodings are combined.
+        /// <c>true</c> if multiple encodings are to be combined with a '|', <c>false</c> if just the first one is 
+        /// to be considered.
+        /// </summary>
+        public virtual bool IsConcat
+        {
+            get { return this.engine.IsConcat; }
+            set
+            {
+                this.engine = new PhoneticEngine(this.engine.NameType,
+                                             this.engine.RuleType,
+                                             value,
+                                             this.engine.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Sets the maximum number of phonemes that shall be considered by the engine.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        /// <param name="maxPhonemes">the maximum number of phonemes returned by the engine</param>
+        public virtual void SetMaxPhonemes(int maxPhonemes)
+        {
+            this.engine = new PhoneticEngine(this.engine.NameType,
+                                             this.engine.RuleType,
+                                             this.engine.IsConcat,
+                                             maxPhonemes);
+        }
+    }
+}

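A minimal usage sketch of BeiderMorseEncoder as configured above; the max-phonemes value is illustrative only, and the expected output for "Renault" is the one quoted in the remarks:

    using Lucene.Net.Analysis.Phonetic.Language.Bm;

    // Defaults are NameType.GENERIC, RuleType.APPROX and concatenation enabled.
    var encoder = new BeiderMorseEncoder();
    encoder.NameType = NameType.GENERIC;   // generic names and words
    encoder.RuleType = RuleType.APPROX;    // approximate (wider) phonetic matching
    encoder.IsConcat = true;               // join alternatives with '|'
    encoder.SetMaxPhonemes(10);            // illustrative limit, not the library default

    // Per the remarks, "Renault" encodes to a pipe-separated set of alternatives
    // such as rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult.
    string encoded = encoder.Encode("Renault");
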
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
new file mode 100644
index 0000000..5889e8f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
@@ -0,0 +1,276 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Reflection;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Language guessing utility.
+    /// </summary>
+    /// <remarks>
+    /// This class encapsulates rules used to guess the possible languages that a word originates from. This is
+    /// done by reference to a whole series of rules distributed in resource files.
+    /// <para/>
+    /// Instances of this class are typically managed through the static factory method <see cref="GetInstance(NameType)"/>.
+    /// Unless you are developing your own language guessing rules, you will not need to interact with this class directly.
+    /// <para/>
+    /// This class is intended to be immutable and thread-safe.
+    /// <para/>
+    /// <b>Lang resources</b>
+    /// <para/>
+    /// Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files.
+    /// They are systematically named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.lang.txt</c>
+    /// The format of these resources is the following:
+    /// <list type="table">
+    ///     <item>
+    ///         <term>Rules:</term>
+    ///         <description>
+    ///             Whitespace separated strings.
+    ///             There should be 3 columns to each row, and these will be interpreted as:
+    ///             <list type="number">
+    ///                 <item><term>pattern:</term><description>a regular expression.</description></item>
+    ///                 <item><term>languages:</term><description>a '+'-separated list of languages.</description></item>
+    ///                 <item><term>acceptOnMatch:</term><description>'true' or 'false' indicating if a match rules in or rules out the language.</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// Port of lang.php
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Lang
+    {
+        // Implementation note: This class is divided into two sections. The first part is a static factory interface that
+        // exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
+        // encapsulate a particular language-guessing rule table and the language guessing itself.
+        //
+        // It may make sense in the future to expose the private constructor to allow power users to build custom language-
+        // guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
+        // should be strongly encouraged to use the static factory <code>instance</code> method to get their Lang instances.
+
+        private static Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+        private static Regex TOKEN = new Regex("\\+", RegexOptions.Compiled);
+
+        private sealed class LangRule
+        {
+            internal readonly bool acceptOnMatch;
+            internal readonly ISet<string> languages;
+            private readonly Regex pattern;
+
+            internal LangRule(Regex pattern, ISet<string> languages, bool acceptOnMatch)
+            {
+                this.pattern = pattern;
+                this.languages = languages;
+                this.acceptOnMatch = acceptOnMatch;
+            }
+
+            public bool Matches(string txt)
+            {
+                Match matcher = this.pattern.Match(txt);
+                return matcher.Success;
+            }
+        }
+
+        private static readonly IDictionary<NameType, Lang> Langs = new Dictionary<NameType, Lang>();
+
+        private static readonly string LANGUAGE_RULES_RN = "lang.txt";
+
+        static Lang()
+        {
+            foreach (NameType s in Enum.GetValues(typeof(NameType)))
+            {
+                Langs[s] = LoadFromResource(LANGUAGE_RULES_RN, Languages.GetInstance(s));
+            }
+        }
+
+        /// <summary>
+        /// Gets a Lang instance for one of the supported <see cref="NameType"/>s.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to look up.</param>
+        /// <returns>A Lang encapsulating the language guessing rules for that name type.</returns>
+        public static Lang GetInstance(NameType nameType)
+        {
+            Lang result;
+            Langs.TryGetValue(nameType, out result);
+            return result;
+        }
+
+        /// <summary>
+        /// Loads language rules from a resource.
+        /// <para/>
+        /// In normal use, you will obtain instances of Lang through the <see cref="GetInstance(NameType)"/> method.
+        /// You will only need to call this yourself if you are developing custom language mapping rules.
+        /// </summary>
+        /// <param name="languageRulesResourceName">The fully-qualified or partially-qualified resource name to load.</param>
+        /// <param name="languages">The languages that these rules will support.</param>
+        /// <returns>A Lang encapsulating the loaded language-guessing rules.</returns>
+        public static Lang LoadFromResource(string languageRulesResourceName, Languages languages)
+        {
+            IList<LangRule> rules = new List<LangRule>();
+            Stream lRulesIS = typeof(Lang).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Lang), languageRulesResourceName);
+
+            if (lRulesIS == null)
+            {
+                throw new InvalidOperationException("Unable to resolve required resource: " + languageRulesResourceName);
+            }
+
+            using (TextReader reader = new StreamReader(lRulesIS, ResourceConstants.ENCODING))
+            {
+                bool inExtendedComment = false;
+                string rawLine;
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    string line = rawLine;
+                    if (inExtendedComment)
+                    {
+                        // check for closing comment marker, otherwise discard doc comment line
+                        if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = false;
+                        }
+                    }
+                    else
+                    {
+                        if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = true;
+                        }
+                        else
+                        {
+                            // discard comments
+                            int cmtI = line.IndexOf(ResourceConstants.CMT);
+                            if (cmtI >= 0)
+                            {
+                                line = line.Substring(0, cmtI);
+                            }
+
+                            // trim leading-trailing whitespace
+                            line = line.Trim();
+
+                            if (line.Length == 0)
+                            {
+                                continue; // empty lines can be safely skipped
+                            }
+
+                            // split it up
+                            string[] parts = WHITESPACE.Split(line);
+
+                            if (parts.Length != 3)
+                            {
+                                throw new ArgumentException("Malformed line '" + rawLine +
+                                        "' in language resource '" + languageRulesResourceName + "'");
+                            }
+
+                            Regex pattern = new Regex(parts[0], RegexOptions.Compiled);
+                            string[] langs = TOKEN.Split(parts[1]);
+                            bool accept = parts[2].Equals("true");
+
+                            rules.Add(new LangRule(pattern, new HashSet<string>(langs), accept));
+                        }
+                    }
+                }
+            }
+            return new Lang(rules, languages);
+        }
+
+        private readonly Languages languages;
+        private readonly IList<LangRule> rules;
+
+        private Lang(IList<LangRule> rules, Languages languages)
+        {
+            this.rules = Collections.UnmodifiableList(rules);
+            this.languages = languages;
+        }
+
+        /// <summary>
+        /// Guesses the language of a word.
+        /// </summary>
+        /// <param name="text">The word.</param>
+        /// <returns>The language that the word originates from or <see cref="Languages.ANY"/> if there was no unique match.</returns>
+        public virtual string GuessLanguage(string text)
+        {
+            LanguageSet ls = GuessLanguages(text);
+            return ls.IsSingleton ? ls.GetAny() : Languages.ANY;
+        }
+
+        /// <summary>
+        /// Guesses the languages of a word.
+        /// </summary>
+        /// <param name="input">The word.</param>
+        /// <returns>A Set of Strings of language names that are potential matches for the input word.</returns>
+        public virtual LanguageSet GuessLanguages(string input)
+        {
+            string text = input.ToLowerInvariant();
+
+            ISet<string> langs = new HashSet<string>(this.languages.GetLanguages());
+            foreach (LangRule rule in this.rules)
+            {
+                if (rule.Matches(text))
+                {
+                    if (rule.acceptOnMatch)
+                    {
+                        List<string> toRemove = new List<string>();
+                        foreach (var item in langs)
+                        {
+                            if (!rule.languages.Contains(item))
+                            {
+                                toRemove.Add(item);
+                            }
+                        }
+                        foreach (var item in toRemove)
+                        {
+                            langs.Remove(item);
+                        }
+                    }
+                    else
+                    {
+                        foreach (var item in rule.languages)
+                        {
+                            langs.Remove(item);
+                        }
+                    }
+                }
+            }
+
+            LanguageSet ls = LanguageSet.From(langs);
+            return ls.Equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
+        }
+    }
+}

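A short sketch of how Lang is typically used; the sample word is illustrative, and the guessed languages depend entirely on the rules loaded from lang.txt (three whitespace-separated columns per rule: a regex pattern, a '+'-separated language list, and true/false for acceptOnMatch):

    using Lucene.Net.Analysis.Phonetic.Language.Bm;

    // Instances are obtained through the static factory, one per NameType.
    Lang lang = Lang.GetInstance(NameType.GENERIC);

    // All candidate languages for a word; if every language is ruled out,
    // Languages.ANY_LANGUAGE is returned instead of an empty set.
    LanguageSet candidates = lang.GuessLanguages("schneider");

    // Collapse to a single answer: the unique candidate if there is exactly
    // one, otherwise Languages.ANY ("any").
    string guess = lang.GuessLanguage("schneider");
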
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
new file mode 100644
index 0000000..0b058ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
@@ -0,0 +1,324 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Language codes.
+    /// </summary>
+    /// <remarks>
+    /// Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
+    /// systematically named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_languages.txt</c>
+    /// <para/>
+    /// The format of these resources is the following:
+    /// <list type="bullet">
+    ///     <item>
+    ///         <term>Language:</term>
+    ///         <description>A single string containing no whitespace.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// Ported from language.php
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Languages
+    {
+        // Implementation note: This class is divided into two sections. The first part is a static factory interface that
+        // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
+        // languages, and a second part that provides instance methods for accessing this set of supported languages.
+
+        public static readonly string ANY = "any";
+
+        private static readonly IDictionary<NameType, Languages> LANGUAGES = new Dictionary<NameType, Languages>();
+
+        static Languages()
+        {
+            foreach (NameType s in Enum.GetValues(typeof(NameType)))
+            {
+                LANGUAGES[s] = GetInstance(LangResourceName(s));
+            }
+        }
+
+        public static Languages GetInstance(NameType nameType)
+        {
+            Languages result;
+            LANGUAGES.TryGetValue(nameType, out result);
+            return result;
+        }
+
+        public static Languages GetInstance(string languagesResourceName)
+        {
+            // read languages list
+            ISet<string> ls = new HashSet<string>();
+            Stream langIS = typeof(Languages).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Languages), languagesResourceName);
+
+            if (langIS == null)
+            {
+                throw new ArgumentException("Unable to resolve required resource: " + languagesResourceName);
+            }
+
+            using (TextReader reader = new StreamReader(langIS, ResourceConstants.ENCODING))
+            {
+                bool inExtendedComment = false;
+                string rawLine;
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    string line = rawLine.Trim();
+                    if (inExtendedComment)
+                    {
+                        if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = false;
+                        }
+                    }
+                    else
+                    {
+                        if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = true;
+                        }
+                        else if (line.Length > 0)
+                        {
+                            ls.Add(line);
+                        }
+                    }
+                }
+            }
+
+            return new Languages(Collections.UnmodifiableSet(ls));
+        }
+
+        private static string LangResourceName(NameType nameType)
+        {
+            return string.Format("{0}_languages.txt", nameType.GetName()); 
+        }
+
+        private readonly ISet<string> languages;
+
+        private class NoLanguagesLanguageSet : LanguageSet
+        {
+            public override bool Contains(string language)
+            {
+                return false;
+            }
+
+            public override string GetAny()
+            {
+                throw new InvalidOperationException("Can't fetch any language from the empty language set.");
+            }
+
+            public override bool IsEmpty
+            {
+                get
+                {
+                    return true;
+                }
+            }
+
+            public override bool IsSingleton
+            {
+                get
+                {
+                    return false;
+                }
+            }
+
+            public override LanguageSet RestrictTo(LanguageSet other)
+            {
+                return this;
+            }
+
+            public override string ToString()
+            {
+                return "NO_LANGUAGES";
+            }
+        }
+
+        /// <summary>
+        /// No languages at all.
+        /// </summary>
+        public static readonly LanguageSet NO_LANGUAGES = new NoLanguagesLanguageSet();
+
+        private class AnyLanguageLanguageSet : LanguageSet
+        {
+            public override bool Contains(string language)
+            {
+                return true;
+            }
+
+            public override string GetAny()
+            {
+                throw new InvalidOperationException("Can't fetch any language from the any language set.");
+            }
+
+            public override bool IsEmpty
+            {
+                get
+                {
+                    return false;
+                }
+            }
+
+            public override bool IsSingleton
+            {
+                get
+                {
+                    return false;
+                }
+            }
+
+            public override LanguageSet RestrictTo(LanguageSet other)
+            {
+                return other;
+            }
+
+            public override string ToString()
+            {
+                return "ANY_LANGUAGE";
+            }
+        }
+
+        /// <summary>
+        /// Any/all languages.
+        /// </summary>
+        public static readonly LanguageSet ANY_LANGUAGE = new AnyLanguageLanguageSet();
+
+        private Languages(ISet<string> languages)
+        {
+            this.languages = languages;
+        }
+
+        public virtual ISet<string> GetLanguages() // LUCENENET NOTE: Kept as GetLanguages() because of naming conflict
+        {
+            return this.languages;
+        }
+    }
+
+    /// <summary>
+    /// A set of languages.
+    /// </summary>
+    public abstract class LanguageSet
+    {
+
+        public static LanguageSet From(ISet<string> langs)
+        {
+            return langs.Count == 0 ? Languages.NO_LANGUAGES : new SomeLanguages(langs);
+        }
+
+        public abstract bool Contains(string language);
+
+        public abstract string GetAny();
+
+        public abstract bool IsEmpty { get; }
+
+        public abstract bool IsSingleton { get; }
+
+        public abstract LanguageSet RestrictTo(LanguageSet other);
+    }
+
+    /// <summary>
+    /// Some languages, explicitly enumerated.
+    /// </summary>
+    public sealed class SomeLanguages : LanguageSet
+    {
+        private readonly ISet<string> languages;
+
+        internal SomeLanguages(ISet<string> languages)
+        {
+            this.languages = Collections.UnmodifiableSet(languages);
+        }
+
+        public override bool Contains(string language)
+        {
+            return this.languages.Contains(language);
+        }
+
+        public override string GetAny()
+        {
+            return this.languages.FirstOrDefault();
+        }
+
+        public ISet<string> GetLanguages()
+        {
+            return this.languages;
+        }
+
+        public override bool IsEmpty
+        {
+            get { return this.languages.Count == 0; }
+        }
+
+        public override bool IsSingleton
+        {
+            get { return this.languages.Count == 1; }
+        }
+
+        public override LanguageSet RestrictTo(LanguageSet other)
+        {
+            if (other == Languages.NO_LANGUAGES)
+            {
+                return other;
+            }
+            else if (other == Languages.ANY_LANGUAGE)
+            {
+                return this;
+            }
+            else
+            {
+                SomeLanguages sl = (SomeLanguages)other;
+                ISet<string> ls = new HashSet<string>(/*Math.Min(languages.Count, sl.languages.Count)*/);
+                foreach (string lang in languages)
+                {
+                    if (sl.languages.Contains(lang))
+                    {
+                        ls.Add(lang);
+                    }
+                }
+                return From(ls);
+            }
+        }
+
+        public override string ToString()
+        {
+            return "Languages(" + languages.ToString() + ")";
+        }
+    }
+}

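A small sketch of the LanguageSet semantics defined above; the language codes are assumptions for illustration and are only meaningful if they appear in the corresponding *_languages.txt resource:

    using System.Collections.Generic;
    using Lucene.Net.Analysis.Phonetic.Language.Bm;

    LanguageSet a = LanguageSet.From(new HashSet<string> { "german", "polish" });
    LanguageSet b = LanguageSet.From(new HashSet<string> { "german", "russian" });

    // RestrictTo on two concrete sets yields their intersection...
    LanguageSet common = a.RestrictTo(b);   // contains only "german"
    bool unique = common.IsSingleton;       // true
    string lang = common.GetAny();          // "german"

    // ...while the special sets behave as identity and absorbing elements:
    LanguageSet same = Languages.ANY_LANGUAGE.RestrictTo(a);   // returns a
    LanguageSet none = Languages.NO_LANGUAGES.RestrictTo(a);   // stays NO_LANGUAGES
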
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
new file mode 100644
index 0000000..1b8641c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
@@ -0,0 +1,69 @@
+// commons-codec version compatibility level: 1.9
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supported types of names. Unless you are matching particular family names, use <see cref="GENERIC"/>. The
+    /// <c>GENERIC</c> NameType should work reasonably well for non-name words. The other encodings are
+    /// specifically tuned to family names, and may not work well at all for general text.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public enum NameType
+    {
+        /// <summary>
+        /// Ashkenazi family names
+        /// </summary>
+        ASHKENAZI,
+
+        /// <summary>
+        /// Generic names and words
+        /// </summary>
+        GENERIC,
+
+        /// <summary>
+        /// Sephardic family names
+        /// </summary>
+        SEPHARDIC
+    }
+
+    public static class NameTypeExtensions
+    {
+        /// <summary>
+        /// Gets the short version of the name type.
+        /// </summary>
+        /// <param name="nameType">the <see cref="NameType"/></param>
+        /// <returns> the <see cref="NameType"/> short string</returns>
+        public static string GetName(this NameType nameType)
+        {
+            switch (nameType)
+            {
+                case NameType.ASHKENAZI:
+                    return "ash";
+                case NameType.GENERIC:
+                    return "gen";
+                case NameType.SEPHARDIC:
+                    return "sep";
+            }
+            throw new ArgumentException("Invalid nameType.");
+        }
+    }
+}

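For reference, the short name returned by GetName is what builds the per-name-type resource file names used throughout this package (as in LangResourceName above):

    using Lucene.Net.Analysis.Phonetic.Language.Bm;

    string shortName = NameType.GENERIC.GetName();                     // "gen"
    string resource = string.Format("{0}_languages.txt", shortName);   // "gen_languages.txt"
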

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
new file mode 100644
index 0000000..fa0096a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_common.txt
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+#include gen_exact_approx_common
+
+// DUTCH 
+"van" "^" "[bp]" "(vam|)" 
+"van" "^" "" "(van|)" 
+
+// REGRESSIVE ASSIMILATION OF CONSONANTS
+"n" "" "[bp]" "m" 
+        
+// PECULIARITY OF "h" 
+"h" "" "" "" 
+"H" "" "" "(x|)" 
+
+// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein
+// but Andersen & Anderson should match
+"sen" "[rmnl]" "$" "(zn|zon)"
+"sen" "" "$" "(sn|son)"
+"sEn" "[rmnl]" "$" "(zn|zon)"
+"sEn" "" "$" "(sn|son)"
+            
+"e" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+
+"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+
+"lEs" "" "" "(lEs|lz)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+"lE" "[bdfgkmnprStvzZ]" "" "(lE|l)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+
+// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D"
+"aue" "" "" "D"
+"oue" "" "" "D"
+    
+"AvE" "" "" "(D|AvE)"
+"Ave" "" "" "(D|Ave)"
+"avE" "" "" "(D|avE)"
+"ave" "" "" "(D|ave)"
+    
+"OvE" "" "" "(D|OvE)"
+"Ove" "" "" "(D|Ove)"
+"ovE" "" "" "(D|ovE)"
+"ove" "" "" "(D|ove)"
+    
+"ea" "" "" "(D|ea)"
+"EA" "" "" "(D|EA)"
+"Ea" "" "" "(D|Ea)"
+"eA" "" "" "(D|eA)"
+             
+"aji" "" "" "D"
+"ajI" "" "" "D"
+"aje" "" "" "D"
+"ajE" "" "" "D"
+    
+"Aji" "" "" "D"
+"AjI" "" "" "D"
+"Aje" "" "" "D"
+"AjE" "" "" "D"
+    
+"oji" "" "" "D"
+"ojI" "" "" "D"
+"oje" "" "" "D"
+"ojE" "" "" "D"
+    
+"Oji" "" "" "D"
+"OjI" "" "" "D"
+"Oje" "" "" "D"
+"OjE" "" "" "D"
+    
+"eji" "" "" "D"
+"ejI" "" "" "D"
+"eje" "" "" "D"
+"ejE" "" "" "D"
+    
+"Eji" "" "" "D"
+"EjI" "" "" "D"
+"Eje" "" "" "D"
+"EjE" "" "" "D"
+    
+"uji" "" "" "D"
+"ujI" "" "" "D"
+"uje" "" "" "D"
+"ujE" "" "" "D"
+    
+"Uji" "" "" "D"
+"UjI" "" "" "D"
+"Uje" "" "" "D"
+"UjE" "" "" "D"
+        
+"iji" "" "" "D"
+"ijI" "" "" "D"
+"ije" "" "" "D"
+"ijE" "" "" "D"
+    
+"Iji" "" "" "D"
+"IjI" "" "" "D"
+"Ije" "" "" "D"
+"IjE" "" "" "D"
+                         
+"aja" "" "" "D"
+"ajA" "" "" "D"
+"ajo" "" "" "D"
+"ajO" "" "" "D"
+"aju" "" "" "D"
+"ajU" "" "" "D"
+    
+"Aja" "" "" "D"
+"AjA" "" "" "D"
+"Ajo" "" "" "D"
+"AjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"oja" "" "" "D"
+"ojA" "" "" "D"
+"ojo" "" "" "D"
+"ojO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"Oja" "" "" "D"
+"OjA" "" "" "D"
+"Ojo" "" "" "D"
+"OjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"eja" "" "" "D"
+"ejA" "" "" "D"
+"ejo" "" "" "D"
+"ejO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"Eja" "" "" "D"
+"EjA" "" "" "D"
+"Ejo" "" "" "D"
+"EjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"uja" "" "" "D"
+"ujA" "" "" "D"
+"ujo" "" "" "D"
+"ujO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+        
+"Uja" "" "" "D"
+"UjA" "" "" "D"
+"Ujo" "" "" "D"
+"UjO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+        
+"ija" "" "" "D"
+"ijA" "" "" "D"
+"ijo" "" "" "D"
+"ijO" "" "" "D"
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+    
+"Ija" "" "" "D"
+"IjA" "" "" "D"
+"Ijo" "" "" "D"
+"IjO" "" "" "D"                         
+"Aju" "" "" "D"
+"AjU" "" "" "D"
+                         
+"j" "" "" "i"                         
+                         
+// lander = lender = länder 
+"lYndEr" "" "$" "lYnder" 
+"lander" "" "$" "lYnder" 
+"lAndEr" "" "$" "lYnder" 
+"lAnder" "" "$" "lYnder" 
+"landEr" "" "$" "lYnder" 
+"lender" "" "$" "lYnder" 
+"lEndEr" "" "$" "lYnder" 
+"lendEr" "" "$" "lYnder" 
+"lEnder" "" "$" "lYnder" 
+             
+// CONSONANTS {z & Z; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z" 
+    
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+    
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
new file mode 100644
index 0000000..d470aa8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_czech.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_dutch.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
new file mode 100644
index 0000000..84d8174
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_english.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "[^aEIeiou]e" "(Q|i|D)" // like in "five"
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+    
+"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)"  // Applebaum < Appelbaum
+         
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+        
+"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg
+"e" "D[^aeiEIou]" "" "(i|)" 
+
+"e" "" "" "i"
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiEuQY]" "" "i"
+"E" "" "[aoQY]" "i"
+"E" "" "" "(Y|i)"
+      
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
new file mode 100644
index 0000000..93a4980
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_french.txt
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"a" "" "" "(a|o)"
+"e" "" "" "i"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
new file mode 100644
index 0000000..14a5db7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_german.txt
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"I" "" "$" "i"
+"I" "[aeiAEIOUouQY]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(Q|i)" 
+    
+"AU" "" "" "(D|a|u)"
+"aU" "" "" "(D|a|u)"
+"Au" "" "" "(D|a|u)"
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"OU" "" "" "(D|o|u)"
+"oU" "" "" "(D|o|u)"
+"Ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"Ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"Oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+"Ui" "" "" "(D|u|i)"
+        
+"e" "" "" "i" 
+  
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoAOUiuQY]" "" "i"
+"E" "" "[aoAOQY]" "i"
+"E" "" "" "(Y|i)" 
+       
+"O" "" "$" "o"
+"O" "" "[fklmnprst]$" "o"
+"O" "" "ts$" "o"
+"O" "[aoAOUeiuQY]" "" "o"
+"O" "" "" "(o|Y)"
+    
+"a" "" "" "(a|o)" 
+  
+"A" "" "$" "(a|o)" 
+"A" "" "[fklmnprst]$" "(a|o)"
+"A" "" "ts$" "(a|o)"
+"A" "[aoeOUiuQY]" "" "(a|o)"
+"A" "" "" "(a|o|Y)" 
+
+"U" "" "$" "u"
+"U" "[DaoiuUQY]" "" "u"
+"U" "" "[^k]$" "u"
+"Uk" "[lr]" "$" "(uk|Qk)"
+"Uk" "" "$" "uk"
+"sUts" "" "$" "(suts|sQts)"
+"Uts" "" "$" "uts"
+"U" "" "" "(u|Q)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greek.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
new file mode 100644
index 0000000..e492b97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_greeklatin.txt
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
+
+"N" "" "" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
new file mode 100644
index 0000000..46ebf29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
new file mode 100644
index 0000000..46ebf29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
new file mode 100644
index 0000000..ce577af
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_polish.txt
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"aiB" "" "[bp]" "(D|Dm)"
+"oiB" "" "[bp]" "(D|Dm)" 
+"uiB" "" "[bp]" "(D|Dm)" 
+"eiB" "" "[bp]" "(D|Dm)"
+"EiB" "" "[bp]" "(D|Dm)"
+"iiB" "" "[bp]" "(D|Dm)"
+"IiB" "" "[bp]" "(D|Dm)"
+
+"aiB" "" "[dgkstvz]" "(D|Dn)"
+"oiB" "" "[dgkstvz]" "(D|Dn)" 
+"uiB" "" "[dgkstvz]" "(D|Dn)" 
+"eiB" "" "[dgkstvz]" "(D|Dn)"
+"EiB" "" "[dgkstvz]" "(D|Dn)"
+"iiB" "" "[dgkstvz]" "(D|Dn)"
+"IiB" "" "[dgkstvz]" "(D|Dn)"
+
+"B" "" "[bp]" "(o|om|im)" 
+"B" "" "[dgkstvz]" "(o|on|in)" 
+"B" "" "" "o"
+
+"aiF" "" "[bp]" "(D|Dm)"
+"oiF" "" "[bp]" "(D|Dm)" 
+"uiF" "" "[bp]" "(D|Dm)" 
+"eiF" "" "[bp]" "(D|Dm)"
+"EiF" "" "[bp]" "(D|Dm)"
+"iiF" "" "[bp]" "(D|Dm)"
+"IiF" "" "[bp]" "(D|Dm)"
+
+"aiF" "" "[dgkstvz]" "(D|Dn)"
+"oiF" "" "[dgkstvz]" "(D|Dn)" 
+"uiF" "" "[dgkstvz]" "(D|Dn)" 
+"eiF" "" "[dgkstvz]" "(D|Dn)"
+"EiF" "" "[dgkstvz]" "(D|Dn)"
+"iiF" "" "[dgkstvz]" "(D|Dn)"
+"IiF" "" "[dgkstvz]" "(D|Dn)"
+
+"F" "" "[bp]" "(i|im|om)"
+"F" "" "[dgkstvz]" "(i|in|on)"
+"F" "" "" "i" 
+
+"P" "" "" "(o|u)" 
+
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiAEBFIou]" "" "i"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
new file mode 100644
index 0000000..f5c5894
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_polish
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
new file mode 100644
index 0000000..9138487
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_russian.txt
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiEIou]" "" "i"
+"I" "" "" "(i|Q)" 
+        
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"om" "" "[bp]" "(om|im)" 
+"on" "" "[dgkstvz]" "(on|in)" 
+"em" "" "[bp]" "(im|om)" 
+"en" "" "[dgkstvz]" "(in|on)" 
+"Em" "" "[bp]" "(im|Ym|om)" 
+"En" "" "[dgkstvz]" "(in|Yn|on)" 
+                    
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
new file mode 100644
index 0000000..fb3e661
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_spanish.txt
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
+
+"B" "" "" "(b|v)"
+"V" "" "" "(b|v)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
new file mode 100644
index 0000000..b542861
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_turkish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
new file mode 100644
index 0000000..28fafb9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_any.txt
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+   // A, E, I, O, P, U should create variants, 
+   // EE = final "e" (english & french)
+   // V, B from Spanish
+   // but a, e, i, o, u should not create any new variant
+"EE" "" "$" "e"
+    
+"A" "" "" "a"
+"E" "" "" "e"
+"I" "" "" "i"
+"O" "" "" "o"
+"P" "" "" "o"
+"U" "" "" "u"
+    
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
new file mode 100644
index 0000000..1093912
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_approx_common.txt
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+"h" "" "$" ""
+
+// VOICED - UNVOICED CONSONANTS
+"b" "" "[fktSs]" "p"
+"b" "" "p" ""
+"b" "" "$" "p"
+"p" "" "[vgdZz]" "b" // Ashk: "v" excluded (everythere)
+"p" "" "b" ""
+    
+"v" "" "[pktSs]" "f"
+"v" "" "f" ""
+"v" "" "$" "f"
+"f" "" "[vbgdZz]" "v"
+"f" "" "v" ""
+    
+"g" "" "[pftSs]" "k"
+"g" "" "k" ""
+"g" "" "$" "k"
+"k" "" "[vbdZz]" "g"
+"k" "" "g" ""
+    
+"d" "" "[pfkSs]" "t"
+"d" "" "t" ""
+"d" "" "$" "t"
+"t" "" "[vbgZz]" "d"
+"t" "" "d" ""
+    
+"s" "" "dZ" ""
+"s" "" "tS" ""
+    
+"z" "" "[pfkSt]" "s"
+"z" "" "[sSzZ]" ""
+"s" "" "[sSzZ]" ""
+"Z" "" "[sSzZ]" ""
+"S" "" "[sSzZ]" ""
+       
+// SIMPLIFICATION OF CONSONANT CLUSTERS
+"jnm" "" "" "jm"
+
+// DOUBLE --> SINGLE
+"ji" "^" "" "i"
+"jI" "^" "" "I"
+        
+"a" "" "[aA]" "" 
+"a" "A" "" "" 
+"A" "" "A" ""
+       
+"b" "" "b" ""
+"d" "" "d" ""
+"f" "" "f" ""
+"g" "" "g" ""
+"j" "" "j" ""
+"k" "" "k" ""
+"l" "" "l" ""
+"m" "" "m" ""
+"n" "" "n" ""
+"p" "" "p" ""
+"r" "" "r" ""
+"t" "" "t" ""
+"v" "" "v" ""
+"z" "" "z" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
new file mode 100644
index 0000000..4f2ead1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_arabic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"l" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
new file mode 100644
index 0000000..742fc71
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_common.txt
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_approx_common
+
+"H" "" "" ""
+ 
+// VOICED - UNVOICED CONSONANTS
+"s" "[^t]" "[bgZd]" "z"
+"Z" "" "[pfkst]" "S"
+"Z" "" "$" "S"
+"S" "" "[bgzd]" "Z"
+"z" "" "$" "s"
+    
+"ji" "[aAoOeEiIuU]" "" "j"
+"jI" "[aAoOeEiIuU]" "" "j"
+"je" "[aAoOeEiIuU]" "" "j"
+"jE" "[aAoOeEiIuU]" "" "j"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_czech.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_dutch.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
new file mode 100644
index 0000000..474f61b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_english.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
new file mode 100644
index 0000000..7a648f2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_german.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_any
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greek.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
new file mode 100644
index 0000000..325ff34
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_greeklatin.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"N" "" "" "n"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
new file mode 100644
index 0000000..babed2a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_polish.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"B" "" "" "a"
+"F" "" "" "e"
+"P" "" "" "o"
+
+"E" "" "" "e"
+"I" "" "" "i"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
new file mode 100644
index 0000000..0a016e0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_russian.txt
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"E" "" "" "e"
+"I" "" "" "i"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
new file mode 100644
index 0000000..e555114
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_spanish.txt
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_exact_turkish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
new file mode 100644
index 0000000..2ae2d9d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_hebrew_common.txt
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include gen_exact_approx_common
+
+"ts" "" "" "C" // for not confusion Gutes [=guts] and Guts [=guc]
+"tS" "" "" "C" // same reason
+"S" "" "" "s"
+"p" "" "" "f"   
+"b" "^" "" "b"    
+"b" "" "" "(b|v)"    
+        
+"ja" "" "" "i"
+"jA" "" "" "i"  
+"je" "" "" "i"
+"jE" "" "" "i"
+"aj" "" "" "i"
+"Aj" "" "" "i"
+"I" "" "" "i"
+"j" "" "" "i"
+    
+"a" "^" "" "1"
+"A" "^" "" "1"
+"e" "^" "" "1"
+"E" "^" "" "1"
+"Y" "^" "" "1"
+    
+"a" "" "$" "1"
+"A" "" "$" "1"
+"e" "" "$" "1"
+"E" "" "$" "1"
+"Y" "" "$" "1"
+    
+"a" "" "" ""
+"A" "" "" ""
+"e" "" "" ""
+"E" "" "" ""
+"Y" "" "" ""
+    
+"oj" "^" "" "(u|vi)"
+"Oj" "^" "" "(u|vi)"
+"uj" "^" "" "(u|vi)"
+"Uj" "^" "" "(u|vi)" 
+    
+"oj" "" "" "u"
+"Oj" "" "" "u"
+"uj" "" "" "u"
+"Uj" "" "" "u" 
+    
+"ou" "^" "" "(u|v|1)"
+"o" "^" "" "(u|v|1)"
+"O" "^" "" "(u|v|1)"
+"U" "^" "" "(u|v|1)"
+"u" "^" "" "(u|v|1)"
+    
+"o" "" "$" "(u|1)"
+"O" "" "$" "(u|1)"
+"u" "" "$" "(u|1)"
+"U" "" "$" "(u|1)"
+    
+"ou" "" "" "u"
+"o" "" "" "u"
+"O" "" "" "u"
+"U" "" "" "u"
+        
+"VV" "" "" "u" // alef/ayin + vov from ruleshebrew
+"V" "" "" "v" // tsvey-vov from ruleshebrew;; only Ashkenazic
+"L" "^" "" "1" // alef/ayin from  ruleshebrew
+"L" "" "$" "1" // alef/ayin from  ruleshebrew
+"L" "" "" " " // alef/ayin from  ruleshebrew
+"WW" "^" "" "(vi|u)" // vav-yod from  ruleshebrew
+"WW" "" "" "u" // vav-yod from  ruleshebrew
+"W" "^" "" "(u|v)" // vav from  ruleshebrew
+"W" "" "" "u" // vav from  ruleshebrew
+    
+    //"g" "" "" "(g|Z)"
+    //"z" "" "" "(z|Z)"
+    //"d" "" "" "(d|dZ)"
+   
+"TB" "" "$" "(t|s)" // tav from ruleshebrew; only Ashkenazic
+"TB" "" "" "t" // tav from ruleshebrew; only Ashkenazic    
+"T" "" "" "t"   // tet from  ruleshebrew
+    
+   //"k" "" "" "(k|x)"
+   //"x" "" "" "(k|x)"
+"K" "" "" "k" // kof and initial kaf from ruleshebrew
+"X" "" "" "x" // khet and final kaf from ruleshebrew
+    
+"H" "^" "" "(x|1)"
+"H" "" "$" "(x|1)"
+"H" "" "" "(x|)"
+"h" "^" "" "1"
+"h" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
new file mode 100644
index 0000000..50f1118
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_languages.txt
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+any
+arabic
+cyrillic
+czech
+dutch
+english
+french
+german
+greek
+greeklatin
+hebrew
+hungarian
+italian
+polish
+portuguese
+romanian
+russian
+spanish
+turkish

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
new file mode 100644
index 0000000..57bb939
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_any.txt
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+  // format of each entry rule in the table
+  //   (pattern, left context, right context, phonetic)
+  // where
+  //   pattern is a sequence of characters that might appear in the word to be transliterated
+  //   left context is the context that precedes the pattern
+  //   right context is the context that follows the pattern
+  //   phonetic is the result that this rule generates
+  //
+  // note that both left context and right context can be regular expressions
+  // ex: left context of ^ would mean start of word
+  //     left context of [aeiouy] means following a vowel
+  //     right context of [^aeiouy] means preceding a consonant
+  //     right context of e$ means preceding a final e
+
+//GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in[russian]|ina)" 
+"ina" "" "$" "(in[russian]|ina)" 
+"liova" "" "$" "(lova|lof[russian]|lef[russian])"
+"lova" "" "$" "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])"   
+"kova" "" "$" "(kova|kof[russian]|k[czech]|ek[czech])"   
+"ova" "" "$" "(ova|of[russian]|[czech])"   
+"ová" "" "$" "(ova|[czech])"   
+"eva" "" "$" "(eva|ef[russian])"   
+"aia" "" "$" "(aja|i[russian])"
+"aja" "" "$" "(aja|i[russian])" 
+"aya" "" "$" "(aja|i[russian])" 
+    
+"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])"   
+"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])"   
+"owa" "" "$" "(ova|of[polish]|)"   
+"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])" 
+"kowna" "" "$" "(kovna|k[polish]|ek[polish])"  
+"owna" "" "$" "(ovna|[polish])"  
+"lówna" "" "$" "(l|el)"  // polish
+"kówna" "" "$" "(k|ek)"  // polish
+"ówna" "" "$" ""         // polish
+"á" "" "$" "(a|i[czech])" 
+"a" "" "$" "(a|i[polish+czech])" 
+    
+// CONSONANTS
+"pf" "" "" "(pf|p|f)" 
+"que" "" "$" "(k[french]|ke|kve)"
+"qu" "" "" "(kv|k)" 
+ 
+"m" "" "[bfpv]" "(m|n)" 
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n[french+portuguese])"  // nasal
+ 
+"ly" "" "[au]" "l" 
+"li" "" "[au]" "l" 
+"lio" "" "" "(lo|le[russian])" 
+"lyo" "" "" "(lo|le[russian])" 
+  //array("ll" "" "" "(l|J[spanish])"  // Disabled Argentinian rule
+"lt" "u" "$" "(lt|[french])" 
+    
+"v" "^" "" "(v|f[german]|b[spanish])" 
+
+"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" 
+"ex" "" "[cs]" "(e[portuguese]|ek)" 
+"x" "u" "$" "(ks|[french])" 
+   
+"ck" "" "" "(k|tsk[polish+czech])"
+"cz" "" "" "(tS|tsz[czech])" // Polish
+   
+    //Proceccing of "h" in various combinations         
+"rh" "^" "" "r"
+"dh" "^" "" "d"
+"bh" "^" "" "b"
+     
+"ph" "" "" "(ph|f)"
+"kh" "" "" "(x[russian+english]|kh)"  
+  
+"lh" "" "" "(lh|l[portuguese])" 
+"nh" "" "" "(nh|nj[portuguese])" 
+        
+"ssch" "" "" "S"      // german
+"chsch" "" "" "xS"    // german
+"tsch" "" "" "tS"     // german 
+    
+    ///"desch" "^" "" "deS" 
+    ///"desh" "^" "" "(dES|de[french])" 
+    ///"des" "^" "[^aeiouy]" "(dEs|de[french])" 
+    
+"sch" "[aeiouy]" "[ei]" "(S|StS[russian]|sk[romanian+italian])" 
+"sch" "[aeiouy]" "" "(S|StS[russian])" 
+"sch" "" "[ei]" "(sk[romanian+italian]|S|StS[russian])"
+"sch" "" "" "(S|StS[russian])"
+"ssh" "" "" "S" 
+    
+"sh" "" "[äöü]" "sh"      // german 
+"sh" "" "[aeiou]" "(S[russian+english]|sh)"
+"sh" "" "" "S" 
+ 
+"zh" "" "" "(Z[english+russian]|zh|tsh[german])" 
+    
+"chs" "" "" "(ks[german]|xs|tSs[russian+english])" 
+"ch" "" "[ei]" "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])" 
+"ch" "" "" "(x|tS[spanish+english+russian]|S[portuguese+french])"  
+ 
+"th" "^" "" "t"     // english+german+greeklatin
+"th" "" "[äöüaeiou]" "(t[english+german+greeklatin]|th)"
+"th" "" "" "t"  // english+german+greeklatin
+   
+"gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)" 
+          
+"ouh" "" "[aioe]" "(v[french]|uh)"
+"uh" "" "[aioe]" "(v|uh)"
+"h" "." "$" "" // match h at the end of words, but not as a single letter
+"h" "[aeiouyäöü]" "" ""  // german
+"h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])" 
+         
+    //Processing of "ci" "ce" & "cy"
+"cia" "" "" "(tSa[polish]|tsa)"  // Polish
+"cią" "" "[bp]" "(tSom|tsom)"     // Polish
+"cią" "" "" "(tSon[polish]|tson)" // Polish
+"cię" "" "[bp]" "(tSem[polish]|tsem)" // Polish
+"cię" "" "" "(tSen[polish]|tsen)" // Polish
+"cie" "" "" "(tSe[polish]|tse)"  // Polish
+"cio" "" "" "(tSo[polish]|tso)"  // Polish
+"ciu" "" "" "(tSu[polish]|tsu)" // Polish
+
+"sci" "" "$" "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"sc" "" "[ei]" "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)" 
+"ci" "" "$" "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)" 
+"cy" "" "" "(si|tsi[polish])" 
+"c" "" "[ei]" "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)" 
+      
+    //Processing of "s"      
+"sç" "" "[aeiou]" "(s|stS[turkish])"
+"ssz" "" "" "S" // polish
+"sz" "^" "" "(S|s[hungarian])" // polish
+"sz" "" "$" "(S|s[hungarian])" // polish
+"sz" "" "" "(S|s[hungarian]|sts[german])" // polish
+"ssp" "" "" "(Sp[german]|sp)"
+"sp" "" "" "(Sp[german]|sp)"
+"sst" "" "" "(St[german]|st)"
+"st" "" "" "(St[german]|st)" 
+"ss" "" "" "s"
+"sj" "^" "" "S" // dutch
+"sj" "" "$" "S" // dutch
+"sj" "" "" "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])" 
+  
+"sia" "" "" "(Sa[polish]|sa[polish]|sja)" 
+"sią" "" "[bp]" "(Som[polish]|som)" // polish
+"sią" "" "" "(Son[polish]|son)" // polish
+"się" "" "[bp]" "(Sem[polish]|sem)" // polish
+"się" "" "" "(Sen[polish]|sen)" // polish
+"sie" "" "" "(se|sje|Se[polish]|zi[german])" 
+    
+"sio" "" "" "(So[polish]|so)" 
+"siu" "" "" "(Su[polish]|sju)" 
+     
+"si" "[äöëaáuiíoóeéêy]" "" "(Si[polish]|si|zi[portuguese+french+italian+german])"
+"si" "" "" "(Si[polish]|si|zi[german])"
+"s" "[aáuiíoóeéêy]" "[aáuíoóeéêy]" "(s|z[portuguese+french+italian+german])" 
+"s" "" "[aeouäöë]" "(s|z[german])"
+"s" "[aeiouy]" "[dglmnrv]" "(s|z|Z[portuguese]|[french])" // Groslot
+"s" "" "[dglmnrv]" "(s|z|Z[portuguese])" 
+                 
+    //Processing of "g"   
+"gue" "" "$" "(k[french]|gve)"  // portuguese+spanish
+"gu" "" "[ei]" "(g[french]|gv[portuguese+spanish])" // portuguese+spanish
+"gu" "" "[ao]" "gv"     // portuguese+spanish
+"guy" "" "" "gi"  // french
+    
+"gli" "" "" "(glI|l[italian])" 
+"gni" "" "" "(gnI|ni[italian+french])"
+"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)"
+    
+"ggie" "" "" "(je[greeklatin]|dZe)" // dZ is Italian
+"ggi" "" "[aou]" "(j[greeklatin]|dZ)" // dZ is Italian
+        
+"ggi" "[yaeiou]" "[aou]" "(gI|dZ[italian]|j[greeklatin])"  
+"gge" "[yaeiou]" "" "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])" 
+"ggi" "[yaeiou]" "" "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])" 
+"ggi" "" "[aou]" "(gI|dZ[italian]|j[greeklatin])" 
+    
+"gie" "" "$" "(ge|gi[german]|ji[french]|dZe[italian])" 
+"gie" "" "" "(ge|gi[german]|dZe[italian]|je[greeklatin])" 
+"gi" "" "[aou]" "(i[greeklatin]|dZ)" // dZ is Italian
+        
+"ge" "[yaeiou]" "" "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "[yaeiou]" "" "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"ge" "" "" "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])" 
+"gi" "" "" "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])" 
+"gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])"
+"gy" "" "" "(gi|d[hungarian])" 
+"g" "[yaeiou]" "[aouyei]" "g" 
+"g" "" "[aouei]" "(g|h[russian])" 
+    
+    //Processing of "j"        
+"ij" "" "" "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])" 
+"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+         
+    //Processing of "z"    
+"rz" "t" "" "(S[polish]|r)" // polish
+"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" 
+        
+"tz" "" "$" "(ts|tS[english+german])" 
+"tz" "^" "" "(ts[english+german+russian]|tS[english+german])" 
+"tz" "" "" "(ts[english+german+russian]|tz)" 
+    
+"zia" "" "[bcdgkpstwzż]" "(Za[polish]|za[polish]|zja)" 
+"zia" "" "" "(Za[polish]|zja)" 
+"zią" "" "[bp]" "(Zom[polish]|zom)"  // polish
+"zią" "" "" "(Zon[polish]|zon)" // polish
+"zię" "" "[bp]" "(Zem[polish]|zem)" // polish
+"zię" "" "" "(Zen[polish]|zen)" // polish
+"zie" "" "[bcdgkpstwzż]" "(Ze[polish]|ze[polish]|ze|tsi[german])" 
+"zie" "" "" "(ze|Ze[polish]|tsi[german])" 
+"zio" "" "" "(Zo[polish]|zo)" 
+"ziu" "" "" "(Zu[polish]|zju)" 
+"zi" "" "" "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])" 
+
+"z" "" "$" "(s|ts[german]|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+              
+ // VOWELS  
+"aue" "" "" "aue" 
+"oue" "" "" "(oue|ve[french])" 
+"eau" "" "" "o" // French
+        
+"ae" "" "" "(Y[german]|aje[russian]|ae)" 
+"ai" "" "" "aj" 
+"au" "" "" "(au|o[french])" 
+"ay" "" "" "aj" 
+"ão" "" "" "(au|an)" // Port
+"ãe" "" "" "(aj|an)" // Port
+"ãi" "" "" "(aj|an)" // Port
+"ea" "" "" "(ea|ja[romanian])"
+"ee" "" "" "(i[english]|aje[russian]|e)" 
+"ei" "" "" "(aj|ej)"
+"eu" "" "" "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])"
+"ey" "" "" "(aj|ej)"
+"ia" "" "" "ja" 
+"ie" "" "" "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)" 
+"ii" "" "$" "i" // russian
+"io" "" "" "(jo|e[russian])"
+"iu" "" "" "ju" 
+"iy" "" "$" "i" // russian
+"oe" "" "" "(Y[german]|oje[russian]|u[dutch]|oe)" 
+"oi" "" "" "oj" 
+"oo" "" "" "(u[english]|o)" 
+"ou" "" "" "(ou|u[french+greeklatin]|au[dutch])" 
+"où" "" "" "u" // french
+"oy" "" "" "oj" 
+"õe" "" "" "(oj|on)" // Port
+"ua" "" "" "va"
+"ue" "" "" "(Q[german]|uje[russian]|ve)" 
+"ui" "" "" "(uj|vi|Y[dutch])" 
+"uu" "" "" "(u|Q[dutch])" 
+"uo" "" "" "(vo|o)"
+"uy" "" "" "uj" 
+"ya" "" "" "ja" 
+"ye" "" "" "(je|ije[russian])"
+"yi" "^" "" "i"
+"yi" "" "$" "i" // russian
+"yo" "" "" "(jo|e[russian])"
+"yu" "" "" "ju" 
+"yy" "" "$" "i" // russian
+    
+"i" "[áóéê]" "" "j"
+"y" "[áóéê]" "" "j"
+         
+"e" "^" "" "(e|je[russian])" 
+"e" "" "$" "(e|EE[english+french])" 
+            
+// LANGUAGE SPECIFIC CHARACTERS 
+"ą" "" "[bp]" "om" // polish
+"ą" "" "" "on"  // polish
+"ä" "" "" "Y" 
+"á" "" "" "a" // Port & Sp
+"à" "" "" "a" 
+"â" "" "" "a" 
+"ã" "" "" "(a|an)" // Port
+"ă" "" "" "(e[romanian]|a)" // romanian
+"č" "" "" "tS" // czech
+"ć" "" "" "(tS[polish]|ts)"  // polish
+"ç" "" "" "(s|tS[turkish])"
+"ď" "" "" "(d|dj[czech])"
+"ę" "" "[bp]" "em" // polish
+"ę" "" "" "en" // polish
+"é" "" "" "e" 
+"è" "" "" "e" 
+"ê" "" "" "e" 
+"ě" "" "" "(e|je[czech])" 
+"ğ" "" "" "" // turkish
+"í" "" "" "i" 
+"î" "" "" "i" 
+"ı" "" "" "(i|e[turkish]|[turkish])" 
+"ł" "" "" "l" 
+"ń" "" "" "(n|nj[polish])" // polish
+"ñ" "" "" "(n|nj[spanish])" 
+"ó" "" "" "(u[polish]|o)"  
+"ô" "" "" "o" // Port & Fr
+"õ" "" "" "(o|on[portuguese]|Y[hungarian])" 
+"ò" "" "" "o"  // Sp & It
+"ö" "" "" "Y"
+"ř" "" "" "(r|rZ[czech])"
+"ś" "" "" "(S[polish]|s)" 
+"ş" "" "" "S" // romanian+turkish
+"š" "" "" "S" // czech
+"ţ" "" "" "ts"  // romanian
+"ť" "" "" "(t|tj[czech])"
+"ű" "" "" "Q" // hungarian
+"ü" "" "" "(Q|u[portuguese+spanish])"
+"ú" "" "" "u" 
+"ů" "" "" "u" // czech
+"ù" "" "" "u" // french
+"ý" "" "" "i"  // czech
+"ż" "" "" "Z" // polish
+"ź" "" "" "(Z[polish]|z)" 
+   
+"ß" "" "" "s" // german
+"'" "" "" "" // russian
+"\"" "" "" "" // russian
+ 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])"    
+    
+ // LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "B" 
+"c" "" "" "(k|ts[polish+czech]|dZ[turkish])" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+   //array("g" "" "" "(g|x[dutch])" // Dutch sound disabled
+"g" "" "" "g"
+"h" "" "" "(h|x[romanian]|H[french+portuguese+italian+spanish])" 
+"i" "" "" "I"
+"j" "" "" "(j|x[spanish]|Z[french+romanian+turkish+portuguese])" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(s|S[portuguese])" 
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "V" 
+"w" "" "" "(v|w[english+dutch])"     
+"x" "" "" "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y" "" "" "i"
+"z" "" "" "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
new file mode 100644
index 0000000..00f85e8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_arabic.txt
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+"ا" "" "" "a" // alif isol & init 
+                
+"ب" "" "" "b1" // ba' isol
+        
+"ت" "" "" "t1" // ta' isol
+        
+"ث" "" "" "t1" // tha' isol
+
+"ج" "" "" "(dZ1|Z1)" // jim isol
+        
+"ح" "" "" "(h1|1)" // h.a' isol
+    
+"خ" "" "" "x1" // kha' isol
+    
+"د" "" "" "d1" // dal isol & init
+           
+"ذ" "" "" "d1" // dhal isol & init
+        
+"ر" "" "" "r1" // dhal isol & init
+    
+"ز" "" "" "z1" // za' isol & init
+        
+"س" "" "" "s1" // sin isol
+    
+"ش" "" "" "S1" // shin isol
+    
+"ص" "" "" "s1" // s.ad isol
+    
+"ض" "" "" "d1" // d.ad isol
+        
+"ط" "" "" "t1" // t.a' isol
+        
+"ظ" "" "" "z1" // z.a' isol
+        
+"ع" "" "" "(h1|1)" // ayin isol 
+    
+"غ" "" "" "g1" // ghayin isol
+    
+"ف" "" "" "f1" // fa' isol
+    
+"ق" "" "" "k1" // qaf isol
+    
+"ك" "" "" "k1" // kaf isol
+    
+"ل" "" "" "l1" // lam isol
+    
+"م" "" "" "m1" // mim isol
+    
+"ن" "" "" "n1" // nun isol
+    
+"ه" "" "" "(h1|1)" // h isol
+        
+"و" "" "" "(u|v1)" // waw, isol + init
+               
+    
+"ي‎" "" "" "(i|j1)" // ya' isol

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
new file mode 100644
index 0000000..6237de4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_cyrillic.txt
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+"ця" "" "" "tsa"
+"цю" "" "" "tsu"
+"циа" "" "" "tsa"
+"цие" "" "" "tse"
+"цио" "" "" "tso"
+"циу" "" "" "tsu"
+"сие" "" "" "se"
+"сио" "" "" "so"
+"зие" "" "" "ze"
+"зио" "" "" "zo"
+"с" "" "с" ""
+
+"гауз" "" "$" "haus"
+"гаус" "" "$" "haus"
+"гольц" "" "$" "holts"
+"геймер" "" "$" "(hejmer|hajmer)"
+"гейм" "" "$" "(hejm|hajm)"
+"гоф" "" "$" "hof"
+"гер" "" "$" "ger"
+"ген" "" "$" "gen"
+"гин" "" "$" "gin"
+"г" "(й|ё|я|ю|ы|а|е|о|и|у)" "(а|е|о|и|у)" "g"
+"г" "" "(а|е|о|и|у)" "(g|h)"
+
+"ля" "" "" "la"
+"лю" "" "" "lu"
+"лё" "" "" "(le|lo)"
+"лио" "" "" "(le|lo)"
+"ле" "" "" "(lE|lo)"
+
+"ийе" "" "" "je"
+"ие" "" "" "je"
+"ыйе" "" "" "je"
+"ые" "" "" "je"
+"ий" "" "(а|о|у)" "j"
+"ый" "" "(а|о|у)" "j"
+"ий" "" "$" "i"
+"ый" "" "$" "i"
+
+"ей" "^" "" "(jej|ej)"
+"е" "(а|е|о|у)" "" "je"
+"е" "^" "" "je"
+"эй" "" "" "ej"
+"ей" "" "" "ej"
+
+"ауе" "" "" "aue"
+"ауэ" "" "" "aue"
+
+"а" "" "" "a"
+"б" "" "" "b"
+"в" "" "" "v"
+"г" "" "" "g"
+"д" "" "" "d"
+"е" "" "" "E"
+"ё" "" "" "(e|jo)"
+"ж" "" "" "Z"
+"з" "" "" "z"
+"и" "" "" "I"
+"й" "" "" "j"
+"к" "" "" "k"
+"л" "" "" "l"
+"м" "" "" "m"
+"н" "" "" "n"
+"о" "" "" "o"
+"п" "" "" "p"
+"р" "" "" "r"
+"с" "" "" "s"
+"т" "" "" "t"
+"у" "" "" "u"
+"ф" "" "" "f"
+"х" "" "" "x"
+"ц" "" "" "ts"
+"ч" "" "" "tS"
+"ш" "" "" "S"
+"щ" "" "" "StS"
+"ъ" "" "" ""
+"ы" "" "" "I"
+"ь" "" "" ""
+"э" "" "" "E"
+"ю" "" "" "ju"
+"я" "" "" "ja"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
new file mode 100644
index 0000000..bc7a79c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_czech.txt
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ch" "" "" "x"
+"qu" "" "" "(k|kv)"    
+"aue" "" "" "aue"
+"ei" "" "" "(ej|aj)"
+"i" "[aou]" "" "j"
+"i" "" "[aeou]" "j"
+
+"č" "" "" "tS"
+"š" "" "" "S"
+"ň" "" "" "n"
+"ť" "" "" "(t|tj)"
+"ď" "" "" "(d|dj)"
+"ř" "" "" "(r|rZ)"
+
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ý" "" "" "i"
+"ě" "" "" "(e|je)"
+"ů" "" "" "u"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|g)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "(k|kv)"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"
+"z" "" "" "z" 


[13/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
new file mode 100644
index 0000000..3cf5c7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/PhoneticEngine.cs
@@ -0,0 +1,578 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Converts words into potential phonetic representations.
+    /// </summary>
+    /// <remarks>
+    /// This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes
+    /// into account the likely source language. Next, this phonetic representation is converted into a
+    /// pan-European 'average' representation, allowing comparison between different versions of essentially
+    /// the same word from different languages.
+    /// <para/>
+    /// This class is intentionally immutable and thread-safe.
+    /// If you wish to alter the settings for a PhoneticEngine, you
+    /// must make a new one with the updated settings.
+    /// <para/>
+    /// Ported from phoneticengine.php
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class PhoneticEngine
+    {
+        internal Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside
+        /// this package, and probably not outside the <see cref="PhoneticEngine"/> class.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        internal sealed class PhonemeBuilder
+        {
+            /// <summary>
+            /// An empty builder where all phonemes must come from some set of languages. This will contain a single
+            /// phoneme of zero characters. This can then be appended to. This should be the only way to create a new
+            /// phoneme from scratch.
+            /// </summary>
+            /// <param name="languages">The set of languages.</param>
+            /// <returns>A new, empty phoneme builder.</returns>
+            public static PhonemeBuilder Empty(LanguageSet languages)
+            {
+                return new PhonemeBuilder(new Phoneme("", languages));
+            }
+
+            private readonly IList<Phoneme> phonemes;
+
+            private PhonemeBuilder(Phoneme phoneme)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                this.phonemes = new List<Phoneme>();
+                this.phonemes.Add(phoneme);
+            }
+
+            internal PhonemeBuilder(IList<Phoneme> phonemes)
+            {
+                this.phonemes = phonemes;
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to each phoneme in this builder.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            public void Append(ICharSequence str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to each phoneme in this builder.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(string str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str);
+                }
+            }
+
+            /// <summary>
+            /// Appends <paramref name="str"/> to each phoneme in this builder.
+            /// </summary>
+            /// <param name="str">The characters to append to the phonemes.</param>
+            // LUCENENET specific
+            public void Append(StringBuilder str)
+            {
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    ph.Append(str.ToString());
+                }
+            }
+
+            /// <summary>
+            /// Applies the given phoneme expression to all phonemes in this phoneme builder.
+            /// <para/>
+            /// This will lengthen phonemes that have compatible language sets to the expression, and drop those that are
+            /// incompatible.
+            /// </summary>
+            /// <param name="phonemeExpr">The expression to apply.</param>
+            /// <param name="maxPhonemes">The maximum number of phonemes to build up.</param>
+            public void Apply(IPhonemeExpr phonemeExpr, int maxPhonemes)
+            {
+                // LUCENENET NOTE: LinkedHashSet cares about insertion order - in .NET, we can just use List<T> for that
+                IList<Phoneme> newPhonemes = new List<Phoneme>(maxPhonemes);
+
+                //EXPR_continue:
+                foreach (Phoneme left in this.phonemes)
+                {
+                    foreach (Phoneme right in phonemeExpr.Phonemes)
+                    {
+                        LanguageSet languages = left.Languages.RestrictTo(right.Languages);
+                        if (!languages.IsEmpty)
+                        {
+                            Phoneme join = new Phoneme(left, right, languages);
+                            if (newPhonemes.Count < maxPhonemes)
+                            {
+                                newPhonemes.Add(join);
+                                if (newPhonemes.Count >= maxPhonemes)
+                                {
+                                    goto EXPR_break;
+                                }
+                            }
+                        }
+                    }
+                }
+                EXPR_break: { }
+
+                this.phonemes.Clear();
+                // LUCENENET: We need to filter out any duplicates, since we converted from LinkedHashSet
+                // to List.
+                this.phonemes.AddRange(newPhonemes.Where(x => !phonemes.Any(y => y.Equals(x))));
+            }
+
+            /// <summary>
+            /// Gets underlying phoneme set. Please don't mutate.
+            /// </summary>
+            public IList<Phoneme> Phonemes
+            {
+                get { return this.phonemes; }
+            }
+
+            /// <summary>
+            /// Stringifies the phoneme set. This produces a single string of the strings of each phoneme,
+            /// joined with a pipe. This is explicitly provided in place of <see cref="object.ToString()"/> as it is a potentially
+            /// expensive operation, which should be avoided when debugging.
+            /// </summary>
+            /// <returns>The stringified phoneme set.</returns>
+            public string MakeString()
+            {
+                StringBuilder sb = new StringBuilder();
+
+                foreach (Phoneme ph in this.phonemes)
+                {
+                    if (sb.Length > 0)
+                    {
+                        sb.Append("|");
+                    }
+                    sb.Append(ph.GetPhonemeText());
+                }
+
+                return sb.ToString();
+            }
+        }
+
+        /// <summary>
+        /// A function closure capturing the application of a list of rules to an input sequence at a particular offset.
+        /// After invocation, the values <c>i</c> and <c>found</c> are updated. <c>i</c> points to the
+        /// index of the next char in <c>input</c> that must be processed next (the input up to that index having been
+        /// processed already), and <c>found</c> indicates if a matching rule was found or not. In the case where a
+        /// matching rule was found, <c>phonemeBuilder</c> is replaced with a new builder containing the phonemes
+        /// updated by the matching rule.
+        /// <para/>
+        /// Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads
+        /// as it is constructed as needed by the calling methods.
+        /// <para/>
+        /// since 1.6
+        /// </summary>
+        private sealed class RulesApplication
+        {
+            private readonly IDictionary<string, IList<Rule>> finalRules;
+            private readonly string input;
+
+            private PhonemeBuilder phonemeBuilder;
+            private int i;
+            private readonly int maxPhonemes;
+            private bool found;
+
+            public RulesApplication(IDictionary<string, IList<Rule>> finalRules, string input,
+                                    PhonemeBuilder phonemeBuilder, int i, int maxPhonemes)
+            {
+                if (finalRules == null)
+                {
+                    throw new ArgumentNullException("The finalRules argument must not be null");
+                }
+                this.finalRules = finalRules;
+                this.phonemeBuilder = phonemeBuilder;
+                this.input = input;
+                this.i = i;
+                this.maxPhonemes = maxPhonemes;
+            }
+
+            public int I
+            {
+                get { return this.i; }
+            }
+
+            public PhonemeBuilder PhonemeBuilder
+            {
+                get { return this.phonemeBuilder; }
+            }
+
+            /// <summary>
+            /// Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context
+            /// and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no
+            /// match, <c>i</c> is advanced one and the character is silently dropped from the phonetic spelling.
+            /// </summary>
+            /// <returns><c>this</c></returns>
+            public RulesApplication Invoke()
+            {
+                this.found = false;
+                int patternLength = 1;
+                IList<Rule> rules;
+                if (this.finalRules.TryGetValue(input.Substring(i, patternLength), out rules) && rules != null)
+                {
+                    foreach (Rule rule in rules)
+                    {
+                        string pattern = rule.Pattern;
+                        patternLength = pattern.Length;
+                        if (rule.PatternAndContextMatches(this.input, this.i))
+                        {
+                            this.phonemeBuilder.Apply(rule.Phoneme, maxPhonemes);
+                            this.found = true;
+                            break;
+                        }
+                    }
+                }
+
+                if (!this.found)
+                {
+                    patternLength = 1;
+                }
+
+                this.i += patternLength;
+                return this;
+            }
+
+            public bool IsFound
+            {
+                get { return this.found; }
+            }
+        }
+
+        private static readonly IDictionary<NameType, ISet<string>> NAME_PREFIXES = new Dictionary<NameType, ISet<string>>();
+
+        static PhoneticEngine()
+        {
+            NAME_PREFIXES[NameType.ASHKENAZI] =
+                    Collections.UnmodifiableSet(
+                            new HashSet<string>() { "bar", "ben", "da", "de", "van", "von" });
+            NAME_PREFIXES[NameType.SEPHARDIC] =
+                    Collections.UnmodifiableSet(
+                            new HashSet<string>() { "al", "el", "da", "dal", "de", "del", "dela", "de la",
+                                                              "della", "des", "di", "do", "dos", "du", "van", "von" });
+            NAME_PREFIXES[NameType.GENERIC] =
+                    Collections.UnmodifiableSet(
+                            new HashSet<string>() { "da", "dal", "de", "del", "dela", "de la", "della",
+                                                          "des", "di", "do", "dos", "du", "van", "von" });
+        }
+
+        /// <summary>
+        /// Joins some strings with an internal separator.
+        /// </summary>
+        /// <param name="strings">Strings to join.</param>
+        /// <param name="sep">String to separate them with.</param>
+        /// <returns>A single string consisting of each element of <paramref name="strings"/> interleaved by <paramref name="sep"/>.</returns>
+        private static string Join(IEnumerable<string> strings, string sep)
+        {
+            StringBuilder sb = new StringBuilder();
+            using (IEnumerator<string> si = strings.GetEnumerator())
+            {
+                if (si.MoveNext())
+                {
+                    sb.Append(si.Current);
+                }
+                while (si.MoveNext())
+                {
+                    sb.Append(sep).Append(si.Current);
+                }
+            }
+
+            return sb.ToString();
+        }
+
+        private static readonly int DEFAULT_MAX_PHONEMES = 20;
+
+        private readonly Lang lang;
+
+        private readonly NameType nameType;
+
+        private readonly RuleType ruleType;
+
+        private readonly bool concat;
+
+        private readonly int maxPhonemes;
+
+        /// <summary>
+        /// Generates a new, fully-configured phonetic engine.
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat)
+            : this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES)
+        {
+        }
+
+        /// <summary>
+        /// Generates a new, fully-configured phonetic engine.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        /// <param name="nameType">The type of names it will use.</param>
+        /// <param name="ruleType">The type of rules it will apply.</param>
+        /// <param name="concat">If it will concatenate multiple encodings.</param>
+        /// <param name="maxPhonemes">The maximum number of phonemes that will be handled.</param>
+        public PhoneticEngine(NameType nameType, RuleType ruleType, bool concat,
+                              int maxPhonemes)
+        {
+            if (ruleType == RuleType.RULES)
+            {
+                throw new ArgumentException("ruleType must not be " + RuleType.RULES);
+            }
+            this.nameType = nameType;
+            this.ruleType = ruleType;
+            this.concat = concat;
+            this.lang = Lang.GetInstance(nameType);
+            this.maxPhonemes = maxPhonemes;
+        }
+
+        /// <summary>
+        /// Applies the final rules to convert from a language-specific phonetic representation to a
+        /// language-independent representation.
+        /// </summary>
+        /// <param name="phonemeBuilder">The current phonemes.</param>
+        /// <param name="finalRules">The final rules to apply.</param>
+        /// <returns>The resulting phonemes.</returns>
+        private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder,
+                                               IDictionary<string, IList<Rule>> finalRules)
+        {
+            if (finalRules == null)
+            {
+                throw new ArgumentNullException("finalRules can not be null");
+            }
+            if (finalRules.Count == 0)
+            {
+                return phonemeBuilder;
+            }
+
+            ISet<Phoneme> phonemes = new SortedSet<Phoneme>(Phoneme.COMPARER);
+
+            foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
+            {
+                PhonemeBuilder subBuilder = PhonemeBuilder.Empty(phoneme.Languages);
+                string phonemeText = phoneme.GetPhonemeText();
+
+                for (int i = 0; i < phonemeText.Length;)
+                {
+                    RulesApplication rulesApplication =
+                            new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
+                    bool found = rulesApplication.IsFound;
+                    subBuilder = rulesApplication.PhonemeBuilder;
+
+                    if (!found)
+                    {
+                        // not found, appending as-is
+                        subBuilder.Append(phonemeText.Substring(i, 1));
+                    }
+
+                    i = rulesApplication.I;
+                }
+
+                phonemes.UnionWith(subBuilder.Phonemes);
+            }
+
+            return new PhonemeBuilder(phonemes.ToList());
+        }
+
+        /// <summary>
+        /// Encodes a string to its phonetic representation.
+        /// </summary>
+        /// <param name="input">The string to encode.</param>
+        /// <returns>The encoding of the input.</returns>
+        public virtual string Encode(string input)
+        {
+            LanguageSet languageSet = this.lang.GuessLanguages(input);
+            return Encode(input, languageSet);
+        }
+
+        /// <summary>
+        /// Encodes an input string into an output phonetic representation, given a set of possible origin languages.
+        /// </summary>
+        /// <param name="input">String to phoneticise; a string with dashes or spaces separating each word.</param>
+        /// <param name="languageSet"></param>
+        /// <returns>A phonetic representation of the input; a string containing '-'-separated phonetic representations of the input.</returns>
+        public virtual string Encode(string input, LanguageSet languageSet)
+        {
+            IDictionary<string, IList<Rule>> rules = Rule.GetInstanceMap(this.nameType, RuleType.RULES, languageSet);
+            // rules common across many (all) languages
+            IDictionary<string, IList<Rule>> finalRules1 = Rule.GetInstanceMap(this.nameType, this.ruleType, "common");
+            // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages
+            IDictionary<string, IList<Rule>> finalRules2 = Rule.GetInstanceMap(this.nameType, this.ruleType, languageSet);
+
+            // tidy the input
+            // lower case is a locale-dependent operation
+            input = input.ToLowerInvariant().Replace('-', ' ').Trim();
+
+            if (this.nameType == NameType.GENERIC)
+            {
+                if (input.Length >= 2 && input.Substring(0, 2 - 0).Equals("d'"))
+                { // check for d'
+                    string remainder = input.Substring(2);
+                    string combined = "d" + remainder;
+                    return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                }
+                foreach (string l in NAME_PREFIXES[this.nameType])
+                {
+                    // handle generic prefixes
+                    if (input.StartsWith(l + " ", StringComparison.Ordinal))
+                    {
+                        // check for any prefix in the words list
+                        string remainder = input.Substring(l.Length + 1); // input without the prefix
+                        string combined = l + remainder; // input with prefix without space
+                        return "(" + Encode(remainder) + ")-(" + Encode(combined) + ")";
+                    }
+                }
+            }
+
+            IList<string> words = WHITESPACE.Split(input).ToList();
+            IList<string> words2 = new List<string>();
+
+            // special-case handling of word prefixes based upon the name type
+            switch (this.nameType)
+            {
+                case NameType.SEPHARDIC:
+                    foreach (string aWord in words)
+                    {
+                        string[] parts = aWord.Split(new char[] { '\'' }, StringSplitOptions.RemoveEmptyEntries);
+                        string lastPart = parts[parts.Length - 1];
+                        words2.Add(lastPart);
+                    }
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.ASHKENAZI:
+                    words2.AddRange(words);
+                    words2.RemoveAll(NAME_PREFIXES[this.nameType]);
+                    break;
+                case NameType.GENERIC:
+                    words2.AddRange(words);
+                    break;
+                default:
+                    throw new InvalidOperationException("Unreachable case: " + this.nameType);
+            }
+
+            if (this.concat)
+            {
+                // concat mode enabled
+                input = Join(words2, " ");
+            }
+            else if (words2.Count == 1)
+            {
+                // not a multi-word name
+                //input = words.iterator().next();
+                input = words.FirstOrDefault();
+            }
+            else
+            {
+                // encode each word in a multi-word name separately (normally used for approx matches)
+                StringBuilder result = new StringBuilder();
+                foreach (string word in words2)
+                {
+                    result.Append("-").Append(Encode(word));
+                }
+                // return the result without the leading "-"
+                return result.ToString(1, result.Length - 1);
+            }
+
+            PhonemeBuilder phonemeBuilder = PhonemeBuilder.Empty(languageSet);
+
+            // loop over each char in the input - we will handle the increment manually
+            for (int i = 0; i < input.Length;)
+            {
+                RulesApplication rulesApplication =
+                        new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).Invoke();
+                i = rulesApplication.I;
+                phonemeBuilder = rulesApplication.PhonemeBuilder;
+            }
+
+            // Apply the general rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules1);
+            // Apply the language-specific rules
+            phonemeBuilder = ApplyFinalRules(phonemeBuilder, finalRules2);
+
+            return phonemeBuilder.MakeString();
+        }
+
+        /// <summary>
+        /// Gets the Lang language guessing rules being used.
+        /// </summary>
+        public virtual Lang Lang
+        {
+            get { return this.lang; }
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Bm.NameType"/> being used.
+        /// </summary>
+        public virtual NameType NameType
+        {
+            get { return this.nameType; }
+        }
+
+        /// <summary>
+        /// Gets the <see cref="Bm.RuleType"/> being used.
+        /// </summary>
+        public virtual RuleType RuleType
+        {
+            get { return this.ruleType; }
+        }
+
+        /// <summary>
+        /// Gets whether multiple phonetic encodings are concatenated or just the first one is kept.
+        /// Returns <c>true</c> if multiple phonetic encodings are returned, <c>false</c> if just the first is.
+        /// </summary>
+        public virtual bool IsConcat
+        {
+            get { return this.concat; }
+        }
+
+        /// <summary>
+        /// Gets the maximum number of phonemes the engine will calculate for a given input.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        public virtual int MaxPhonemes
+        {
+            get { return this.maxPhonemes; }
+        }
+    }
+}
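
For orientation, a minimal usage sketch of the engine above (illustrative only, not taken from the commit itself; it assumes the RuleType.APPROX value ported from commons-codec, since only RuleType.RULES is named in this hunk):

    using System;
    using Lucene.Net.Analysis.Phonetic.Language.Bm;

    public static class PhoneticEngineDemo
    {
        public static void Main()
        {
            // RuleType.RULES is rejected by the constructor, so an approximate rule
            // set is used; the third argument controls whether multiple encodings
            // are concatenated or only the first is kept (see IsConcat above).
            var engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);

            // Stage 1 applies language-specific rules based on the guessed source
            // language(s); stage 2 folds the result into the pan-European
            // representation described in the class remarks.
            string phonemes = engine.Encode("angelo");

            Console.WriteLine(phonemes); // pipe-separated phonetic alternatives
        }
    }

Because the engine is immutable and thread-safe, a single instance can be shared; changing any setting means constructing a new engine, as the class remarks note.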

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
new file mode 100644
index 0000000..c70d404
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ResourceConstants.cs
@@ -0,0 +1,37 @@
+// commons-codec version compatibility level: 1.9
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Constants used to process resource files.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    internal class ResourceConstants
+    {
+        public static readonly string CMT = "//";
+        public static readonly Encoding ENCODING = Encoding.UTF8;
+        public static readonly string EXT_CMT_END = "*/";
+        public static readonly string EXT_CMT_START = "/*";
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
new file mode 100644
index 0000000..52f3d9a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs
@@ -0,0 +1,1069 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A phoneme rule.
+    /// </summary>
+    /// <remarks>
+    /// Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
+    /// and a logical flag indicating if all languages must be in play. A rule matches if:
+    /// <list type="bullet">
+    ///     <item><description>the pattern matches at the current position</description></item>
+    ///     <item><description>the string up until the beginning of the pattern matches the left context</description></item>
+    ///     <item><description>the string from the end of the pattern matches the right context</description></item>
+    ///     <item><description>logical is ALL and all languages are in scope; or</description></item>
+    ///     <item><description>logical is any other value and at least one language is in scope</description></item>
+    /// </list>
+    /// <para/>
+    /// Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
+    /// to explicitly construct their own.
+    /// <para/>
+    /// Rules are immutable and thread-safe.
+    /// <para/>
+    /// <b>Rules resources</b>
+    /// <para/>
+    /// Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
+    /// named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_<see cref="RuleType"/>_[language].txt</c>
+    /// <para/>
+    /// The format of these resources is the following:
+    /// <list type="table">
+    ///     <item>
+    ///         <term>Rules:</term>
+    ///         <description>
+    ///             whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
+    ///             will be interpreted as:
+    ///             <list type="number">
+    ///                 <item><description>pattern</description></item>
+    ///                 <item><description>left context</description></item>
+    ///                 <item><description>right context</description></item>
+    ///                 <item><description>phoneme</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Rule
+    {
+        private static Regex PIPE = new Regex("[|]", RegexOptions.Compiled);
+        private static Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+        private static Regex PLUS = new Regex("[+]", RegexOptions.Compiled);
+
+        private class AllStringsRMatcher : IRPattern
+        {
+            public bool IsMatch(StringBuilder input)
+            {
+                return true;
+            }
+
+            public bool IsMatch(string input)
+            {
+                return true;
+            }
+
+            public bool IsMatch(ICharSequence input)
+            {
+                return true;
+            }
+        }
+
+        public static readonly IRPattern ALL_STRINGS_RMATCHER = new AllStringsRMatcher();
+
+
+        public static readonly string ALL = "ALL";
+
+        private static readonly string DOUBLE_QUOTE = "\"";
+
+        private static readonly string HASH_INCLUDE = "#include";
+
+        private static readonly IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> RULES =
+                new Dictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>>();
+
+        static Rule()
+        {
+            foreach (NameType s in Enum.GetValues(typeof(NameType)))
+            {
+                IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> rts =
+                        new Dictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>();
+
+                foreach (RuleType rt in Enum.GetValues(typeof(RuleType)))
+                {
+                    IDictionary<string, IDictionary<string, IList<Rule>>> rs = new Dictionary<string, IDictionary<string, IList<Rule>>>();
+
+                    Languages ls = Languages.GetInstance(s);
+                    foreach (string l in ls.GetLanguages())
+                    {
+                        try
+                        {
+                            rs[l] = ParseRules(CreateScanner(s, rt, l), CreateResourceName(s, rt, l));
+                        }
+                        catch (InvalidOperationException e)
+                        {
+                            throw new InvalidOperationException("Problem processing " + CreateResourceName(s, rt, l), e);
+                        }
+                    }
+                    if (!rt.Equals(RuleType.RULES))
+                    {
+                        rs["common"] = ParseRules(CreateScanner(s, rt, "common"), CreateResourceName(s, rt, "common"));
+                    }
+
+                    rts[rt] = Collections.UnmodifiableMap(rs);
+                }
+
+                RULES[s] = Collections.UnmodifiableMap(rts);
+            }
+        }
+
+        private static bool Contains(ICharSequence chars, char input)
+        {
+            for (int i = 0; i < chars.Length; i++)
+            {
+                if (chars[i] == input)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+        private static bool Contains(string chars, char input)
+        {
+            for (int i = 0; i < chars.Length; i++)
+            {
+                if (chars[i] == input)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+        private static bool Contains(StringBuilder chars, char input)
+        {
+            for (int i = 0; i < chars.Length; i++)
+            {
+                if (chars[i] == input)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        private static string CreateResourceName(NameType nameType, RuleType rt, string lang)
+        {
+            return string.Format("{0}_{1}_{2}.txt",
+                                 nameType.GetName(), rt.GetName(), lang);
+        }
+
+        private static TextReader CreateScanner(NameType nameType, RuleType rt, string lang)
+        {
+            string resName = CreateResourceName(nameType, rt, lang);
+            Stream rulesIS = typeof(Languages).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Languages), resName);
+
+            if (rulesIS == null)
+            {
+                throw new ArgumentException("Unable to load resource: " + resName);
+            }
+
+            return new StreamReader(rulesIS, ResourceConstants.ENCODING);
+        }
+
+        private static TextReader CreateScanner(string lang)
+        {
+            string resName = string.Format("{0}.txt", lang); 
+            Stream rulesIS = typeof(Languages).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Languages), resName);
+
+            if (rulesIS == null)
+            {
+                throw new ArgumentException("Unable to load resource: " + resName);
+            }
+
+            return new StreamReader(rulesIS, ResourceConstants.ENCODING);
+        }
+
+        private static bool EndsWith(ICharSequence input, string suffix)
+        {
+            if (suffix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = input.Length - 1, j = suffix.Length - 1; j >= 0; i--, j--)
+            {
+                if (input[i] != suffix[j])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private static bool EndsWith(string input, string suffix)
+        {
+            if (suffix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = input.Length - 1, j = suffix.Length - 1; j >= 0; i--, j--)
+            {
+                if (input[i] != suffix[j])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private static bool EndsWith(StringBuilder input, string suffix)
+        {
+            if (suffix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = input.Length - 1, j = suffix.Length - 1; j >= 0; i--, j--)
+            {
+                if (input[i] != suffix[j])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and languages.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
+                                     LanguageSet langs)
+        {
+            IDictionary<string, IList<Rule>> ruleMap = GetInstanceMap(nameType, rt, langs);
+            IList<Rule> allRules = new List<Rule>();
+            foreach (IList<Rule> rules in ruleMap.Values)
+            {
+                allRules.AddRange(rules);
+            }
+            return allRules;
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and a single language.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
+        public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
+        {
+            return GetInstance(nameType, rt, LanguageSet.From(new HashSet<string>() { lang }));
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and languages.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="langs">The set of languages to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             LanguageSet langs)
+        {
+            return langs.IsSingleton ? GetInstanceMap(nameType, rt, langs.GetAny()) :
+                                         GetInstanceMap(nameType, rt, Languages.ANY);
+        }
+
+        /// <summary>
+        /// Gets rules for a combination of name type, rule type and a single language.
+        /// <para/>
+        /// since 1.9
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
+        /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
+        /// <param name="lang">The language to consider.</param>
+        /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
+        public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
+                                                             string lang)
+        {
+            IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> nameTypes;
+            IDictionary<string, IDictionary<string, IList<Rule>>> ruleTypes;
+            IDictionary<string, IList<Rule>> rules = null;
+
+            if (!(RULES.TryGetValue(nameType, out nameTypes) && nameTypes != null &&
+                nameTypes.TryGetValue(rt, out ruleTypes) && ruleTypes != null &&
+                ruleTypes.TryGetValue(lang, out rules) && rules != null))
+            {
+                throw new ArgumentException(string.Format("No rules found for {0}, {1}, {2}.",
+                                                   nameType.GetName(), rt.GetName(), lang));
+            }
+
+            return rules;
+        }
+
+        private static Phoneme ParsePhoneme(string ph)
+        {
+            int open = ph.IndexOf("[");
+            if (open >= 0)
+            {
+                if (!ph.EndsWith("]", StringComparison.Ordinal))
+                {
+                    throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
+                }
+                string before = ph.Substring(0, open - 0);
+                string input = ph.Substring(open + 1, (ph.Length - 1) - (open + 1));
+                ISet<string> langs = new HashSet<string>(PLUS.Split(input));
+
+                return new Phoneme(before, LanguageSet.From(langs));
+            }
+            else
+            {
+                return new Phoneme(ph, Languages.ANY_LANGUAGE);
+            }
+        }
+
+        private static IPhonemeExpr ParsePhonemeExpr(string ph)
+        {
+            if (ph.StartsWith("(", StringComparison.Ordinal))
+            { // we have a bracketed list of options
+                if (!ph.EndsWith(")", StringComparison.Ordinal))
+                {
+                    throw new ArgumentException("Phoneme starts with '(' so must end with ')'");
+                }
+
+                IList<Phoneme> phs = new List<Phoneme>();
+                string body = ph.Substring(1, (ph.Length - 1) - 1);
+                foreach (string part in PIPE.Split(body))
+                {
+                    phs.Add(ParsePhoneme(part));
+                }
+                if (body.StartsWith("|", StringComparison.Ordinal) || body.EndsWith("|", StringComparison.Ordinal))
+                {
+                    phs.Add(new Phoneme("", Languages.ANY_LANGUAGE));
+                }
+
+                return new PhonemeList(phs);
+            }
+            else
+            {
+                return ParsePhoneme(ph);
+            }
+        }
+
+        private class RuleAnonymousHelper : Rule
+        {
+            private readonly int myLine;
+            private readonly string loc;
+
+            public RuleAnonymousHelper(string pat, string lCon, string rCon, IPhonemeExpr ph, int cLine, string location)
+                : base(pat, lCon, rCon, ph)
+            {
+                this.myLine = cLine;
+                this.loc = location;
+            }
+
+            public override string ToString()
+            {
+                StringBuilder sb = new StringBuilder();
+                sb.Append("Rule");
+                sb.Append("{line=").Append(myLine);
+                sb.Append(", loc='").Append(loc).Append('\'');
+                sb.Append('}');
+                return sb.ToString();
+            }
+        }
+
+        private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
+        {
+            IDictionary<string, IList<Rule>> lines = new HashMap<string, IList<Rule>>();
+            int currentLine = 0;
+
+            bool inMultilineComment = false;
+            string rawLine;
+            try
+            {
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    currentLine++;
+                    string line = rawLine;
+
+                    if (inMultilineComment)
+                    {
+                        if (line.EndsWith(ResourceConstants.EXT_CMT_END))
+                        {
+                            inMultilineComment = false;
+                        }
+                    }
+                    else
+                    {
+                        if (line.StartsWith(ResourceConstants.EXT_CMT_START))
+                        {
+                            inMultilineComment = true;
+                        }
+                        else
+                        {
+                            // discard comments
+                            int cmtI = line.IndexOf(ResourceConstants.CMT);
+                            if (cmtI >= 0)
+                            {
+                                line = line.Substring(0, cmtI);
+                            }
+
+                            // trim leading-trailing whitespace
+                            line = line.Trim();
+
+                            if (line.Length == 0)
+                            {
+                                continue; // empty lines can be safely skipped
+                            }
+
+                            if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
+                            {
+                                // include statement
+                                string incl = line.Substring(HASH_INCLUDE.Length).Trim();
+                                if (incl.Contains(" "))
+                                {
+                                    throw new ArgumentException("Malformed import statement '" + rawLine + "' in " +
+                                                                       location);
+                                }
+                                else
+                                {
+                                    lines.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
+                                }
+                            }
+                            else
+                            {
+                                // rule
+                                string[] parts = WHITESPACE.Split(line);
+                                if (parts.Length != 4)
+                                {
+                                    throw new ArgumentException("Malformed rule statement split into " + parts.Length +
+                                                                       " parts: " + rawLine + " in " + location);
+                                }
+                                else
+                                {
+                                    try
+                                    {
+                                        string pat = StripQuotes(parts[0]);
+                                        string lCon = StripQuotes(parts[1]);
+                                        string rCon = StripQuotes(parts[2]);
+                                        IPhonemeExpr ph = ParsePhonemeExpr(StripQuotes(parts[3]));
+                                        int cLine = currentLine;
+                                        Rule r = new RuleAnonymousHelper(pat, lCon, rCon, ph, cLine, location);
+
+                                        string patternKey = r.pattern.Substring(0, 1 - 0);
+                                        IList<Rule> rules;
+                                        if (!lines.TryGetValue(patternKey, out rules) || rules == null)
+                                        {
+                                            rules = new List<Rule>();
+                                            lines[patternKey] = rules;
+                                        }
+                                        rules.Add(r);
+                                    }
+                                    catch (ArgumentException e)
+                                    {
+                                        throw new InvalidOperationException("Problem parsing line '" + currentLine + "' in " +
+                                                                        location, e);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                reader.Dispose();
+            }
+
+            return lines;
+        }
+
+        private class RPatternHelper : IRPattern
+        {
+            private readonly Func<StringBuilder, bool> isMatchSB;
+            private readonly Func<string, bool> isMatchStr;
+            private readonly Func<ICharSequence, bool> isMatchCS;
+
+            public RPatternHelper(Func<StringBuilder, bool> isMatchSB, Func<string, bool> isMatchStr, Func<ICharSequence, bool> isMatchCS)
+            {
+                this.isMatchSB = isMatchSB;
+                this.isMatchStr = isMatchStr;
+                this.isMatchCS = isMatchCS;
+            }
+
+            public bool IsMatch(StringBuilder input)
+            {
+                return isMatchSB(input);
+            }
+
+            public bool IsMatch(string input)
+            {
+                return isMatchStr(input);
+            }
+
+            public bool IsMatch(ICharSequence input)
+            {
+                return isMatchCS(input);
+            }
+        }
+
+        /// <summary>
+        /// Attempts to compile the regex into direct string ops, falling back to <see cref="Regex"/> and <see cref="Match"/> in the worst case.
+        /// </summary>
+        /// <param name="regex">The regular expression to compile.</param>
+        /// <returns>An RPattern that will match this regex.</returns>
+        private static IRPattern GetPattern(string regex)
+        {
+            bool startsWith = regex.StartsWith("^");
+            bool endsWith = regex.EndsWith("$");
+            string content = regex.Substring(startsWith ? 1 : 0, (endsWith ? regex.Length - 1 : regex.Length) - (startsWith ? 1 : 0));
+            bool boxes = content.Contains("[");
+
+            if (!boxes)
+            {
+                if (startsWith && endsWith)
+                {
+                    // exact match
+                    if (content.Length == 0)
+                    {
+                        // empty
+                        return new RPatternHelper(isMatchSB: (input) =>
+                        {
+                            return input.Length == 0;
+                        }, isMatchStr: (input) =>
+                        {
+                            return input.Length == 0;
+                        }, isMatchCS: (input) =>
+                        {
+                            return input.Length == 0;
+                        });
+                    }
+                    else
+                    {
+
+                        return new RPatternHelper(isMatchSB: (input) =>
+                        {
+                            return input.Equals(content);
+                        }, isMatchStr: (input) =>
+                        {
+                            return input.Equals(content);
+                        }, isMatchCS: (input) =>
+                        {
+                            return input.Equals(content);
+                        });
+                    }
+                }
+                else if ((startsWith || endsWith) && content.Length == 0)
+                {
+                    // matches every string
+                    return ALL_STRINGS_RMATCHER;
+                }
+                else if (startsWith)
+                {
+                    // matches from start
+                    return new RPatternHelper(isMatchSB: (input) =>
+                    {
+                        return StartsWith(input, content);
+                    }, isMatchStr: (input) =>
+                    {
+                        return StartsWith(input, content);
+                    }, isMatchCS: (input) =>
+                    {
+                        return StartsWith(input, content);
+                    });
+
+                }
+                else if (endsWith)
+                {
+                    // matches from end
+                    return new RPatternHelper(isMatchSB: (input) =>
+                    {
+                        return EndsWith(input, content);
+                    }, isMatchStr: (input) =>
+                    {
+                        return EndsWith(input, content);
+                    }, isMatchCS: (input) =>
+                    {
+                        return EndsWith(input, content);
+                    });
+                }
+            }
+            else
+            {
+                bool startsWithBox = content.StartsWith("[", StringComparison.Ordinal);
+                bool endsWithBox = content.EndsWith("]", StringComparison.Ordinal);
+
+                if (startsWithBox && endsWithBox)
+                {
+                    string boxContent = content.Substring(1, (content.Length - 1) - 1);
+                    if (!boxContent.Contains("["))
+                    {
+                        // box containing alternatives
+                        bool negate = boxContent.StartsWith("^", StringComparison.Ordinal);
+                        if (negate)
+                        {
+                            boxContent = boxContent.Substring(1);
+                        }
+                        string bContent = boxContent;
+                        bool shouldMatch = !negate;
+
+                        if (startsWith && endsWith)
+                        {
+                            // exact match
+                            return new RPatternHelper(isMatchSB: (input) =>
+                            {
+                                return input.Length == 1 && Contains(bContent, input[0]) == shouldMatch;
+                            }, isMatchStr: (input) =>
+                            {
+                                return input.Length == 1 && Contains(bContent, input[0]) == shouldMatch;
+                            }, isMatchCS: (input) =>
+                            {
+                                return input.Length == 1 && Contains(bContent, input[0]) == shouldMatch;
+                            });
+                        }
+                        else if (startsWith)
+                        {
+                            // first char
+                            return new RPatternHelper(isMatchSB: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[0]) == shouldMatch;
+                            }, isMatchStr: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[0]) == shouldMatch;
+                            }, isMatchCS: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[0]) == shouldMatch;
+                            });
+                        }
+                        else if (endsWith)
+                        {
+                            // last char
+                            return new RPatternHelper(isMatchSB: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch;
+                            }, isMatchStr: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch;
+                            }, isMatchCS: (input) =>
+                            {
+                                return input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch;
+                            });
+                        }
+                    }
+                }
+            }
+            Regex pattern = new Regex(regex, RegexOptions.Compiled);
+
+            return new RPatternHelper(isMatchSB: (input) =>
+            {
+                Match matcher = pattern.Match(input.ToString());
+                return matcher.Success;
+            }, isMatchStr: (input) =>
+            {
+                Match matcher = pattern.Match(input);
+                return matcher.Success;
+            }, isMatchCS: (input) =>
+            {
+                Match matcher = pattern.Match(input.ToString());
+                return matcher.Success;
+            });
+        }
+
+        private static bool StartsWith(ICharSequence input, string prefix)
+        {
+            if (prefix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = 0; i < prefix.Length; i++)
+            {
+                if (input[i] != prefix[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private static bool StartsWith(string input, string prefix)
+        {
+            if (prefix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = 0; i < prefix.Length; i++)
+            {
+                if (input[i] != prefix[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private static bool StartsWith(StringBuilder input, string prefix)
+        {
+            if (prefix.Length > input.Length)
+            {
+                return false;
+            }
+            for (int i = 0; i < prefix.Length; i++)
+            {
+                if (input[i] != prefix[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        private static string StripQuotes(string str)
+        {
+            if (str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal))
+            {
+                str = str.Substring(1);
+            }
+
+            if (str.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal))
+            {
+                str = str.Substring(0, str.Length - 1);
+            }
+
+            return str;
+        }
+
+        private readonly IRPattern lContext;
+
+        private readonly string pattern;
+
+        private readonly IPhonemeExpr phoneme;
+
+        private readonly IRPattern rContext;
+
+        /// <summary>
+        /// Creates a new rule.
+        /// </summary>
+        /// <param name="pattern">The pattern.</param>
+        /// <param name="lContext">The left context.</param>
+        /// <param name="rContext">The right context.</param>
+        /// <param name="phoneme">The resulting phoneme.</param>
+        public Rule(string pattern, string lContext, string rContext, IPhonemeExpr phoneme)
+        {
+            this.pattern = pattern;
+            this.lContext = GetPattern(lContext + "$");
+            this.rContext = GetPattern("^" + rContext);
+            this.phoneme = phoneme;
+        }
+
+        /// <summary>
+        /// Gets the left context pattern. This is a regular expression that must match to the left of the pattern.
+        /// </summary>
+        public virtual IRPattern LContext
+        {
+            get { return this.lContext; }
+        }
+
+        /// <summary>
+        /// Gets the pattern. This is a string-literal that must exactly match.
+        /// </summary>
+        public virtual string Pattern
+        {
+            get { return this.pattern; }
+        }
+
+        /// <summary>
+        /// Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
+        /// </summary>
+        public virtual IPhonemeExpr Phoneme
+        {
+            get { return this.phoneme; }
+        }
+
+        /// <summary>
+        /// Gets the right context pattern. This is a regular expression that must match to the right of the pattern.
+        /// </summary>
+        public virtual IRPattern RContext
+        {
+            get { return this.rContext; }
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="ICharSequence"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        public virtual bool PatternAndContextMatches(ICharSequence input, int i)
+        {
+            if (i < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(i), "Cannot match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            if (!input.SubSequence(i, ipl).Equals(this.pattern))
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.SubSequence(ipl, input.Length)))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.SubSequence(0, i));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="string"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(string input, int i) 
+        {
+            if (i < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(i), "Cannot match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            if (!input.Substring(i, (ipl - i)).Equals(this.pattern))
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.Substring(ipl, (input.Length - ipl))))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.Substring(0, (i - 0)));
+        }
+
+        /// <summary>
+        /// Decides if the pattern and context match the input starting at a position. It is a match if the
+        /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
+        /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The input <see cref="StringBuilder"/>.</param>
+        /// <param name="i">The int position within the input.</param>
+        /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
+        // LUCENENET specific
+        public virtual bool PatternAndContextMatches(StringBuilder input, int i)
+        {
+            if (i < 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(i), "Cannot match pattern at negative indexes");
+            }
+
+            int patternLength = this.pattern.Length;
+            int ipl = i + patternLength;
+
+            if (ipl > input.Length)
+            {
+                // not enough room for the pattern to match
+                return false;
+            }
+
+            // evaluate the pattern, left context and right context
+            // fail early if any of the evaluations is not successful
+            if (!input.ToString(i, (ipl - i)).Equals(this.pattern))
+            {
+                return false;
+            }
+            else if (!this.rContext.IsMatch(input.ToString(ipl, (input.Length - ipl))))
+            {
+                return false;
+            }
+            return this.lContext.IsMatch(input.ToString(0, (i - 0)));
+        }
+
+    }
+
+    public sealed class Phoneme : IPhonemeExpr
+    {
+        private class PhonemeComparer : IComparer<Phoneme>
+        {
+            public int Compare(Phoneme o1, Phoneme o2)
+            {
+                for (int i = 0; i < o1.phonemeText.Length; i++)
+                {
+                    if (i >= o2.phonemeText.Length)
+                    {
+                        return +1;
+                    }
+                    int c = o1.phonemeText[i] - o2.phonemeText[i];
+                    if (c != 0)
+                    {
+                        return c;
+                    }
+                }
+
+                if (o1.phonemeText.Length < o2.phonemeText.Length)
+                {
+                    return -1;
+                }
+
+                return 0;
+            }
+        }
+
+        public static readonly IComparer<Phoneme> COMPARER = new PhonemeComparer();
+        private readonly StringBuilder phonemeText;
+        private readonly LanguageSet languages;
+
+        public Phoneme(string phonemeText, LanguageSet languages)
+        {
+            this.phonemeText = new StringBuilder(phonemeText);
+            this.languages = languages;
+        }
+
+        public Phoneme(StringBuilder phonemeText, LanguageSet languages)
+        {
+            this.phonemeText = new StringBuilder(phonemeText.ToString());
+            this.languages = languages;
+        }
+
+        public Phoneme(ICharSequence phonemeText, LanguageSet languages)
+        {
+            this.phonemeText = new StringBuilder(phonemeText.ToString());
+            this.languages = languages;
+        }
+
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight)
+            : this(phonemeLeft.phonemeText, phonemeLeft.languages)
+        {
+            this.phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight, LanguageSet languages)
+            : this(phonemeLeft.phonemeText, languages)
+        {
+            this.phonemeText.Append(phonemeRight.phonemeText);
+        }
+
+        public Phoneme Append(string str)
+        {
+            this.phonemeText.Append(str);
+            return this;
+        }
+
+        public LanguageSet Languages
+        {
+            get { return this.languages; }
+        }
+
+        public IList<Phoneme> Phonemes
+        {
+            get { return new Phoneme[] { this }; }
+        }
+
+        public string GetPhonemeText()
+        {
+            return this.phonemeText.ToString();
+        }
+
+        [Obsolete("since 1.9")]
+        public Phoneme Join(Phoneme right)
+        {
+            return new Phoneme(this.phonemeText.ToString() + right.phonemeText.ToString(),
+                               this.languages.RestrictTo(right.Languages));
+        }
+    }
+
+    public interface IPhonemeExpr
+    {
+        IList<Phoneme> Phonemes { get; }
+    }
+
+    public sealed class PhonemeList : IPhonemeExpr
+    {
+        private readonly IList<Phoneme> phonemes;
+
+        public PhonemeList(IList<Phoneme> phonemes)
+        {
+            this.phonemes = phonemes;
+        }
+
+        public IList<Phoneme> Phonemes
+        {
+            get { return this.phonemes; }
+        }
+    }
+
+    /// <summary>
+    /// A minimal wrapper around the functionality of <see cref="Rule"/> Pattern that we use, to allow for alternate implementations.
+    /// </summary>
+    public interface IRPattern
+    {
+        bool IsMatch(ICharSequence input);
+        bool IsMatch(string input);
+        bool IsMatch(StringBuilder input);
+    }
+}
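The GetPattern method above is the performance-sensitive piece of Rule: most left/right contexts in the bm rule files are empty strings, anchored literals, or a single [..] character class, so it compiles those into plain string operations and only falls back to a compiled Regex for everything else. Below is a minimal standalone sketch of that fast-path idea; FastMatcher and MakeMatcher are illustrative names rather than part of the ported API, and the character-class cases the real code also optimizes are omitted here.

using System;
using System.Text.RegularExpressions;

internal static class FastMatcher
{
    // Returns a predicate equivalent to Regex.IsMatch(input, regex) for simple
    // anchored-literal patterns, without allocating a Regex for those cases.
    internal static Func<string, bool> MakeMatcher(string regex)
    {
        bool startsWith = regex.StartsWith("^", StringComparison.Ordinal);
        bool endsWith = regex.EndsWith("$", StringComparison.Ordinal);
        string content = regex.Substring(startsWith ? 1 : 0,
            (endsWith ? regex.Length - 1 : regex.Length) - (startsWith ? 1 : 0));

        if (!content.Contains("["))
        {
            if (startsWith && endsWith)
                return input => input.Equals(content, StringComparison.Ordinal); // exact match
            if ((startsWith || endsWith) && content.Length == 0)
                return input => true;                                            // matches every string
            if (startsWith)
                return input => input.StartsWith(content, StringComparison.Ordinal);
            if (endsWith)
                return input => input.EndsWith(content, StringComparison.Ordinal);
        }

        Regex pattern = new Regex(regex, RegexOptions.Compiled); // worst case: full regex
        return input => pattern.IsMatch(input);
    }

    internal static void Main()
    {
        Console.WriteLine(MakeMatcher("^ab$")("ab"));    // True, no Regex used
        Console.WriteLine(MakeMatcher("^ab")("abc"));    // True, no Regex used
        Console.WriteLine(MakeMatcher("^a[bc]$")("ac")); // True, via the Regex fallback
    }
}

The IRPattern/RPatternHelper pair above serves the same purpose, but exposes the three IsMatch overloads so callers can test StringBuilder and ICharSequence inputs without first converting them to strings.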

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
new file mode 100644
index 0000000..ff3af97
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/RuleType.cs
@@ -0,0 +1,68 @@
+// commons-codec version compatibility level: 1.9
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Types of rule.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public enum RuleType
+    {
+        /// <summary>
+        /// Approximate rules, which will lead to the largest number of phonetic interpretations.
+        /// </summary>
+        APPROX,
+
+        /// <summary>
+        /// Exact rules, which will lead to a minimum number of phonetic interpretations.
+        /// </summary>
+        EXACT,
+
+        /// <summary>
+        /// For internal use only. Please use <see cref="APPROX"/> or <see cref="EXACT"/>.
+        /// </summary>
+        RULES
+    }
+
+    public static class RuleTypeExtensions
+    {
+        /// <summary>
+        /// Gets the rule name.
+        /// </summary>
+        /// <param name="ruleType">The <see cref="RuleType"/>.</param>
+        /// <returns>The rule name.</returns>
+        public static string GetName(this RuleType ruleType)
+        {
+            switch (ruleType)
+            {
+                case RuleType.APPROX:
+                    return "approx";
+                case RuleType.EXACT:
+                    return "exact";
+                case RuleType.RULES:
+                    return "rules";
+            }
+
+            throw new ArgumentException("Invalid ruleType");
+        }
+    }
+}
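Because a C# enum member cannot carry extra state, the lowercase rule names live in the GetName() extension above rather than on the enum itself. A small usage sketch follows, assuming only the types defined in this file; the remark about resource names is an observation based on the rule files added later in this commit (e.g. ash_approx_any.txt), not a documented contract.

using System;
using Lucene.Net.Analysis.Phonetic.Language.Bm;

internal static class RuleTypeDemo
{
    internal static void Main()
    {
        Console.WriteLine(RuleType.APPROX.GetName()); // approx
        Console.WriteLine(RuleType.EXACT.GetName());  // exact

        // These tokens line up with the rule resource file names,
        // e.g. "ash_approx_any.txt" = name type + rule type + language group.
    }
}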

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
new file mode 100644
index 0000000..3f4f4c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_any.txt
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ph"    ""  ""  "f" // foreign
+"sh"    ""  ""  "S" // foreign
+"kh"    ""  ""  "x" // foreign
+
+"gli"   ""  ""  "(gli|l[italian])"
+"gni"   ""  ""  "(gni|ni[italian+french])"
+"gn"    ""  "[aeou]"    "(n[italian+french]|nj[italian+french]|gn)
+"gh"    ""  ""  "g" // It + translit. from Arabic
+"dh"    ""  ""  "d" // translit. from Arabic
+"bh"    ""  ""  "d" // translit. from Arabic
+"th"    ""  ""  "t" // translit. from Arabic
+"lh"    ""  ""  "l" // Port
+"nh"    ""  ""  "nj" // Port
+
+"ig"    "[aeiou]"   ""  "(ig|tS[spanish])"
+"ix"    "[aeiou]"   ""  "S" // Sp
+"tx"    ""  ""  "tS" // Sp
+"tj"    ""  "$"  "tS" // Sp
+"tj"    ""  ""  "dZ" // Sp
+"tg"    ""  ""  "(tg|dZ[spanish])"
+
+"gi"    ""  "[aeou]"    "dZ" // Italian
+"g" ""  "y" "Z" // French
+"gg"    ""  "[ei]"  "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+"g" ""  "[ei]"  "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])"
+
+"guy"   ""  ""  "gi"
+"gue"   ""  "$" "(k[french]|ge)"
+"gu"    ""  "[ei]"  "(g|gv") // not It
+"gu"    ""  "[ao]"  "gv" // not It
+
+"ñ" ""  ""  "(n|nj)"
+"ny"    ""  ""  "nj"
+
+"sc"    ""  "[ei]"  "(s|S[italian])"
+"sç"    ""  "[aeiou]"   "s" // not It
+"ss"    ""  ""  "s"
+"ç" ""  ""  "s"   // not It
+
+"ch"    ""  "[ei]"  "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])"
+"ch"    ""  ""  "(S|tS[spanish]|dZ[spanish])"
+
+"ci"    ""  "[aeou]"    "(tS[italian]|si)"
+"cc"	""	"[eiyéèê]"	"(tS[italian]|ks[portuguese+french+spanish])"
+"c"	""	"[eiyéèê]"	"(tS[italian]|s[portuguese+french+spanish])"
+   //array("c"	""	"[aou]"	"(k|C[".($portuguese+$spanish)."])" // "C" means that the actual letter could be "ç" (cedille omitted)
+
+"s"	"^"	""	"s"
+"s"	"[aáuiíoóeéêy]"	"[aáuiíoóeéêy]"	"(s[spanish]|z[portuguese+french+italian])"
+"s"	""	"[dglmnrv]"	"(z|Z[portuguese])"
+
+"z"	""	"$"	"(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z"	""	"[bdgv]"	"(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z"	""	"[ptckf]"	"(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+"z"	""	""	"(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp
+
+"que"	""	"$"	"(k[french]|ke)"
+"qu"	""	"[eiu]"	"k"
+"qu"	""	"[ao]"	"(kv|k)" // k is It
+
+"ex"	""	"[aáuiíoóeéêy]"	"(ez[portuguese]|eS[portuguese]|eks|egz)"
+"ex"	""	"[cs]"	"(e[portuguese]|ek)"
+
+"m"	""	"[cdglnrst]"	"(m|n[portuguese])"
+"m"	""	"[bfpv]"	"(m|n[portuguese+spanish])"
+"m"	""	"$"	"(m|n[portuguese])"
+
+"b"	"^"	""	"(b|V[spanish])"
+"v"	"^"	""	"(v|B[spanish])"
+
+ // VOWELS
+"eau"	""	""	"o" // Fr
+
+"ouh"	""	"[aioe]"	"(v[french]|uh)"
+"uh"	""	"[aioe]"	"(v|uh)"
+"ou"	""	"[aioe]"	"v" // french
+"uo"	""	""	"(vo|o)"
+"u"	""	"[aie]"	"v"
+
+"i"	"[aáuoóeéê]"	""	"j"
+"i"	""	"[aeou]"	"j"
+"y"	"[aáuiíoóeéê]"	""	"j"
+"y"	""	"[aeiíou]"	"j"
+"e"	""	"$"	"(e|E[$french])"
+
+"ão"	""	""	"(au|an)" // Port
+"ãe"	""	""	"(aj|an)" // Port
+"ãi"	""	""	"(aj|an)" // Port
+"õe"	""	""	"(oj|on)" // Port
+"où"	""	""	"u" // Fr
+"ou"	""	""	"(ou|u[french])"
+
+"â"	""	""	"a" // Port & Fr
+"à"	""	""	"a" // Port
+"á"	""	""	"a" // Port & Sp
+"ã"	""	""	"(a|an)" // Port
+"é"	""	""	"e"
+"ê"	""	""	"e" // Port & Fr
+"è"	""	""	"e" // Sp & Fr & It
+"í"	""	""	"i" // Port & Sp
+"î"	""	""	"i" // Fr
+"ô"	""	""	"o" // Port & Fr
+"ó"	""	""	"o" // Port & Sp & It
+"õ"	""	""	"(o|on)" // Port
+"ò"	""	""	"o"  // Sp & It
+"ú"	""	""	"u" // Port & Sp
+"ü"	""	""	"u" // Port & Sp
+
+ // LATIN ALPHABET
+"a"	""	""	"a"
+"b"	""	""	"(b|v[spanish])"
+"c"	""	""	"k"
+"d"	""	""	"d"
+"e"	""	""	"e"
+"f"	""	""	"f"
+"g"	""	""	"g"
+"h"	""	""	"h"
+"i"	""	""	"i"
+"j"	""	""	"(x[spanish]|Z)" // not It
+"k"	""	""	"k"
+"l"	""	""	"l"
+"m"	""	""	"m"
+"n"	""	""	"n"
+"o"	""	""	"o"
+"p"	""	""	"p"
+"q"	""	""	"k"
+"r"	""	""	"r"
+"s"	""	""	"(s|S[portuguese])"
+"t"	""	""	"t"
+"u"	""	""	"u"
+"v"	""	""	"(v|b[spanish])"
+"w"	""	""	"v"    // foreign
+"x"	""	""	"(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y"	""	""	"i"
+"z"	""	""	"z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
new file mode 100644
index 0000000..e95a756
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_common.txt
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_approx_common
+
+// REGRESSIVE ASSIMILATION OF CONSONANTS
+"n"	""	"[bp]"	"m" 
+
+// PECULIARITY OF "h" 
+"h"	""	""	"" 
+"H"	""	""	"(x|)" 
+
+// POLISH OGONEK IMPOSSIBLE
+"F" "" "[bdgkpstvzZ]h" "e"
+"F" "" "[bdgkpstvzZ]x" "e"
+"B" "" "[bdgkpstvzZ]h" "a"
+"B" "" "[bdgkpstvzZ]x" "a"
+
+// "e" and "i" ARE TO BE OMITTED BEFORE (SYLLABIC) n & l: Halperin=Halpern; Frankel = Frankl, Finkelstein = Finklstein
+"e" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln]$" ""
+
+"e" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"i" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"E" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"I" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"F" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Q" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+"Y" "[bdfgklmnprsStvzZ]" "[ln][bdfgklmnprsStvzZ]" ""
+
+"lEs"	""	""	"(lEs|lz)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+"lE"	"[bdfgkmnprStvzZ]"	""	"(lE|l)"  // Applebaum < Appelbaum (English + blend English-something forms as Finklestein)
+
+// SIMPLIFICATION: (TRIPHTHONGS & DIPHTHONGS) -> ONE GENERIC DIPHTHONG "D"
+"aue"	""	""	"D"
+"oue"	""	""	"D"
+    
+"AvE"	""	""	"(D|AvE)"
+"Ave"	""	""	"(D|Ave)"
+"avE"	""	""	"(D|avE)"
+"ave"	""	""	"(D|ave)"
+    
+"OvE"	""	""	"(D|OvE)"
+"Ove"	""	""	"(D|Ove)"
+"ovE"	""	""	"(D|ovE)"
+"ove"	""	""	"(D|ove)"
+    
+"ea"	""	""	"(D|ea)"
+"EA"	""	""	"(D|EA)"
+"Ea"	""	""	"(D|Ea)"
+"eA"	""	""	"(D|eA)"
+             
+"aji"	""	""	"D"
+"ajI"	""	""	"D"
+"aje"	""	""	"D"
+"ajE"	""	""	"D"
+    
+"Aji"	""	""	"D"
+"AjI"	""	""	"D"
+"Aje"	""	""	"D"
+"AjE"	""	""	"D"
+    
+"oji"	""	""	"D"
+"ojI"	""	""	"D"
+"oje"	""	""	"D"
+"ojE"	""	""	"D"
+    
+"Oji"	""	""	"D"
+"OjI"	""	""	"D"
+"Oje"	""	""	"D"
+"OjE"	""	""	"D"
+    
+"eji"	""	""	"D"
+"ejI"	""	""	"D"
+"eje"	""	""	"D"
+"ejE"	""	""	"D"
+    
+"Eji"	""	""	"D"
+"EjI"	""	""	"D"
+"Eje"	""	""	"D"
+"EjE"	""	""	"D"
+    
+"uji"	""	""	"D"
+"ujI"	""	""	"D"
+"uje"	""	""	"D"
+"ujE"	""	""	"D"
+    
+"Uji"	""	""	"D"
+"UjI"	""	""	"D"
+"Uje"	""	""	"D"
+"UjE"	""	""	"D"
+        
+"iji"	""	""	"D"
+"ijI"	""	""	"D"
+"ije"	""	""	"D"
+"ijE"	""	""	"D"
+    
+"Iji"	""	""	"D"
+"IjI"	""	""	"D"
+"Ije"	""	""	"D"
+"IjE"	""	""	"D"
+                         
+"aja"	""	""	"D"
+"ajA"	""	""	"D"
+"ajo"	""	""	"D"
+"ajO"	""	""	"D"
+"aju"	""	""	"D"
+"ajU"	""	""	"D"
+    
+"Aja"	""	""	"D"
+"AjA"	""	""	"D"
+"Ajo"	""	""	"D"
+"AjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"oja"	""	""	"D"
+"ojA"	""	""	"D"
+"ojo"	""	""	"D"
+"ojO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Oja"	""	""	"D"
+"OjA"	""	""	"D"
+"Ojo"	""	""	"D"
+"OjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"eja"	""	""	"D"
+"ejA"	""	""	"D"
+"ejo"	""	""	"D"
+"ejO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Eja"	""	""	"D"
+"EjA"	""	""	"D"
+"Ejo"	""	""	"D"
+"EjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"uja"	""	""	"D"
+"ujA"	""	""	"D"
+"ujo"	""	""	"D"
+"ujO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+        
+"Uja"	""	""	"D"
+"UjA"	""	""	"D"
+"Ujo"	""	""	"D"
+"UjO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+        
+"ija"	""	""	"D"
+"ijA"	""	""	"D"
+"ijo"	""	""	"D"
+"ijO"	""	""	"D"
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+    
+"Ija"	""	""	"D"
+"IjA"	""	""	"D"
+"Ijo"	""	""	"D"
+"IjO"	""	""	"D"                         
+"Aju"	""	""	"D"
+"AjU"	""	""	"D"
+                         
+"j"	""	""	"i"                         
+                         
+// lander = lender = länder 
+"lYndEr"	""	"$"	"lYnder" 
+"lander"	""	"$"	"lYnder" 
+"lAndEr"	""	"$"	"lYnder" 
+"lAnder"	""	"$"	"lYnder" 
+"landEr"	""	"$"	"lYnder" 
+"lender"	""	"$"	"lYnder" 
+"lEndEr"	""	"$"	"lYnder" 
+"lendEr"	""	"$"	"lYnder" 
+"lEnder"	""	"$"	"lYnder" 
+             
+// CONSONANTS {z & Z; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z"
+    
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+    
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
new file mode 100644
index 0000000..4210173
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
new file mode 100644
index 0000000..84d8174
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_english.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// VOWELS
+"I" "" "[^aEIeiou]e" "(Q|i|D)" // like in "five"
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+    
+"lE" "[bdfgkmnprsStvzZ]" "" "(il|li|lY)"  // Applebaum < Appelbaum
+         
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+        
+"E" "D[^aeiEIou]" "" "(i|)" // Weinberg, Shaneberg (shaneberg/shejneberg) --> shejnberg
+"e" "D[^aeiEIou]" "" "(i|)" 
+
+"e" "" "" "i"
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiEuQY]" "" "i"
+"E" "" "[aoQY]" "i"
+"E" "" "" "(Y|i)"
+      
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
new file mode 100644
index 0000000..fa8ee99
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_french.txt
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"I" "" "$" "i"
+"I" "[aEIeiou]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+      
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[aoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)"
\ No newline at end of file


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_french.txt
new file mode 100644
index 0000000..de636f8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_french.txt
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
+
+// CONSONANTS
+"kh" "" "" "x" // foreign
+"ph" "" "" "f"
+
+"ç" "" "" "s"
+"x" "" "" "ks"
+"ch" "" "" "S"
+"c" "" "[eiyéèê]" "s"
+"c" "" "" "k"
+"gn" "" "" "(n|gn)"
+"g" "" "[eiy]" "Z" 
+"gue" "" "$" "k"     
+"gu" "" "[eiy]" "g" 
+//"aill" "" "e" "aj" // non Jewish
+//"ll" "" "e" "(l|j)" // non Jewish
+"que" "" "$" "k"
+"qu" "" "" "k"
+"q" "" "" "k"
+"s" "[aeiouyéèê]" "[aeiouyéèê]" "z"
+"h" "[bdgt]" "" "" // translit from Arabic
+"h" "" "$" "" // foreign
+"j" "" "" "Z"
+"w" "" "" "v"
+"ouh" "" "[aioe]" "(v|uh)"
+"ou" "" "[aeio]" "v" 
+"uo" "" "" "(vo|o)"
+"u" "" "[aeio]" "v" 
+
+// VOWELS
+"aue" "" "" "aue" 
+"eau" "" "" "o" 
+//"au" "" "" "(o|au)" // non Jewish
+"ai" "" "" "aj" // [e] is non Jewish
+"ay" "" "" "aj" // [e] is non Jewish
+"é" "" "" "e"
+"ê" "" "" "e"
+"è" "" "" "e"
+"à" "" "" "a"
+"â" "" "" "a"
+"où" "" "" "u"
+"ou" "" "" "u"
+"oi" "" "" "oj" // [ua] is non Jewish
+"ei" "" "" "ej" // [e] is non Jewish, in Ashk should be aj
+"ey" "" "" "ej" // [e] non Jewish, in Ashk should be aj
+//"eu" "" "" "(e|o)" // non Jewish
+"y" "[ou]" "" "j"
+"e" "" "$" "(e|)"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+"y" "" "" "i"
+
+// TRIVIAL      
+"a" "" "" "a"
+"b" "" "" "b"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_hebrew.txt
new file mode 100644
index 0000000..91cf5ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_hebrew.txt
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
+
+"אי" "" "" "i"
+"עי" "" "" "i"
+"עו" "" "" "VV"
+"או" "" "" "VV"
+
+"ג׳" "" "" "Z"
+"ד׳" "" "" "dZ"
+
+"א" "" "" "L"
+"ב" "" "" "b"
+"ג" "" "" "g"
+"ד" "" "" "d"
+
+"ה" "^" "" "1"
+"ה" "" "$" "1"
+"ה" "" "" ""
+
+"וו" "" "" "V" 
+"וי" "" "" "WW"
+"ו" "" "" "W"
+"ז" "" "" "z"
+"ח" "" "" "X"
+"ט" "" "" "T"
+"יי" "" "" "i"
+"י" "" "" "i"
+"ך" "" "" "X"
+"כ" "^" "" "K"
+"כ" "" "" "k"
+"ל" "" "" "l"
+"ם" "" "" "m"
+"מ" "" "" "m"
+"ן" "" "" "n"
+"נ" "" "" "n"
+"ס" "" "" "s"
+"ע" "" "" "L"
+"ף" "" "" "f"
+"פ" "" "" "f"
+"ץ" "" "" "C"
+"צ" "" "" "C"
+"ק" "" "" "K"
+"ר" "" "" "r"
+"ש" "" "" "s"
+"ת" "" "" "T"   // Special for Sephardim

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_italian.txt
new file mode 100644
index 0000000..76cf14b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_italian.txt
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(l|gli)"
+"gn" "" "[aeou]" "(n|nj|gn)"
+"gni" "" "" "(ni|gni)"
+
+"gi" "" "[aeou]" "dZ"
+"gg" "" "[ei]" "dZ"
+"g" "" "[ei]" "dZ"
+"h" "[bdgt]" "" "g" // gh is It; others from Arabic translit
+
+"ci" "" "[aeou]" "tS"
+"ch" "" "[ei]" "k"
+"sc" "" "[ei]" "S" 
+"cc" "" "[ei]" "tS"
+"c" "" "[ei]" "tS"
+"s" "[aeiou]" "[aeiou]" "z"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aeou]" "" "j" // foreign
+"y" "" "[aeou]" "j" // foreign
+
+"qu" "" "" "k"    
+"uo" "" "" "(vo|o)"
+"u" "" "[aei]" "v" 
+
+"�" "" "" "e" 
+"�" "" "" "e" 
+"�" "" "" "o"  
+"�" "" "" "o" 
+
+// LATIN ALPHABET    
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(Z|dZ|j)" // foreign
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    // foreign
+"x" "" "" "ks"    // foreign
+"y" "" "" "i"    // foreign
+"z" "" "" "(ts|dz)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_portuguese.txt
new file mode 100644
index 0000000..67cbd9b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_portuguese.txt
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+"ch" "" "" "S"
+"ss" "" "" "s"
+"sc" "" "[ei]" "s"
+"sç" "" "[aou]" "s"
+"ç" "" "" "s"
+"c" "" "[ei]" "s"
+//  "c" "" "[aou]" "(k|C)"
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "z"
+"s" "" "[dglmnrv]" "(Z|S)" // Z is Brazil
+
+"z" "" "$" "(Z|s|S)" // s and S in Brazil
+"z" "" "[bdgv]" "(Z|z)" // Z in Brazil
+"z" "" "[ptckf]" "(s|S|z)" // s and S in Brazil
+
+"gu" "" "[eiu]" "g"    
+"gu" "" "[ao]" "gv"    
+"g" "" "[ei]" "Z"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "kv"    
+
+"uo" "" "" "(vo|o|u)"
+"u" "" "[aei]" "v" 
+
+"lh" "" "" "l"
+"nh" "" "" "nj"
+"h" "[bdgt]" "" "" // translit. from Arabic
+
+"ex" "" "[aáuiíoóeéêy]" "(ez|eS|eks)" // ez in Brazil
+"ex" "" "[cs]" "e" 
+
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"m" "" "[bcdfglnprstv]" "(m|n)" // maybe to add a rule for m/n before a consonant that disappears [preceding vowel becomes nasalized]
+"m" "" "$" "(m|n)" // maybe to add a rule for final m/n that disappears [preceding vowel becomes nasalized]
+
+"ão" "" "" "(au|an|on)"
+"ãe" "" "" "(aj|an)"
+"ãi" "" "" "(aj|an)"
+"õe" "" "" "(oj|on)"
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+
+"â" "" "" "a"
+"à" "" "" "a"
+"á" "" "" "a"
+"ã" "" "" "(a|an|on)"
+"é" "" "" "e"
+"ê" "" "" "e"
+"í" "" "" "i"
+"ô" "" "" "o"
+"ó" "" "" "o"
+"õ" "" "" "(o|on)"
+"ú" "" "" "u"
+"ü" "" "" "u"
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "(e|i)"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|u)"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "S"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "(S|ks)"   
+"y" "" "" "i"   
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_spanish.txt
new file mode 100644
index 0000000..b900e7e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_spanish.txt
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//Sephardic
+
+// Includes both Spanish (Castilian) & Catalan
+
+// CONSONANTS
+"ñ" "" "" "(n|nj)"
+"ny" "" "" "nj" // Catalan
+"ç" "" "" "s" // Catalan
+
+"ig" "[aeiou]" "" "(tS|ig)" // tS is Catalan
+"ix" "[aeiou]" "" "S" // Catalan
+"tx" "" "" "tS" // Catalan
+"tj" "" "$" "tS" // Catalan
+"tj" "" "" "dZ" // Catalan
+"tg" "" "" "(tg|dZ)" // dZ is Catalan
+"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina
+"bh" "" "" "b" // translit. from Arabic
+"h" "[dgt]" "" "" // translit. from Arabic
+
+"j" "" "" "(x|Z)" // Z is Catalan
+"x" "" "" "(ks|gz|S)" // ks is Spanish, all are Catalan
+
+//"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic
+"w" "" "" "v" // foreign words
+
+"v" "^" "" "(B|v)"
+"b" "^" "" "(b|V)"
+"v" "" "" "(b|v)"
+"b" "" "" "(b|v)"
+"m" "" "[bpvf]" "(m|n)"
+
+"c" "" "[ei]" "s" 
+//  "c" "" "[aou]" "(k|C)"
+"c" "" "" "k"
+
+"z" "" "" "(z|s)" // as "c" befoire "e" or "i", in Spain it is like unvoiced English "th"
+
+"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü"
+"g" "" "[ei]" "(x|g|dZ)"  // "g" only for foreign words; dZ is Catalan
+
+"qu" "" "" "k"
+"q" "" "" "k"
+
+"uo" "" "" "(vo|o)"    
+"u" "" "[aei]" "v"
+
+//  "y" "" "" "(i|j|S|Z)" // S or Z are peculiar to South America; only Ashkenazic
+"y" "" "" "(i|j)"
+
+// VOWELS
+"ü" "" "" "v"
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"à" "" "" "a"  // Catalan
+"è" "" "" "e" // Catalan
+"ò" "" "" "o"  // Catalan
+
+// TRIVIAL      
+"a" "" "" "a"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "i"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone1.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone1.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone1.cs
new file mode 100644
index 0000000..1abfcd1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone1.cs
@@ -0,0 +1,131 @@
+// commons-codec version compatibility level: 1.9
+using System.Globalization;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Caverphone 1.0 value.
+    /// <para/>
+    /// This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 1.0
+    /// algorithm:
+    /// <para/>
+    /// See: <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
+    /// <para/>
+    /// See: <a href="http://caversham.otago.ac.nz/files/working/ctp060902.pdf">Caverphone 1.0 specification</a>
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.5
+    /// </summary>
+    public class Caverphone1 : AbstractCaverphone
+    {
+        private static readonly string SIX_1 = "111111";
+
+        /// <summary>
+        /// Encodes the given string into a Caverphone value.
+        /// </summary>
+        /// <param name="source">The source string.</param>
+        /// <returns>A caverphone code for the given string.</returns>
+        public override string Encode(string source)
+        {
+            string txt = source;
+            if (txt == null || txt.Length == 0)
+            {
+                return SIX_1;
+            }
+
+            // 1. Convert to lowercase
+            txt = txt.ToLowerInvariant(); // LUCENENET NOTE: This doesn't work right under "en" language, but does under invariant
+
+            // 2. Remove anything not A-Z
+            txt = Regex.Replace(txt, "[^a-z]", "");
+
+            // 3. Handle various start options
+            // 2 is a temporary placeholder to indicate a consonant which we are no longer interested in.
+            txt = Regex.Replace(txt, "^cough", "cou2f");
+            txt = Regex.Replace(txt, "^rough", "rou2f");
+            txt = Regex.Replace(txt, "^tough", "tou2f");
+            txt = Regex.Replace(txt, "^enough", "enou2f");
+            txt = Regex.Replace(txt, "^gn", "2n");
+
+            // End
+            txt = Regex.Replace(txt, "mb$", "m2");
+
+            // 4. Handle replacements
+            txt = Regex.Replace(txt, "cq", "2q");
+            txt = Regex.Replace(txt, "ci", "si");
+            txt = Regex.Replace(txt, "ce", "se");
+            txt = Regex.Replace(txt, "cy", "sy");
+            txt = Regex.Replace(txt, "tch", "2ch");
+            txt = Regex.Replace(txt, "c", "k");
+            txt = Regex.Replace(txt, "q", "k");
+            txt = Regex.Replace(txt, "x", "k");
+            txt = Regex.Replace(txt, "v", "f");
+            txt = Regex.Replace(txt, "dg", "2g");
+            txt = Regex.Replace(txt, "tio", "sio");
+            txt = Regex.Replace(txt, "tia", "sia");
+            txt = Regex.Replace(txt, "d", "t");
+            txt = Regex.Replace(txt, "ph", "fh");
+            txt = Regex.Replace(txt, "b", "p");
+            txt = Regex.Replace(txt, "sh", "s2");
+            txt = Regex.Replace(txt, "z", "s");
+            txt = Regex.Replace(txt, "^[aeiou]", "A");
+            // 3 is a temporary placeholder marking a vowel
+            txt = Regex.Replace(txt, "[aeiou]", "3");
+            txt = Regex.Replace(txt, "3gh3", "3kh3");
+            txt = Regex.Replace(txt, "gh", "22");
+            txt = Regex.Replace(txt, "g", "k");
+            txt = Regex.Replace(txt, "s+", "S");
+            txt = Regex.Replace(txt, "t+", "T");
+            txt = Regex.Replace(txt, "p+", "P");
+            txt = Regex.Replace(txt, "k+", "K");
+            txt = Regex.Replace(txt, "f+", "F");
+            txt = Regex.Replace(txt, "m+", "M");
+            txt = Regex.Replace(txt, "n+", "N");
+            txt = Regex.Replace(txt, "w3", "W3");
+            txt = Regex.Replace(txt, "wy", "Wy"); // 1.0 only
+            txt = Regex.Replace(txt, "wh3", "Wh3");
+            txt = Regex.Replace(txt, "why", "Why"); // 1.0 only
+            txt = Regex.Replace(txt, "w", "2");
+            txt = Regex.Replace(txt, "^h", "A");
+            txt = Regex.Replace(txt, "h", "2");
+            txt = Regex.Replace(txt, "r3", "R3");
+            txt = Regex.Replace(txt, "ry", "Ry"); // 1.0 only
+            txt = Regex.Replace(txt, "r", "2");
+            txt = Regex.Replace(txt, "l3", "L3");
+            txt = Regex.Replace(txt, "ly", "Ly"); // 1.0 only
+            txt = Regex.Replace(txt, "l", "2");
+            txt = Regex.Replace(txt, "j", "y"); // 1.0 only
+            txt = Regex.Replace(txt, "y3", "Y3"); // 1.0 only
+            txt = Regex.Replace(txt, "y", "2"); // 1.0 only
+
+            // 5. Handle removals
+            txt = Regex.Replace(txt, "2", "");
+            txt = Regex.Replace(txt, "3", "");
+
+            // 6. put six 1s on the end
+            txt = txt + SIX_1;
+
+            // 7. take the first six characters as the code
+            return txt.Substring(0, SIX_1.Length);
+        }
+    }
+}
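A short usage sketch for the encoder above. The expected codes are the commonly cited Caverphone 1.0 examples (see the Wikipedia page linked in the remarks); they are shown as comments for orientation rather than asserted test values.

using System;
using Lucene.Net.Analysis.Phonetic.Language;

internal static class Caverphone1Demo
{
    internal static void Main()
    {
        var encoder = new Caverphone1();

        // Caverphone 1.0 always yields a six-character code, padded with '1'.
        Console.WriteLine(encoder.Encode("Lee"));      // L11111
        Console.WriteLine(encoder.Encode("Thompson")); // TMPSN1
        Console.WriteLine(encoder.Encode(""));         // 111111 (empty input short-circuits)
    }
}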

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone2.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone2.cs
new file mode 100644
index 0000000..cec7388
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Caverphone2.cs
@@ -0,0 +1,133 @@
+// commons-codec version compatibility level: 1.9
+using System.Globalization;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Caverphone 2.0 value.
+    /// <para/>
+    /// This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
+    /// algorithm:
+    /// <para/>
+    /// See: <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
+    /// <para/>
+    /// See: <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a>
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// </summary>
+    public class Caverphone2 : AbstractCaverphone
+    {
+        private static readonly string TEN_1 = "1111111111";
+
+        /// <summary>
+        /// Encodes the given string into a Caverphone 2.0 value.
+        /// </summary>
+        /// <param name="source">The source string.</param>
+        /// <returns>A caverphone code for the given string.</returns>
+        public override string Encode(string source)
+        {
+            string txt = source;
+            if (txt == null || txt.Length == 0)
+            {
+                return TEN_1;
+            }
+
+            // 1. Convert to lowercase
+            txt = new CultureInfo("en").TextInfo.ToLower(txt);
+
+            // 2. Remove anything not A-Z
+            txt = Regex.Replace(txt, "[^a-z]", "");
+
+            // 2.5. Remove final e
+            txt = Regex.Replace(txt, "e$", ""); // 2.0 only
+
+            // 3. Handle various start options
+            txt = Regex.Replace(txt, "^cough", "cou2f");
+            txt = Regex.Replace(txt, "^rough", "rou2f");
+            txt = Regex.Replace(txt, "^tough", "tou2f");
+            txt = Regex.Replace(txt, "^enough", "enou2f"); // 2.0 only
+            txt = Regex.Replace(txt, "^trough", "trou2f"); // 2.0 only
+                                                       // note the spec says ^enough here again, c+p error I assume
+            txt = Regex.Replace(txt, "^gn", "2n");
+
+            // End
+            txt = Regex.Replace(txt, "mb$", "m2");
+
+            // 4. Handle replacements
+            txt = Regex.Replace(txt, "cq", "2q");
+            txt = Regex.Replace(txt, "ci", "si");
+            txt = Regex.Replace(txt, "ce", "se");
+            txt = Regex.Replace(txt, "cy", "sy");
+            txt = Regex.Replace(txt, "tch", "2ch");
+            txt = Regex.Replace(txt, "c", "k");
+            txt = Regex.Replace(txt, "q", "k");
+            txt = Regex.Replace(txt, "x", "k");
+            txt = Regex.Replace(txt, "v", "f");
+            txt = Regex.Replace(txt, "dg", "2g");
+            txt = Regex.Replace(txt, "tio", "sio");
+            txt = Regex.Replace(txt, "tia", "sia");
+            txt = Regex.Replace(txt, "d", "t");
+            txt = Regex.Replace(txt, "ph", "fh");
+            txt = Regex.Replace(txt, "b", "p");
+            txt = Regex.Replace(txt, "sh", "s2");
+            txt = Regex.Replace(txt, "z", "s");
+            txt = Regex.Replace(txt, "^[aeiou]", "A");
+            txt = Regex.Replace(txt, "[aeiou]", "3");
+            txt = Regex.Replace(txt, "j", "y"); // 2.0 only
+            txt = Regex.Replace(txt, "^y3", "Y3"); // 2.0 only
+            txt = Regex.Replace(txt, "^y", "A"); // 2.0 only
+            txt = Regex.Replace(txt, "y", "3"); // 2.0 only
+            txt = Regex.Replace(txt, "3gh3", "3kh3");
+            txt = Regex.Replace(txt, "gh", "22");
+            txt = Regex.Replace(txt, "g", "k");
+            txt = Regex.Replace(txt, "s+", "S");
+            txt = Regex.Replace(txt, "t+", "T");
+            txt = Regex.Replace(txt, "p+", "P");
+            txt = Regex.Replace(txt, "k+", "K");
+            txt = Regex.Replace(txt, "f+", "F");
+            txt = Regex.Replace(txt, "m+", "M");
+            txt = Regex.Replace(txt, "n+", "N");
+            txt = Regex.Replace(txt, "w3", "W3");
+            txt = Regex.Replace(txt, "wh3", "Wh3");
+            txt = Regex.Replace(txt, "w$", "3"); // 2.0 only
+            txt = Regex.Replace(txt, "w", "2");
+            txt = Regex.Replace(txt, "^h", "A");
+            txt = Regex.Replace(txt, "h", "2");
+            txt = Regex.Replace(txt, "r3", "R3");
+            txt = Regex.Replace(txt, "r$", "3"); // 2.0 only
+            txt = Regex.Replace(txt, "r", "2");
+            txt = Regex.Replace(txt, "l3", "L3");
+            txt = Regex.Replace(txt, "l$", "3"); // 2.0 only
+            txt = Regex.Replace(txt, "l", "2");
+
+            // 5. Handle removals
+            txt = Regex.Replace(txt, "2", "");
+            txt = Regex.Replace(txt, "3$", "A"); // 2.0 only
+            txt = Regex.Replace(txt, "3", "");
+
+            // 6. put ten 1s on the end
+            txt = txt + TEN_1;
+
+            // 7. take the first ten characters as the code
+            return txt.Substring(0, TEN_1.Length);
+        }
+    }
+}
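
A similar sketch for the 2.0 variant above (editorial, not part of the patch). Tracing the steps in Encode, "Peter" reduces to "PTA" before the ten 1s are appended, giving a ten-character code:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class Caverphone2Example
    {
        public static void Main()
        {
            var encoder = new Caverphone2();
            // "Peter" -> "PTA" after the replacement/removal steps, then padded to ten characters.
            Console.WriteLine(encoder.Encode("Peter")); // expected: PTA1111111
        }
    }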

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/ColognePhonetic.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/ColognePhonetic.cs b/src/Lucene.Net.Analysis.Phonetic/Language/ColognePhonetic.cs
new file mode 100644
index 0000000..a4824b3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/ColognePhonetic.cs
@@ -0,0 +1,501 @@
+// commons-codec version compatibility level: 1.9
+using System.Globalization;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Cologne Phonetic value.
+    /// </summary>
+    /// <remarks>
+    /// Implements the <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">K&#214;lner Phonetik</a>
+    /// (Cologne Phonetic) algorithm issued by Hans Joachim Postel in 1969.
+    /// <para/>
+    /// The <i>K&#214;lner Phonetik</i> is a phonetic algorithm which is optimized for the German language.
+    /// It is related to the well-known soundex algorithm.
+    /// <para/>
+    /// <h2>Algorithm</h2>
+    /// <list type="bullet">
+    ///     <item>
+    ///         <term>Step 1:</term>
+    ///         <description>
+    ///             After preprocessing (conversion to upper case, transcription of <a
+    ///             href="http://en.wikipedia.org/wiki/Germanic_umlaut">Germanic umlauts</a>, removal of non-alphabetical characters) the
+    ///             letters of the supplied text are replaced by their phonetic code according to the following table.
+    ///             <list type="table">
+    ///                 <listheader>
+    ///                     <term>Letter</term>
+    ///                     <term>Context</term>
+    ///                     <term>Code</term>
+    ///                 </listheader>
+    ///                 <item>
+    ///                     <term>A, E, I, J, O, U, Y</term>
+    ///                     <term></term>
+    ///                     <term>0</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>H</term>
+    ///                     <term></term>
+    ///                     <term>-</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>B</term>
+    ///                     <term></term>
+    ///                     <term>1</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>P</term>
+    ///                     <term>not before H</term>
+    ///                     <term>1</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>D, T</term>
+    ///                     <term>not before C, S, Z</term>
+    ///                     <term>2</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>F, V, W</term>
+    ///                     <term></term>
+    ///                     <term>3</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>P</term>
+    ///                     <term>before H</term>
+    ///                     <term>3</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>G, K, Q</term>
+    ///                     <term></term>
+    ///                     <term>4</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>C</term>
+    ///                     <term>t onset before A, H, K, L, O, Q, R, U, X <para>OR</para>
+    ///                     before A, H, K, O, Q, U, X except after S, Z</term>
+    ///                     <term>4</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>X</term>
+    ///                     <term>not after C, K, Q</term>
+    ///                     <term>48</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>L</term>
+    ///                     <term></term>
+    ///                     <term>5</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>M, N</term>
+    ///                     <term></term>
+    ///                     <term>6</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>R</term>
+    ///                     <term></term>
+    ///                     <term>7</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>S, Z</term>
+    ///                     <term></term>
+    ///                     <term>8</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>C</term>
+    ///                     <term>after S, Z <para>OR</para>
+    ///                     at onset except before A, H, K, L, O, Q, R, U, X <para>OR</para>
+    ///                     not before A, H, K, O, Q, U, X
+    ///                     </term>
+    ///                     <term>8</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>D, T</term>
+    ///                     <term>before C, S, Z</term>
+    ///                     <term>8</term>
+    ///                 </item>
+    ///                 <item>
+    ///                     <term>X</term>
+    ///                     <term>after C, K, Q</term>
+    ///                     <term>8</term>
+    ///                 </item>
+    ///             </list>
+    ///             <para>
+    ///                 <small><i>(Source: <a href= "http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik#Buchstabencodes" >Wikipedia (de):
+    ///                 K&#214;lner Phonetik -- Buchstabencodes</a>)</i></small>
+    ///             </para>
+    ///             <h4>Example:</h4>
+    ///             <c>"M&#220;ller-L&#220;denscheidt" => "MULLERLUDENSCHEIDT" => "6005507500206880022"</c>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Step 2:</term>
+    ///         <description>
+    ///             Collapse of all multiple consecutive code digits.
+    ///             <h4>Example:</h4>
+    ///             <c>"6005507500206880022" => "6050750206802"</c>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Step 3:</term>
+    ///         <description>
+    ///             Removal of all codes "0" except at the beginning. This means that two or more identical consecutive digits can occur
+    ///             if they occur after removing the "0" digits.
+    ///             <h4>Example:</h4>
+    ///             <c>"6050750206802" => "65752682"</c>
+    ///         </description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// This class is thread-safe.
+    /// <para/>
+    /// See: <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Wikipedia (de): K&#246;lner Phonetik (in German)</a>
+    /// <para/>
+    /// since 1.5
+    /// </remarks>
+    public class ColognePhonetic : IStringEncoder
+    {
+        // Predefined char arrays for better performance and less GC load
+        private static readonly char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
+        private static readonly char[] SCZ = new char[] { 'S', 'C', 'Z' };
+        private static readonly char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
+        private static readonly char[] GKQ = new char[] { 'G', 'K', 'Q' };
+        private static readonly char[] CKQ = new char[] { 'C', 'K', 'Q' };
+        private static readonly char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
+        private static readonly char[] SZ = new char[] { 'S', 'Z' };
+        private static readonly char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
+        private static readonly char[] TDX = new char[] { 'T', 'D', 'X' };
+
+        /// <summary>
+        /// This class is not thread-safe; the field <see cref="length"/> is mutable.
+        /// However, it is not shared between threads, as it is constructed on demand
+        /// by the method <see cref="ColognePhonetic.GetColognePhonetic(string)"/>.
+        /// </summary>
+        private abstract class CologneBuffer
+        {
+
+            protected readonly char[] data;
+
+            protected int length = 0;
+
+            public CologneBuffer(char[] data)
+            {
+                this.data = data;
+                this.length = data.Length;
+            }
+
+            public CologneBuffer(int buffSize)
+            {
+                this.data = new char[buffSize];
+                this.length = 0;
+            }
+
+            protected abstract char[] CopyData(int start, int length);
+
+            public virtual int Length
+            {
+                get { return length; }
+            }
+
+            public override string ToString()
+            {
+                return new string(CopyData(0, length));
+            }
+        }
+
+        private class CologneOutputBuffer : CologneBuffer
+        {
+            public CologneOutputBuffer(int buffSize)
+                : base(buffSize)
+            {
+            }
+
+            public void AddRight(char chr)
+            {
+                data[length] = chr;
+                length++;
+            }
+
+            protected override char[] CopyData(int start, int length)
+            {
+                char[] newData = new char[length];
+                System.Array.Copy(data, start, newData, 0, length);
+                return newData;
+            }
+        }
+
+        private class CologneInputBuffer : CologneBuffer
+        {
+            public CologneInputBuffer(char[] data)
+                : base(data)
+            {
+            }
+
+            public virtual void AddLeft(char ch)
+            {
+                length++;
+                data[GetNextPos()] = ch;
+            }
+
+            protected override char[] CopyData(int start, int length)
+            {
+                char[] newData = new char[length];
+                System.Array.Copy(data, data.Length - this.length + start, newData, 0, length);
+                return newData;
+            }
+
+            public virtual char GetNextChar()
+            {
+                return data[GetNextPos()];
+            }
+
+            protected virtual int GetNextPos()
+            {
+                return data.Length - length;
+            }
+
+            public virtual char RemoveNext()
+            {
+                char ch = GetNextChar();
+                length--;
+                return ch;
+            }
+        }
+
+        /// <summary>
+        /// Maps some Germanic characters to their plain equivalents for internal processing. The following characters are mapped:
+        /// <list type="bullet">
+        ///     <item><description>capital a, umlaut mark</description></item>
+        ///     <item><description>capital u, umlaut mark</description></item>
+        ///     <item><description>capital o, umlaut mark</description></item>
+        ///     <item><description>small sharp s, German</description></item>
+        /// </list>
+        /// </summary>
+        private static readonly char[][] PREPROCESS_MAP = {
+            new char[] {'\u00C4', 'A'}, // capital a, umlaut mark
+            new char[] {'\u00DC', 'U'}, // capital u, umlaut mark
+            new char[] {'\u00D6', 'O'}, // capital o, umlaut mark
+            new char[] {'\u00DF', 'S'} // small sharp s, German
+        };
+
+        /// <summary>
+        /// Returns whether the array contains the key, or not.
+        /// </summary>
+        private static bool ArrayContains(char[] arr, char key)
+        {
+            foreach (char element in arr)
+            {
+                if (element == key)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// <para>
+        /// Implements the <i>K&#246;lner Phonetik</i> algorithm.
+        /// </para>
+        /// <para>
+        /// In contrast to the initial description of the algorithm, this implementation does the encoding in one pass.
+        /// </para>
+        /// 
+        /// </summary>
+        /// <param name="text"></param>
+        /// <returns>The corresponding encoding according to the <i>K&#246;lner Phonetik</i> algorithm</returns>
+        public virtual string GetColognePhonetic(string text)
+        {
+            if (text == null)
+            {
+                return null;
+            }
+
+            text = Preprocess(text);
+
+            CologneOutputBuffer output = new CologneOutputBuffer(text.Length * 2);
+            CologneInputBuffer input = new CologneInputBuffer(text.ToCharArray());
+
+            char nextChar;
+
+            char lastChar = '-';
+            char lastCode = '/';
+            char code;
+            char chr;
+
+            int rightLength = input.Length;
+
+            while (rightLength > 0)
+            {
+                chr = input.RemoveNext();
+
+                if ((rightLength = input.Length) > 0)
+                {
+                    nextChar = input.GetNextChar();
+                }
+                else
+                {
+                    nextChar = '-';
+                }
+
+                if (ArrayContains(AEIJOUY, chr))
+                {
+                    code = '0';
+                }
+                else if (chr == 'H' || chr < 'A' || chr > 'Z')
+                {
+                    if (lastCode == '/')
+                    {
+                        continue;
+                    }
+                    code = '-';
+                }
+                else if (chr == 'B' || (chr == 'P' && nextChar != 'H'))
+                {
+                    code = '1';
+                }
+                else if ((chr == 'D' || chr == 'T') && !ArrayContains(SCZ, nextChar))
+                {
+                    code = '2';
+                }
+                else if (ArrayContains(WFPV, chr))
+                {
+                    code = '3';
+                }
+                else if (ArrayContains(GKQ, chr))
+                {
+                    code = '4';
+                }
+                else if (chr == 'X' && !ArrayContains(CKQ, lastChar))
+                {
+                    code = '4';
+                    input.AddLeft('S');
+                    rightLength++;
+                }
+                else if (chr == 'S' || chr == 'Z')
+                {
+                    code = '8';
+                }
+                else if (chr == 'C')
+                {
+                    if (lastCode == '/')
+                    {
+                        if (ArrayContains(AHKLOQRUX, nextChar))
+                        {
+                            code = '4';
+                        }
+                        else
+                        {
+                            code = '8';
+                        }
+                    }
+                    else
+                    {
+                        if (ArrayContains(SZ, lastChar) || !ArrayContains(AHOUKQX, nextChar))
+                        {
+                            code = '8';
+                        }
+                        else
+                        {
+                            code = '4';
+                        }
+                    }
+                }
+                else if (ArrayContains(TDX, chr))
+                {
+                    code = '8';
+                }
+                else if (chr == 'R')
+                {
+                    code = '7';
+                }
+                else if (chr == 'L')
+                {
+                    code = '5';
+                }
+                else if (chr == 'M' || chr == 'N')
+                {
+                    code = '6';
+                }
+                else
+                {
+                    code = chr;
+                }
+
+                if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8'))
+                {
+                    output.AddRight(code);
+                }
+
+                lastChar = chr;
+                lastCode = code;
+            }
+            return output.ToString();
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //@Override
+        //    public Object encode(final Object object) throws EncoderException
+        //{
+        //        if (!(object instanceof String)) {
+        //        throw new EncoderException("This method's parameter was expected to be of the type " +
+        //            String.class.getName() +
+        //                ". But actually it was of the type " +
+        //                object.getClass().getName() +
+        //                ".");
+        //        }
+        //        return encode((String) object);
+        //    }
+
+
+        public virtual string Encode(string text)
+        {
+            return GetColognePhonetic(text);
+        }
+
+        public virtual bool IsEncodeEqual(string text1, string text2)
+        {
+            return GetColognePhonetic(text1).Equals(GetColognePhonetic(text2));
+        }
+
+        /// <summary>
+        /// Converts the string to upper case and replaces germanic characters as defined in <see cref="PREPROCESS_MAP"/>.
+        /// </summary>
+        private string Preprocess(string text)
+        {
+            text = new CultureInfo("de").TextInfo.ToUpper(text);
+
+            char[] chrs = text.ToCharArray();
+
+            for (int index = 0; index < chrs.Length; index++)
+            {
+                if (chrs[index] > 'Z')
+                {
+                    foreach (char[] element in PREPROCESS_MAP)
+                    {
+                        if (chrs[index] == element[0])
+                        {
+                            chrs[index] = element[1];
+                            break;
+                        }
+                    }
+                }
+            }
+            return new string(chrs);
+        }
+    }
+}
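
A short usage sketch for the class above (editorial, not part of the patch). The first expected value is taken from Step 3 of the algorithm description in the class remarks; the second call shows IsEncodeEqual for two spellings that should both reduce to "67":

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class ColognePhoneticExample
    {
        public static void Main()
        {
            var cologne = new ColognePhonetic();
            // Example from the class remarks: "Müller-Lüdenscheidt" => "65752682".
            Console.WriteLine(cologne.GetColognePhonetic("Müller-Lüdenscheidt")); // 65752682
            // IsEncodeEqual compares two inputs by their codes; "Meier" and "Mayr" should both map to 67.
            Console.WriteLine(cologne.IsEncodeEqual("Meier", "Mayr")); // True
        }
    }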

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/DaitchMokotoffSoundex.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/DaitchMokotoffSoundex.cs b/src/Lucene.Net.Analysis.Phonetic/Language/DaitchMokotoffSoundex.cs
new file mode 100644
index 0000000..e72bc38
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/DaitchMokotoffSoundex.cs
@@ -0,0 +1,620 @@
+// commons-codec version compatibility level: 1.10
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Daitch-Mokotoff Soundex value.
+    /// </summary>
+    /// <remarks>
+    /// The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, yielding greater
+    /// accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation but differences in spelling.
+    /// <para/>
+    /// The main differences compared to the other soundex variants are:
+    /// <list type="bullet">
+    ///     <item><description>coded names are 6 digits long</description></item>
+    ///     <item><description>the initial character of the name is coded</description></item>
+    ///     <item><description>rules to encoded multi-character n-grams</description></item>
+    ///     <item><description>multiple possible encodings for the same name (branching)</description></item>
+    /// </list>
+    /// <para/>
+    /// This implementation supports branching, depending on the used method:
+    /// <list type="bullet">
+    ///     <item><term><see cref="Encode(string)"/></term><description>branching disabled, only the first code will be returned</description></item>
+    ///     <item><term><see cref="GetSoundex(string)"/></term><description>branching enabled, all codes will be returned, separated by '|'</description></item>
+    /// </list>
+    /// <para/>
+    /// Note: this implementation has additional branching rules compared to the original description of the algorithm. The
+    /// rules can be customized by overriding the default rules contained in the resource file
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.dmrules.txt</c>.
+    /// <para/>
+    /// This class is thread-safe.
+    /// <para/>
+    /// See: <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
+    /// <para/>
+    /// See: <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
+    /// <para/>
+    /// since 1.10
+    /// </remarks>
+    /// <seealso cref="Soundex"/>
+    public class DaitchMokotoffSoundex : IStringEncoder
+    {
+        /// <summary>
+        /// Inner class representing a branch during DM soundex encoding.
+        /// </summary>
+        private sealed class Branch
+        {
+            private readonly StringBuilder builder;
+            private string cachedString;
+            private string lastReplacement;
+
+            internal Branch()
+            {
+                builder = new StringBuilder();
+                lastReplacement = null;
+                cachedString = null;
+            }
+
+            /// <summary>
+            /// Creates a new branch, identical to this branch.
+            /// </summary>
+            /// <returns>A new, identical branch.</returns>
+            public Branch CreateBranch()
+            {
+                Branch branch = new Branch();
+                branch.builder.Append(ToString());
+                branch.lastReplacement = this.lastReplacement;
+                return branch;
+            }
+
+            public override bool Equals(object other)
+            {
+                if (this == other)
+                {
+                    return true;
+                }
+                if (!(other is Branch))
+                {
+                    return false;
+                }
+
+                return ToString().Equals(((Branch)other).ToString());
+            }
+
+            /// <summary>
+            /// Finish this branch by appending '0's until the maximum code length has been reached.
+            /// </summary>
+            public void Finish()
+            {
+                while (builder.Length < MAX_LENGTH)
+                {
+                    builder.Append('0');
+                    cachedString = null;
+                }
+            }
+
+            public override int GetHashCode()
+            {
+                return ToString().GetHashCode();
+            }
+
+            /// <summary>
+            /// Process the next replacement to be added to this branch.
+            /// </summary>
+            /// <param name="replacement">The next replacement to append.</param>
+            /// <param name="forceAppend">Indicates if the default processing shall be overridden.</param>
+            public void ProcessNextReplacement(string replacement, bool forceAppend)
+            {
+                bool append = lastReplacement == null || !lastReplacement.EndsWith(replacement, StringComparison.Ordinal) || forceAppend;
+
+                if (append && builder.Length < MAX_LENGTH)
+                {
+                    builder.Append(replacement);
+                    // remove all characters after the maximum length
+                    if (builder.Length > MAX_LENGTH)
+                    {
+                        //builder.delete(MAX_LENGTH, builder.Length);
+                        builder.Remove(MAX_LENGTH, builder.Length - MAX_LENGTH);
+                    }
+                    cachedString = null;
+                }
+
+                lastReplacement = replacement;
+            }
+
+            public override string ToString()
+            {
+                if (cachedString == null)
+                {
+                    cachedString = builder.ToString();
+                }
+                return cachedString;
+            }
+        }
+
+        /// <summary>
+        /// Inner class for storing rules.
+        /// </summary>
+        private sealed class Rule
+        {
+            private readonly string pattern;
+            private readonly string[] replacementAtStart;
+            private readonly string[] replacementBeforeVowel;
+            private readonly string[] replacementDefault;
+
+            internal Rule(string pattern, string replacementAtStart, string replacementBeforeVowel,
+                    string replacementDefault)
+            {
+                this.pattern = pattern;
+                this.replacementAtStart = Regex.Split(replacementAtStart, "\\|");
+                this.replacementBeforeVowel = Regex.Split(replacementBeforeVowel, "\\|");
+                this.replacementDefault = Regex.Split(replacementDefault, "\\|");
+            }
+
+            // LUCENENET specific - need read access to pattern
+            public string Pattern
+            {
+                get { return pattern; }
+            }
+
+            public int PatternLength
+            {
+                get { return pattern.Length; }
+            }
+
+            public string[] GetReplacements(string context, bool atStart)
+            {
+                if (atStart)
+                {
+                    return replacementAtStart;
+                }
+
+                int nextIndex = PatternLength;
+                bool nextCharIsVowel = nextIndex < context.Length ? IsVowel(context[nextIndex]) : false;
+                if (nextCharIsVowel)
+                {
+                    return replacementBeforeVowel;
+                }
+
+                return replacementDefault;
+            }
+
+            private bool IsVowel(char ch)
+            {
+                return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
+            }
+
+            public bool Matches(string context)
+            {
+                return context.StartsWith(pattern, StringComparison.Ordinal);
+            }
+
+            public override string ToString()
+            {
+                return string.Format("{0}=({1},{2},{3})", pattern, Collections.ToString(replacementAtStart),
+                    Collections.ToString(replacementBeforeVowel), Collections.ToString(replacementDefault));
+            }
+        }
+
+        private static readonly string COMMENT = "//";
+        private static readonly string DOUBLE_QUOTE = "\"";
+
+        private static readonly string MULTILINE_COMMENT_END = "*/";
+
+        private static readonly string MULTILINE_COMMENT_START = "/*";
+
+        /// <summary>The resource file containing the replacement and folding rules</summary>
+        private static readonly string RESOURCE_FILE = "dmrules.txt";
+
+        /// <summary>The code length of a DM soundex value.</summary>
+        private static readonly int MAX_LENGTH = 6;
+
+        /// <summary>Transformation rules indexed by the first character of their pattern.</summary>
+        private static readonly IDictionary<char, IList<Rule>> RULES = new Dictionary<char, IList<Rule>>();
+
+        /// <summary>Folding rules.</summary>
+        private static readonly IDictionary<char, char> FOLDINGS = new Dictionary<char, char>();
+
+        private class DaitchMokotoffRuleComparer : IComparer<Rule>
+        {
+            public int Compare(Rule rule1, Rule rule2)
+            {
+                return rule2.PatternLength - rule1.PatternLength;
+            }
+        }
+
+        static DaitchMokotoffSoundex()
+        {
+            Stream rulesIS = typeof(DaitchMokotoffSoundex).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(DaitchMokotoffSoundex), RESOURCE_FILE);
+            if (rulesIS == null)
+            {
+                throw new ArgumentException("Unable to load resource: " + RESOURCE_FILE);
+            }
+
+            using (TextReader scanner = new StreamReader(rulesIS, Encoding.UTF8))
+            {
+                ParseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
+            }
+
+            // sort RULES by pattern length in descending order
+            foreach (var rule in RULES)
+            {
+                IList<Rule> ruleList = rule.Value;
+                ruleList.Sort(new DaitchMokotoffRuleComparer());
+            }
+        }
+
+        private static void ParseRules(TextReader scanner, string location,
+            IDictionary<char, IList<Rule>> ruleMapping, IDictionary<char, char> asciiFoldings)
+        {
+            int currentLine = 0;
+            bool inMultilineComment = false;
+
+            string rawLine;
+            while ((rawLine = scanner.ReadLine()) != null)
+            { 
+                currentLine++;
+                string line = rawLine;
+
+                if (inMultilineComment)
+                {
+                    if (line.EndsWith(MULTILINE_COMMENT_END, StringComparison.Ordinal))
+                    {
+                        inMultilineComment = false;
+                    }
+                    continue;
+                }
+
+                if (line.StartsWith(MULTILINE_COMMENT_START, StringComparison.Ordinal))
+                {
+                    inMultilineComment = true;
+                }
+                else
+                {
+                    // discard comments
+                    int cmtI = line.IndexOf(COMMENT);
+                    if (cmtI >= 0)
+                    {
+                        line = line.Substring(0, cmtI);
+                    }
+
+                    // trim leading-trailing whitespace
+                    line = line.Trim();
+
+                    if (line.Length == 0)
+                    {
+                        continue; // empty lines can be safely skipped
+                    }
+
+                    if (line.Contains("="))
+                    {
+                        // folding
+                        string[] parts = line.Split(new string[] { "=" }, StringSplitOptions.RemoveEmptyEntries);
+                        if (parts.Length != 2)
+                        {
+                            throw new ArgumentException("Malformed folding statement split into " + parts.Length +
+                                    " parts: " + rawLine + " in " + location);
+                        }
+                        else
+                        {
+                            string leftCharacter = parts[0];
+                            string rightCharacter = parts[1];
+
+                            if (leftCharacter.Length != 1 || rightCharacter.Length != 1)
+                            {
+                                throw new ArgumentException("Malformed folding statement - " +
+                                        "patterns are not single characters: " + rawLine + " in " + location);
+                            }
+
+                            asciiFoldings[leftCharacter[0]] = rightCharacter[0];
+                        }
+                    }
+                    else
+                    {
+                        // rule
+                        string[] parts = Regex.Split(line, "\\s+");
+                        if (parts.Length != 4)
+                        {
+                            throw new ArgumentException("Malformed rule statement split into " + parts.Length +
+                                    " parts: " + rawLine + " in " + location);
+                        }
+                        else
+                        {
+                            try
+                            {
+                                string pattern = StripQuotes(parts[0]);
+                                string replacement1 = StripQuotes(parts[1]);
+                                string replacement2 = StripQuotes(parts[2]);
+                                string replacement3 = StripQuotes(parts[3]);
+
+                                Rule r = new Rule(pattern, replacement1, replacement2, replacement3);
+                                char patternKey = r.Pattern[0];
+                                IList<Rule> rules;
+                                if (!ruleMapping.TryGetValue(patternKey, out rules) || rules == null)
+                                {
+                                    rules = new List<Rule>();
+                                    ruleMapping[patternKey] = rules;
+                                }
+                                rules.Add(r);
+                            }
+                            catch (ArgumentException e)
+                            {
+                                throw new InvalidOperationException(
+                                        "Problem parsing line '" + currentLine + "' in " + location, e);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        private static string StripQuotes(string str)
+        {
+            if (str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal))
+            {
+                str = str.Substring(1);
+            }
+
+            if (str.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal))
+            {
+                str = str.Substring(0, str.Length - 1);
+            }
+
+            return str;
+        }
+
+        /// <summary>Whether to use ASCII folding prior to encoding.</summary>
+        private readonly bool folding;
+
+        /// <summary>
+        /// Creates a new instance with ASCII-folding enabled.
+        /// </summary>
+        public DaitchMokotoffSoundex()
+            : this(true)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new instance.
+        /// <para/>
+        /// With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
+        /// è -&gt; e.
+        /// </summary>
+        /// <param name="folding">If ASCII-folding shall be performed before encoding.</param>
+        public DaitchMokotoffSoundex(bool folding)
+        {
+            this.folding = folding;
+        }
+
+        /// <summary>
+        /// Performs a cleanup of the input string before the actual soundex transformation.
+        /// <para/>
+        /// Removes all whitespace characters and performs ASCII folding if enabled.
+        /// </summary>
+        /// <param name="input">The input string to cleanup.</param>
+        /// <returns>A cleaned up string.</returns>
+        private string Cleanup(string input)
+        {
+            StringBuilder sb = new StringBuilder();
+            foreach (char c in input.ToCharArray())
+            {
+                char ch = c;
+                if (char.IsWhiteSpace(ch))
+                {
+                    continue;
+                }
+
+                ch = char.ToLowerInvariant(ch);
+                if (folding && FOLDINGS.ContainsKey(ch))
+                {
+                    ch = FOLDINGS[ch];
+                }
+                sb.Append(ch);
+            }
+            return sb.ToString();
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //**
+        // * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
+        // * <p>
+        // * This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an
+        // * EncoderException if the supplied object is not of type java.lang.String.
+        // * </p>
+        // *
+        // * @see #soundex(String)
+        // *
+        // * @param obj
+        // *            Object to encode
+        // * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
+        // *         supplied.
+        // * @throws EncoderException
+        // *             if the parameter supplied is not of type java.lang.String
+        // * @throws IllegalArgumentException
+        // *             if a character is not mapped
+        // */
+        //@Override
+        //    public Object encode(object obj) 
+        //{
+        //        if (!(obj instanceof String)) {
+        //        throw new EncoderException(
+        //                "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
+        //    }
+        //        return encode((String) obj);
+        //}
+
+        /// <summary>
+        /// Encodes a string using the Daitch-Mokotoff soundex algorithm without branching.
+        /// </summary>
+        /// <param name="source">A string to encode.</param>
+        /// <returns>A DM Soundex code corresponding to the string supplied.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        /// <seealso cref="GetSoundex(string)"/>
+        public virtual string Encode(string source)
+        {
+            if (source == null)
+            {
+                return null;
+            }
+            return GetSoundex(source, false)[0];
+        }
+
+        /// <summary>
+        /// Encodes a string using the Daitch-Mokotoff soundex algorithm with branching.
+        /// <para/>
+        /// In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
+        /// separated by '|'.
+        /// <para/>
+        /// Example: the name "AUERBACH" is encoded as both
+        /// <list type="bullet">
+        ///     <item><description>097400</description></item>
+        ///     <item><description>097500</description></item>
+        /// </list>
+        /// <para/>
+        /// Thus the result will be "097400|097500".
+        /// </summary>
+        /// <param name="source">A string to encode.</param>
+        /// <returns>A string containing a set of DM Soundex codes corresponding to the string supplied.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        public virtual string GetSoundex(string source)
+        {
+            string[] branches = GetSoundex(source, true);
+            StringBuilder sb = new StringBuilder();
+            int index = 0;
+            foreach (string branch in branches)
+            {
+                sb.Append(branch);
+                if (++index < branches.Length)
+                {
+                    sb.Append('|');
+                }
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Perform the actual DM Soundex algorithm on the input string.
+        /// </summary>
+        /// <param name="source">A string to encode.</param>
+        /// <param name="branching">If branching shall be performed.</param>
+        /// <returns>A string array containing all DM Soundex codes corresponding to the string supplied depending on the selected branching mode.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        private string[] GetSoundex(string source, bool branching)
+        {
+            if (source == null)
+            {
+                return null;
+            }
+
+            string input = Cleanup(source);
+
+            // Java's LinkedHashSet preserves insertion order; here a List preserves order, and duplicates are filtered via Contains() below.
+            IList<Branch> currentBranches = new List<Branch>();
+            currentBranches.Add(new Branch());
+
+            char lastChar = '\0';
+            for (int index = 0; index < input.Length; index++)
+            {
+                char ch = input[index];
+
+                // ignore whitespace inside a name
+                if (char.IsWhiteSpace(ch))
+                {
+                    continue;
+                }
+
+                string inputContext = input.Substring(index);
+                IList<Rule> rules;
+                if (!RULES.TryGetValue(ch, out rules) || rules == null)
+                {
+                    continue;
+                }
+
+                // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
+                IList<Branch> nextBranches = branching ? new List<Branch>() : Collections.EmptyList<Branch>();
+
+                foreach (Rule rule in rules)
+                {
+                    if (rule.Matches(inputContext))
+                    {
+                        if (branching)
+                        {
+                            nextBranches.Clear();
+                        }
+                        string[] replacements = rule.GetReplacements(inputContext, lastChar == '\0');
+                        bool branchingRequired = replacements.Length > 1 && branching;
+
+                        foreach (Branch branch in currentBranches)
+                        {
+                            foreach (string nextReplacement in replacements)
+                            {
+                                // if we have multiple replacements, always create a new branch
+                                Branch nextBranch = branchingRequired ? branch.CreateBranch() : branch;
+
+                                // special rule: occurrences of mn or nm are treated differently
+                                bool force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm');
+
+                                nextBranch.ProcessNextReplacement(nextReplacement, force);
+
+                                if (branching)
+                                {
+                                    if (!nextBranches.Contains(nextBranch))
+                                    {
+                                        nextBranches.Add(nextBranch);
+                                    }
+                                }
+                                else
+                                {
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (branching)
+                        {
+                            currentBranches.Clear();
+                            currentBranches.AddRange(nextBranches);
+                        }
+                        index += rule.PatternLength - 1;
+                        break;
+                    }
+                }
+
+                lastChar = ch;
+            }
+
+            string[] result = new string[currentBranches.Count];
+            int idx = 0;
+            foreach (Branch branch in currentBranches)
+            {
+                branch.Finish();
+                result[idx++] = branch.ToString();
+            }
+
+            return result;
+        }
+    }
+}
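
A usage sketch for the class above (editorial, not part of the patch), contrasting the non-branching Encode with the branching GetSoundex. The branched result for "AUERBACH" is the example given in the class remarks; the single-code result is assumed to be the first of those branches:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class DaitchMokotoffExample
    {
        public static void Main()
        {
            var dm = new DaitchMokotoffSoundex();
            // Branching disabled: only the first code is returned (assumed to be 097400 here).
            Console.WriteLine(dm.Encode("AUERBACH"));     // 097400
            // Branching enabled: all codes, separated by '|', per the class remarks.
            Console.WriteLine(dm.GetSoundex("AUERBACH")); // 097400|097500
        }
    }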


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
new file mode 100644
index 0000000..18a9e59
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MetaphoneTest.cs
@@ -0,0 +1,518 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class MetaphoneTest : StringEncoderAbstractTest<Metaphone>
+    {
+        public void AssertIsMetaphoneEqual(string source, string[] matches)
+        {
+            // match source to all matches
+            foreach (string matche in matches)
+            {
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(source, matche),
+                    "Source: " + source + ", should have same Metaphone as: " + matche);
+            }
+            // match to each other
+            foreach (string matche in matches)
+            {
+                foreach (string matche2 in matches)
+                {
+                    Assert.True(this.StringEncoder.IsMetaphoneEqual(matche, matche2));
+                }
+            }
+        }
+
+        public void AssertMetaphoneEqual(String[][] pairs)
+        {
+            this.ValidateFixture(pairs);
+            foreach (String[] pair in pairs)
+            {
+                String name0 = pair[0];
+                String name1 = pair[1];
+                String failMsg = "Expected match between " + name0 + " and " + name1;
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name0, name1), failMsg);
+                Assert.True(this.StringEncoder.IsMetaphoneEqual(name1, name0), failMsg);
+            }
+        }
+
+
+        protected override Metaphone CreateStringEncoder()
+        {
+            return new Metaphone();
+        }
+
+        [Test]
+        public void TestIsMetaphoneEqual1()
+        {
+            this.AssertMetaphoneEqual(new String[][] { new string[] {
+                "Case", "case" }, new string[] {
+                "CASE", "Case" }, new string[] {
+                "caSe", "cAsE" }, new string[] {
+                "quick", "cookie" }
+        });
+        }
+
+        /**
+         * Matches computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqual2()
+        {
+            this.AssertMetaphoneEqual(new String[][] { new string[] { "Lawrence", "Lorenza" }, new string[] {
+                "Gary", "Cahra" }, });
+        }
+
+        /**
+         * Initial AE case.
+         *
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualAero()
+        {
+            this.AssertIsMetaphoneEqual("Aero", new String[] { "Eure" });
+        }
+
+        /**
+         * Initial WH case.
+         *
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualWhite()
+        {
+            this.AssertIsMetaphoneEqual(
+                "White",
+                new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
+        }
+
+        /**
+         * Initial A, not followed by an E case.
+         *
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualAlbert()
+        {
+            this.AssertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualGary()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Gary",
+                new String[] {
+                "Cahra",
+                "Cara",
+                "Carey",
+                "Cari",
+                "Caria",
+                "Carie",
+                "Caro",
+                "Carree",
+                "Carri",
+                "Carrie",
+                "Carry",
+                "Cary",
+                "Cora",
+                "Corey",
+                "Cori",
+                "Corie",
+                "Correy",
+                "Corri",
+                "Corrie",
+                "Corry",
+                "Cory",
+                "Gray",
+                "Kara",
+                "Kare",
+                "Karee",
+                "Kari",
+                "Karia",
+                "Karie",
+                "Karrah",
+                "Karrie",
+                "Karry",
+                "Kary",
+                "Keri",
+                "Kerri",
+                "Kerrie",
+                "Kerry",
+                "Kira",
+                "Kiri",
+                "Kora",
+                "Kore",
+                "Kori",
+                "Korie",
+                "Korrie",
+                "Korry" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualJohn()
+        {
+            this.AssertIsMetaphoneEqual(
+                "John",
+                new String[] {
+                "Gena",
+                "Gene",
+                "Genia",
+                "Genna",
+                "Genni",
+                "Gennie",
+                "Genny",
+                "Giana",
+                "Gianna",
+                "Gina",
+                "Ginni",
+                "Ginnie",
+                "Ginny",
+                "Jaine",
+                "Jan",
+                "Jana",
+                "Jane",
+                "Janey",
+                "Jania",
+                "Janie",
+                "Janna",
+                "Jany",
+                "Jayne",
+                "Jean",
+                "Jeana",
+                "Jeane",
+                "Jeanie",
+                "Jeanna",
+                "Jeanne",
+                "Jeannie",
+                "Jen",
+                "Jena",
+                "Jeni",
+                "Jenn",
+                "Jenna",
+                "Jennee",
+                "Jenni",
+                "Jennie",
+                "Jenny",
+                "Jinny",
+                "Jo Ann",
+                "Jo-Ann",
+                "Jo-Anne",
+                "Joan",
+                "Joana",
+                "Joane",
+                "Joanie",
+                "Joann",
+                "Joanna",
+                "Joanne",
+                "Joeann",
+                "Johna",
+                "Johnna",
+                "Joni",
+                "Jonie",
+                "Juana",
+                "June",
+                "Junia",
+                "Junie" });
+        }
+
+        /**
+         * Initial KN case.
+         *
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualKnight()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Knight",
+                new String[] {
+                "Hynda",
+                "Nada",
+                "Nadia",
+                "Nady",
+                "Nat",
+                "Nata",
+                "Natty",
+                "Neda",
+                "Nedda",
+                "Nedi",
+                "Netta",
+                "Netti",
+                "Nettie",
+                "Netty",
+                "Nita",
+                "Nydia" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualMary()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Mary",
+                new String[] {
+                "Mair",
+                "Maire",
+                "Mara",
+                "Mareah",
+                "Mari",
+                "Maria",
+                "Marie",
+                "Mary",
+                "Maura",
+                "Maure",
+                "Meara",
+                "Merrie",
+                "Merry",
+                "Mira",
+                "Moira",
+                "Mora",
+                "Moria",
+                "Moyra",
+                "Muire",
+                "Myra",
+                "Myrah" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualParis()
+        {
+            this.AssertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualPeter()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Peter",
+                new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualRay()
+        {
+            this.AssertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualSusan()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Susan",
+                new String[] {
+                "Siusan",
+                "Sosanna",
+                "Susan",
+                "Susana",
+                "Susann",
+                "Susanna",
+                "Susannah",
+                "Susanne",
+                "Suzann",
+                "Suzanna",
+                "Suzanne",
+                "Zuzana" });
+        }
+
+        /**
+         * Initial WR case.
+         *
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualWright()
+        {
+            this.AssertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
+        }
+
+        /**
+         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
+         */
+        [Test]
+        public void TestIsMetaphoneEqualXalan()
+        {
+            this.AssertIsMetaphoneEqual(
+                "Xalan",
+                new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
+        }
+
+        [Test]
+        public void TestMetaphone()
+        {
+            Assert.AreEqual("HL", this.StringEncoder.GetMetaphone("howl"));
+            Assert.AreEqual("TSTN", this.StringEncoder.GetMetaphone("testing"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("The"));
+            Assert.AreEqual("KK", this.StringEncoder.GetMetaphone("quick"));
+            Assert.AreEqual("BRN", this.StringEncoder.GetMetaphone("brown"));
+            Assert.AreEqual("FKS", this.StringEncoder.GetMetaphone("fox"));
+            Assert.AreEqual("JMPT", this.StringEncoder.GetMetaphone("jumped"));
+            Assert.AreEqual("OFR", this.StringEncoder.GetMetaphone("over"));
+            Assert.AreEqual("0", this.StringEncoder.GetMetaphone("the"));
+            Assert.AreEqual("LS", this.StringEncoder.GetMetaphone("lazy"));
+            Assert.AreEqual("TKS", this.StringEncoder.GetMetaphone("dogs"));
+        }
+
+        [Test]
+        public void TestWordEndingInMB()
+        {
+            Assert.AreEqual("KM", this.StringEncoder.GetMetaphone("COMB"));
+            Assert.AreEqual("TM", this.StringEncoder.GetMetaphone("TOMB"));
+            Assert.AreEqual("WM", this.StringEncoder.GetMetaphone("WOMB"));
+        }
+
+        [Test]
+        public void TestDiscardOfSCEOrSCIOrSCY()
+        {
+            Assert.AreEqual("SNS", this.StringEncoder.GetMetaphone("SCIENCE"));
+            Assert.AreEqual("SN", this.StringEncoder.GetMetaphone("SCENE"));
+            Assert.AreEqual("S", this.StringEncoder.GetMetaphone("SCY"));
+        }
+
+        /**
+         * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
+         */
+        [Test]
+        public void TestWhy()
+        {
+            // PHP returns "H". The original metaphone returns an empty string.
+            Assert.AreEqual("", this.StringEncoder.GetMetaphone("WHY"));
+        }
+
+        [Test]
+        public void TestWordsWithCIA()
+        {
+            Assert.AreEqual("XP", this.StringEncoder.GetMetaphone("CIAPO"));
+        }
+
+        [Test]
+        public void TestTranslateOfSCHAndCH()
+        {
+            Assert.AreEqual("SKTL", this.StringEncoder.GetMetaphone("SCHEDULE"));
+            Assert.AreEqual("SKMT", this.StringEncoder.GetMetaphone("SCHEMATIC"));
+
+            Assert.AreEqual("KRKT", this.StringEncoder.GetMetaphone("CHARACTER"));
+            Assert.AreEqual("TX", this.StringEncoder.GetMetaphone("TEACH"));
+        }
+
+        [Test]
+        public void TestTranslateToJOfDGEOrDGIOrDGY()
+        {
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGY"));
+            Assert.AreEqual("TJ", this.StringEncoder.GetMetaphone("DODGE"));
+            Assert.AreEqual("AJMT", this.StringEncoder.GetMetaphone("ADGIEMTI"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentHAfterG()
+        {
+            Assert.AreEqual("KNT", this.StringEncoder.GetMetaphone("GHENT"));
+            Assert.AreEqual("B", this.StringEncoder.GetMetaphone("BAUGH"));
+        }
+
+        [Test]
+        public void TestDiscardOfSilentGN()
+        {
+            // NOTE: This does not test for silent GN, but for starting with GN
+            Assert.AreEqual("N", this.StringEncoder.GetMetaphone("GNU"));
+
+            // NOTE: Trying to test for GNED, but expected code does not appear to execute
+            Assert.AreEqual("SNT", this.StringEncoder.GetMetaphone("SIGNED"));
+        }
+
+        [Test]
+        public void TestPHTOF()
+        {
+            Assert.AreEqual("FX", this.StringEncoder.GetMetaphone("PHISH"));
+        }
+
+        [Test]
+        public void TestSHAndSIOAndSIAToX()
+        {
+            Assert.AreEqual("XT", this.StringEncoder.GetMetaphone("SHOT"));
+            Assert.AreEqual("OTXN", this.StringEncoder.GetMetaphone("ODSIAN"));
+            Assert.AreEqual("PLXN", this.StringEncoder.GetMetaphone("PULSION"));
+        }
+
+        [Test]
+        public void TestTIOAndTIAToX()
+        {
+            Assert.AreEqual("OX", this.StringEncoder.GetMetaphone("OTIA"));
+            Assert.AreEqual("PRXN", this.StringEncoder.GetMetaphone("PORTION"));
+        }
+
+        [Test]
+        public void TestTCH()
+        {
+            Assert.AreEqual("RX", this.StringEncoder.GetMetaphone("RETCH"));
+            Assert.AreEqual("WX", this.StringEncoder.GetMetaphone("WATCH"));
+        }
+
+        [Test]
+        public void TestExceedLength()
+        {
+            // should be AKSKS, but is truncated by Max Code Length
+            Assert.AreEqual("AKSK", this.StringEncoder.GetMetaphone("AXEAXE"));
+        }
+
+        [Test]
+        public void TestSetMaxLengthWithTruncation()
+        {
+            // should be AKSKSKS, but is truncated by Max Code Length
+            this.StringEncoder.MaxCodeLen = 6;
+            Assert.AreEqual("AKSKSK", this.StringEncoder.GetMetaphone("AXEAXEAXE"));
+        }
+
+        public void ValidateFixture(String[][] pairs)
+        {
+            if (pairs.Length == 0)
+            {
+                Assert.Fail("Test fixture is empty");
+            }
+            for (int i = 0; i < pairs.Length; i++)
+            {
+                if (pairs[i].Length != 2)
+                {
+                    Assert.Fail("Error in test fixture in the data array at index " + i);
+                }
+            }
+        }
+    }
+}

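For readers skimming the test above, here is a minimal usage sketch of the Metaphone encoder it exercises. The type, members (GetMetaphone, IsMetaphoneEqual, MaxCodeLen) and expected codes are taken from the assertions in the test; the console-program wrapper and its class name are illustrative only.

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class MetaphoneUsageSketch
    {
        public static void Main()
        {
            var metaphone = new Metaphone();

            // Basic encoding; the default max code length is 4, so longer codes are truncated.
            Console.WriteLine(metaphone.GetMetaphone("testing")); // TSTN
            Console.WriteLine(metaphone.GetMetaphone("AXEAXE"));  // AKSK (truncated from AKSKS)

            // Phonetic equality, as used throughout the fixture.
            Console.WriteLine(metaphone.IsMetaphoneEqual("Peter", "Pedro")); // True

            // Allow longer codes, as in TestSetMaxLengthWithTruncation.
            metaphone.MaxCodeLen = 6;
            Console.WriteLine(metaphone.GetMetaphone("AXEAXEAXE")); // AKSKSK
        }
    }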
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
new file mode 100644
index 0000000..d1c04d1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/NysiisTest.cs
@@ -0,0 +1,319 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class NysiisTest : StringEncoderAbstractTest<Nysiis>
+    {
+        private readonly Nysiis fullNysiis = new Nysiis(false);
+
+        /**
+         * Takes an array of String pairs where each pair's first element is the input and the second element the expected
+         * encoding.
+         *
+         * @param testValues
+         *            an array of String pairs where each pair's first element is the input and the second element the
+         *            expected encoding.
+         * @throws EncoderException
+         */
+        private void AssertEncodings(params String[][] testValues)
+        {
+            foreach (String[] arr in testValues)
+            {
+                Assert.AreEqual(arr[1], this.fullNysiis.Encode(arr[0]), "Problem with " + arr[0]);
+            }
+        }
+
+        protected override Nysiis CreateStringEncoder()
+        {
+            return new Nysiis();
+        }
+
+        private void EncodeAll(String[] strings, String expectedEncoding)
+        {
+            foreach (String str in strings)
+            {
+                Assert.AreEqual(expectedEncoding, StringEncoder.Encode(str), "Problem with " + str);
+            }
+        }
+
+        [Test]
+        public void TestBran()
+        {
+            EncodeAll(new String[] { "Brian", "Brown", "Brun" }, "BRAN");
+        }
+
+        [Test]
+        public void TestCap()
+        {
+            this.EncodeAll(new String[] { "Capp", "Cope", "Copp", "Kipp" }, "CAP");
+        }
+
+        [Test]
+        public void TestDad()
+        {
+            // Data Quality and Record Linkage Techniques P.121 claims this is DAN,
+            // but it should be DAD, verified also with dropby.com
+            this.EncodeAll(new String[] { "Dent" }, "DAD");
+        }
+
+        [Test]
+        public void TestDan()
+        {
+            this.EncodeAll(new String[] { "Dane", "Dean", "Dionne" }, "DAN");
+        }
+
+        /**
+         * Tests data gathered from around the internet.
+         *
+         * @see <a href="http://www.dropby.com/NYSIISTextStrings.html">http://www.dropby.com/NYSIISTextStrings.html</a>
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestDropBy()
+        {
+            // Explanation of differences between this implementation and the one at dropby.com is
+            // prepended to the test string. The referenced rules refer to the outlined steps the
+            // class description for Nysiis.
+
+            this.AssertEncodings(
+                    // 1. Transcode first characters of name
+                    new String[] { "MACINTOSH", "MCANT" },
+                    // violates 4j: the second N should not be added, as the first
+                    //              key char is already a N
+                    new String[] { "KNUTH", "NAT" },           // Original: NNAT; modified: NATH
+                                                               // O and E are transcoded to A because of rule 4a
+                                                               // H also to A because of rule 4h
+                                                               // the N gets mysteriously lost, maybe because of a wrongly implemented rule 4h
+                                                               // that skips the next char in such a case?
+                                                               // the remaining A is removed because of rule 7
+                    new String[] { "KOEHN", "CAN" },           // Original: C
+                                                               // violates 4j: see also KNUTH
+                    new String[] { "PHILLIPSON", "FALAPSAN" }, // Original: FFALAP[SAN]
+                                                               // violates 4j: see also KNUTH
+                    new String[] { "PFEISTER", "FASTAR" },     // Original: FFASTA[R]
+                                                               // violates 4j: see also KNUTH
+                    new String[] { "SCHOENHOEFT", "SANAFT" },  // Original: SSANAF[T]
+                                                               // 2. Transcode last characters of name:
+                    new String[] { "MCKEE", "MCY" },
+                    new String[] { "MACKIE", "MCY" },
+                    new String[] { "HEITSCHMIDT", "HATSNAD" },
+                    new String[] { "BART", "BAD" },
+                    new String[] { "HURD", "HAD" },
+                    new String[] { "HUNT", "HAD" },
+                    new String[] { "WESTERLUND", "WASTARLAD" },
+                    // 4. Transcode remaining characters by following these rules,
+                    //    incrementing by one character each time:
+                    new String[] { "CASSTEVENS", "CASTAFAN" },
+                    new String[] { "VASQUEZ", "VASG" },
+                    new String[] { "FRAZIER", "FRASAR" },
+                    new String[] { "BOWMAN", "BANAN" },
+                    new String[] { "MCKNIGHT", "MCNAGT" },
+                    new String[] { "RICKERT", "RACAD" },
+                    // violates 5: the last S is not removed
+                    // when comparing to DEUTS, which is phonetically similar
+                    // the result is also DAT, which is correct for DEUTSCH too imo
+                    new String[] { "DEUTSCH", "DAT" },         // Original: DATS
+                    new String[] { "WESTPHAL", "WASTFAL" },
+                    // violates 4h: the H should be transcoded to S and thus ignored as
+                    // the first key character is also S
+                    new String[] { "SHRIVER", "SRAVAR" },      // Original: SHRAVA[R]
+                                                               // same as KOEHN, the L gets mysteriously lost
+                    new String[] { "KUHL", "CAL" },            // Original: C
+                    new String[] { "RAWSON", "RASAN" },
+                    // If last character is S, remove it
+                    new String[] { "JILES", "JAL" },
+                    // violates 6: if the last two characters are AY, remove A
+                    new String[] { "CARRAWAY", "CARY" },       // Original: CARAY
+                    new String[] { "YAMADA", "YANAD" });
+        }
+
+        [Test]
+        public void TestFal()
+        {
+            this.EncodeAll(new String[] { "Phil" }, "FAL");
+        }
+
+        /**
+         * Tests data gathered from around the internets.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestOthers()
+        {
+            this.AssertEncodings(
+                    new String[] { "O'Daniel", "ODANAL" },
+                    new String[] { "O'Donnel", "ODANAL" },
+                    new String[] { "Cory", "CARY" },
+                    new String[] { "Corey", "CARY" },
+                    new String[] { "Kory", "CARY" },
+                    //
+                    new String[] { "FUZZY", "FASY" });
+        }
+
+        /**
+         * Tests rule 1: Translate first characters of name: MAC → MCC, KN → N, K → C, PH, PF → FF, SCH → SSS
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule1()
+        {
+            this.AssertEncodings(
+                    new String[] { "MACX", "MCX" },
+                    new String[] { "KNX", "NX" },
+                    new String[] { "KX", "CX" },
+                    new String[] { "PHX", "FX" },
+                    new String[] { "PFX", "FX" },
+                    new String[] { "SCHX", "SX" });
+        }
+
+        /**
+         * Tests rule 2: Translate last characters of name: EE → Y, IE → Y, DT, RT, RD, NT, ND → D
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule2()
+        {
+            this.AssertEncodings(
+                    new String[] { "XEE", "XY" },
+                    new String[] { "XIE", "XY" },
+                    new String[] { "XDT", "XD" },
+                    new String[] { "XRT", "XD" },
+                    new String[] { "XRD", "XD" },
+                    new String[] { "XNT", "XD" },
+                    new String[] { "XND", "XD" });
+        }
+
+        /**
+         * Tests rule 4.1: EV → AF else A, E, I, O, U → A
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule4Dot1()
+        {
+            this.AssertEncodings(
+                    new String[] { "XEV", "XAF" },
+                    new String[] { "XAX", "XAX" },
+                    new String[] { "XEX", "XAX" },
+                    new String[] { "XIX", "XAX" },
+                    new String[] { "XOX", "XAX" },
+                    new String[] { "XUX", "XAX" });
+        }
+
+        /**
+         * Tests rule 4.2: Q → G, Z → S, M → N
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule4Dot2()
+        {
+            this.AssertEncodings(
+                    new String[] { "XQ", "XG" },
+                    new String[] { "XZ", "X" },
+                    new String[] { "XM", "XN" });
+        }
+
+        /**
+         * Tests rule 5: If last character is S, remove it.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule5()
+        {
+            this.AssertEncodings(
+                    new String[] { "XS", "X" },
+                    new String[] { "XSS", "X" });
+        }
+
+        /**
+         * Tests rule 6: If last characters are AY, replace with Y.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule6()
+        {
+            this.AssertEncodings(
+                    new String[] { "XAY", "XY" },
+                    new String[] { "XAYS", "XY" }); // Rules 5, 6
+        }
+
+        /**
+         * Tests rule 7: If last character is A, remove it.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestRule7()
+        {
+            this.AssertEncodings(
+                    new String[] { "XA", "X" },
+                    new String[] { "XAS", "X" }); // Rules 5, 7
+        }
+
+        [Test]
+        public void TestSnad()
+        {
+            // Data Quality and Record Linkage Techniques P.121 claims this is SNAT,
+            // but it should be SNAD
+            this.EncodeAll(new String[] { "Schmidt" }, "SNAD");
+        }
+
+        [Test]
+        public void TestSnat()
+        {
+            this.EncodeAll(new String[] { "Smith", "Schmit" }, "SNAT");
+        }
+
+        [Test]
+        public void TestSpecialBranches()
+        {
+            this.EncodeAll(new String[] { "Kobwick" }, "CABWAC");
+            this.EncodeAll(new String[] { "Kocher" }, "CACAR");
+            this.EncodeAll(new String[] { "Fesca" }, "FASC");
+            this.EncodeAll(new String[] { "Shom" }, "SAN");
+            this.EncodeAll(new String[] { "Ohlo" }, "OL");
+            this.EncodeAll(new String[] { "Uhu" }, "UH");
+            this.EncodeAll(new String[] { "Um" }, "UN");
+        }
+
+        [Test]
+        public void TestTranan()
+        {
+            this.EncodeAll(new String[] { "Trueman", "Truman" }, "TRANAN");
+        }
+
+        [Test]
+        public void TestTrueVariant()
+        {
+            Nysiis encoder = new Nysiis(true);
+
+            String encoded = encoder.Encode("WESTERLUND");
+            Assert.True(encoded.Length <= 6);
+            Assert.AreEqual("WASTAR", encoded);
+        }
+    }
+}

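A companion sketch for the NYSIIS encoder covered above. Only constructors and values that appear in the test are used (the wrapper program itself is illustrative).

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class NysiisUsageSketch
    {
        public static void Main()
        {
            // The "true" variant keeps the code within 6 characters (see TestTrueVariant).
            var strict = new Nysiis(true);
            Console.WriteLine(strict.Encode("WESTERLUND")); // WASTAR

            // The "false" variant returns the full code (see TestDropBy).
            var full = new Nysiis(false);
            Console.WriteLine(full.Encode("WESTERLUND"));   // WASTARLAD
            Console.WriteLine(full.Encode("MACINTOSH"));    // MCANT
        }
    }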
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
new file mode 100644
index 0000000..eca1827
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/RefinedSoundexTest.cs
@@ -0,0 +1,99 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests RefinedSoundex.
+    /// </summary>
+    public class RefinedSoundexTest : StringEncoderAbstractTest<RefinedSoundex>
+    {
+        protected override RefinedSoundex CreateStringEncoder()
+        {
+            return new RefinedSoundex();
+        }
+
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(3, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(5, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from
+            // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(6, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(8, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(5, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        [Test]
+        public void TestEncode()
+        {
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T6036084", this.StringEncoder.Encode("TESTING"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q503", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B1908", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F205", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J408106", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O0209", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T60", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L7050", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D6043", this.StringEncoder.Encode("dogs"));
+
+            // Testing CODEC-56
+            Assert.AreEqual("D6043", RefinedSoundex.US_ENGLISH.Encode("dogs"));
+        }
+
+        [Test]
+        public void TestGetMappingCodeNonLetter()
+        {
+            char code = this.StringEncoder.GetMappingCode('#');
+            Assert.AreEqual(0, code, "Code does not equals zero");
+        }
+
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex().GetSoundex("dogs"));
+        }
+
+        [Test]
+        public void TestNewInstance2()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex(RefinedSoundex.US_ENGLISH_MAPPING_STRING.toCharArray()).GetSoundex("dogs"));
+        }
+
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("D6043", new RefinedSoundex(RefinedSoundex.US_ENGLISH_MAPPING_STRING).GetSoundex("dogs"));
+        }
+    }
+}

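Likewise, a small sketch of the RefinedSoundex API the test above relies on. Member names and expected values come from the assertions; the program wrapper is an assumption for illustration.

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class RefinedSoundexUsageSketch
    {
        public static void Main()
        {
            var encoder = new RefinedSoundex();

            Console.WriteLine(encoder.Encode("testing"));                 // T6036084
            Console.WriteLine(encoder.GetSoundex("dogs"));                // D6043
            Console.WriteLine(RefinedSoundex.US_ENGLISH.Encode("dogs"));  // D6043

            // Difference reports how similar two encodings are (see TestDifference).
            Console.WriteLine(encoder.Difference("Smith", "Smythe"));     // 6
        }
    }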
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
new file mode 100644
index 0000000..5cc01ec
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/SoundexTest.cs
@@ -0,0 +1,424 @@
+// commons-codec version compatibility level: 1.10
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="Soundex"/>
+    /// </summary>
+    public class SoundexTest : StringEncoderAbstractTest<Soundex>
+    {
+        protected override Soundex CreateStringEncoder()
+        {
+            return new Soundex();
+        }
+
+        [Test]
+        public void TestB650()
+        {
+            this.CheckEncodingVariations("B650", new string[]{
+            "BARHAM",
+            "BARONE",
+            "BARRON",
+            "BERNA",
+            "BIRNEY",
+            "BIRNIE",
+            "BOOROM",
+            "BOREN",
+            "BORN",
+            "BOURN",
+            "BOURNE",
+            "BOWRON",
+            "BRAIN",
+            "BRAME",
+            "BRANN",
+            "BRAUN",
+            "BREEN",
+            "BRIEN",
+            "BRIM",
+            "BRIMM",
+            "BRINN",
+            "BRION",
+            "BROOM",
+            "BROOME",
+            "BROWN",
+            "BROWNE",
+            "BRUEN",
+            "BRUHN",
+            "BRUIN",
+            "BRUMM",
+            "BRUN",
+            "BRUNO",
+            "BRYAN",
+            "BURIAN",
+            "BURN",
+            "BURNEY",
+            "BYRAM",
+            "BYRNE",
+            "BYRON",
+            "BYRUM"});
+        }
+
+        [Test]
+        public void TestBadCharacters()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOL>MES"));
+
+        }
+
+        [Test]
+        public void TestDifference()
+        {
+            // Edge cases
+            Assert.AreEqual(0, this.StringEncoder.Difference(null, null));
+            Assert.AreEqual(0, this.StringEncoder.Difference("", ""));
+            Assert.AreEqual(0, this.StringEncoder.Difference(" ", " "));
+            // Normal cases
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Ann", "Andrew"));
+            Assert.AreEqual(1, this.StringEncoder.Difference("Margaret", "Andrew"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Janet", "Margaret"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Green", "Greene"));
+            Assert.AreEqual(0, this.StringEncoder.Difference("Blotchet-Halls", "Greene"));
+            // Examples from http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smith", "Smythe"));
+            Assert.AreEqual(4, this.StringEncoder.Difference("Smithers", "Smythers"));
+            Assert.AreEqual(2, this.StringEncoder.Difference("Anothers", "Brothers"));
+        }
+
+        [Test]
+        public void TestEncodeBasic()
+        {
+            Assert.AreEqual("T235", this.StringEncoder.Encode("testing"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("The"));
+            Assert.AreEqual("Q200", this.StringEncoder.Encode("quick"));
+            Assert.AreEqual("B650", this.StringEncoder.Encode("brown"));
+            Assert.AreEqual("F200", this.StringEncoder.Encode("fox"));
+            Assert.AreEqual("J513", this.StringEncoder.Encode("jumped"));
+            Assert.AreEqual("O160", this.StringEncoder.Encode("over"));
+            Assert.AreEqual("T000", this.StringEncoder.Encode("the"));
+            Assert.AreEqual("L200", this.StringEncoder.Encode("lazy"));
+            Assert.AreEqual("D200", this.StringEncoder.Encode("dogs"));
+        }
+
+        /**
+         * Examples from http://www.bradandkathy.com/genealogy/overviewofsoundex.html
+         */
+        [Test]
+        public void TestEncodeBatch2()
+        {
+            Assert.AreEqual("A462", this.StringEncoder.Encode("Allricht"));
+            Assert.AreEqual("E166", this.StringEncoder.Encode("Eberhard"));
+            Assert.AreEqual("E521", this.StringEncoder.Encode("Engebrethson"));
+            Assert.AreEqual("H512", this.StringEncoder.Encode("Heimbach"));
+            Assert.AreEqual("H524", this.StringEncoder.Encode("Hanselmann"));
+            Assert.AreEqual("H431", this.StringEncoder.Encode("Hildebrand"));
+            Assert.AreEqual("K152", this.StringEncoder.Encode("Kavanagh"));
+            Assert.AreEqual("L530", this.StringEncoder.Encode("Lind"));
+            Assert.AreEqual("L222", this.StringEncoder.Encode("Lukaschowsky"));
+            Assert.AreEqual("M235", this.StringEncoder.Encode("McDonnell"));
+            Assert.AreEqual("M200", this.StringEncoder.Encode("McGee"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Opnian"));
+            Assert.AreEqual("O155", this.StringEncoder.Encode("Oppenheimer"));
+            Assert.AreEqual("R355", this.StringEncoder.Encode("Riedemanas"));
+            Assert.AreEqual("Z300", this.StringEncoder.Encode("Zita"));
+            Assert.AreEqual("Z325", this.StringEncoder.Encode("Zitzmeinn"));
+        }
+
+        /**
+         * Examples from http://www.archives.gov/research_room/genealogy/census/soundex.html
+         */
+        [Test]
+        public void TestEncodeBatch3()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode("Washington"));
+            Assert.AreEqual("L000", this.StringEncoder.Encode("Lee"));
+            Assert.AreEqual("G362", this.StringEncoder.Encode("Gutierrez"));
+            Assert.AreEqual("P236", this.StringEncoder.Encode("Pfister"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("Jackson"));
+            Assert.AreEqual("T522", this.StringEncoder.Encode("Tymczak"));
+            // For VanDeusen: D-250 (D, 2 for the S, 5 for the N, 0 added) is also
+            // possible.
+            Assert.AreEqual("V532", this.StringEncoder.Encode("VanDeusen"));
+        }
+
+        /**
+         * Examples from: http://www.myatt.demon.co.uk/sxalg.htm
+         */
+        [Test]
+        public void TestEncodeBatch4()
+        {
+            Assert.AreEqual("H452", this.StringEncoder.Encode("HOLMES"));
+            Assert.AreEqual("A355", this.StringEncoder.Encode("ADOMOMI"));
+            Assert.AreEqual("V536", this.StringEncoder.Encode("VONDERLEHR"));
+            Assert.AreEqual("B400", this.StringEncoder.Encode("BALL"));
+            Assert.AreEqual("S000", this.StringEncoder.Encode("SHAW"));
+            Assert.AreEqual("J250", this.StringEncoder.Encode("JACKSON"));
+            Assert.AreEqual("S545", this.StringEncoder.Encode("SCANLON"));
+            Assert.AreEqual("S532", this.StringEncoder.Encode("SAINTJOHN"));
+
+        }
+
+        [Test]
+        public void TestEncodeIgnoreApostrophes()
+        {
+            this.CheckEncodingVariations("O165", new string[]{
+            "OBrien",
+            "'OBrien",
+            "O'Brien",
+            "OB'rien",
+            "OBr'ien",
+            "OBri'en",
+            "OBrie'n",
+            "OBrien'"});
+        }
+
+        /**
+         * Test data from http://www.myatt.demon.co.uk/sxalg.htm
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestEncodeIgnoreHyphens()
+        {
+            this.CheckEncodingVariations("K525", new String[]{
+            "KINGSMITH",
+            "-KINGSMITH",
+            "K-INGSMITH",
+            "KI-NGSMITH",
+            "KIN-GSMITH",
+            "KING-SMITH",
+            "KINGS-MITH",
+            "KINGSM-ITH",
+            "KINGSMI-TH",
+            "KINGSMIT-H",
+            "KINGSMITH-"});
+        }
+
+        [Test]
+        public void TestEncodeIgnoreTrimmable()
+        {
+            Assert.AreEqual("W252", this.StringEncoder.Encode(" \t\n\r Washington \t\n\r "));
+        }
+
+        /**
+         * Consonants from the same code group separated by W or H are treated as one.
+         */
+        [Test]
+        public void TestHWRuleEx1()
+        {
+            // From
+            // http://www.archives.gov/research_room/genealogy/census/soundex.html:
+            // Ashcraft is coded A-261 (A, 2 for the S, C ignored, 6 for the R, 1
+            // for the F). It is not coded A-226.
+            Assert.AreEqual("A261", this.StringEncoder.Encode("Ashcraft"));
+        }
+
+        /**
+         * Consonants from the same code group separated by W or H are treated as one.
+         *
+         * Test data from http://www.myatt.demon.co.uk/sxalg.htm
+         */
+        [Test]
+        public void TestHWRuleEx2()
+        {
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTHDAVIS"));
+            Assert.AreEqual("B312", this.StringEncoder.Encode("BOOTH-DAVIS"));
+        }
+
+        /**
+         * Consonants from the same code group separated by W or H are treated as one.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestHWRuleEx3()
+        {
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Sgler"));
+            Assert.AreEqual("S460", this.StringEncoder.Encode("Swhgler"));
+            // Also S460:
+            this.CheckEncodingVariations("S460", new String[]{
+            "SAILOR",
+            "SALYER",
+            "SAYLOR",
+            "SCHALLER",
+            "SCHELLER",
+            "SCHILLER",
+            "SCHOOLER",
+            "SCHULER",
+            "SCHUYLER",
+            "SEILER",
+            "SEYLER",
+            "SHOLAR",
+            "SHULER",
+            "SILAR",
+            "SILER",
+            "SILLER"});
+        }
+
+        /**
+         * Examples for MS SQLServer from
+         * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_setu-sus_3o6w.asp
+         */
+        [Test]
+        public void TestMsSqlServer1()
+        {
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smith"));
+            Assert.AreEqual("S530", this.StringEncoder.Encode("Smythe"));
+        }
+
+        /**
+         * Examples for MS SQLServer from
+         * http://support.microsoft.com/default.aspx?scid=http://support.microsoft.com:80/support
+         * /kb/articles/Q100/3/65.asp&NoWebContent=1
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestMsSqlServer2()
+        {
+            this.CheckEncodingVariations("E625", new String[] { "Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen" });
+        }
+
+        /**
+         * Examples for MS SQLServer from http://databases.about.com/library/weekly/aa042901a.htm
+         */
+        [Test]
+        public void TestMsSqlServer3()
+        {
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Ann"));
+            Assert.AreEqual("A536", this.StringEncoder.Encode("Andrew"));
+            Assert.AreEqual("J530", this.StringEncoder.Encode("Janet"));
+            Assert.AreEqual("M626", this.StringEncoder.Encode("Margaret"));
+            Assert.AreEqual("S315", this.StringEncoder.Encode("Steven"));
+            Assert.AreEqual("M240", this.StringEncoder.Encode("Michael"));
+            Assert.AreEqual("R163", this.StringEncoder.Encode("Robert"));
+            Assert.AreEqual("L600", this.StringEncoder.Encode("Laura"));
+            Assert.AreEqual("A500", this.StringEncoder.Encode("Anne"));
+        }
+
+        /**
+         * https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+         */
+        [Test]
+        public void TestNewInstance()
+        {
+            Assert.AreEqual("W452", new Soundex().GetSoundex("Williams"));
+        }
+
+        [Test]
+        public void TestNewInstance2()
+        {
+            Assert.AreEqual("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING.toCharArray()).GetSoundex("Williams"));
+        }
+
+        [Test]
+        public void TestNewInstance3()
+        {
+            Assert.AreEqual("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).GetSoundex("Williams"));
+        }
+
+        [Test]
+        public void TestSoundexUtilsConstructable()
+        {
+            new SoundexUtils();
+        }
+
+        [Test]
+        public void TestSoundexUtilsNullBehaviour()
+        {
+            Assert.AreEqual(null, SoundexUtils.Clean(null));
+            Assert.AreEqual("", SoundexUtils.Clean(""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded(null, ""));
+            Assert.AreEqual(0, SoundexUtils.DifferenceEncoded("", null));
+        }
+
+        /**
+         * https://issues.apache.org/jira/browse/CODEC-54 https://issues.apache.org/jira/browse/CODEC-56
+         */
+        [Test]
+        public void TestUsEnglishStatic()
+        {
+            Assert.AreEqual("W452", Soundex.US_ENGLISH.GetSoundex("Williams"));
+        }
+
+        /**
+         * Fancy characters are not mapped by the default US mapping.
+         *
+         * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+         */
+        [Test]
+        public void TestUsMappingEWithAcute()
+        {
+            Assert.AreEqual("E000", this.StringEncoder.Encode("e"));
+            if (char.IsLetter('\u00e9'))
+            { // e-acute
+                try
+                {
+                    //         uppercase E-acute
+                    Assert.AreEqual("\u00c9000", this.StringEncoder.Encode("\u00e9"));
+                    Assert.Fail("Expected IllegalArgumentException not thrown");
+                }
+#pragma warning disable 168
+                catch (ArgumentException e)
+#pragma warning restore 168
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00e9"));
+            }
+        }
+
+        /**
+         * Fancy characters are not mapped by the default US mapping.
+         *
+         * http://issues.apache.org/bugzilla/show_bug.cgi?id=29080
+         */
+        [Test]
+        public void TestUsMappingOWithDiaeresis()
+        {
+            Assert.AreEqual("O000", this.StringEncoder.Encode("o"));
+            if (char.IsLetter('\u00f6'))
+            { // o-umlaut
+                try
+                {
+                    //         uppercase O-umlaut
+                    Assert.AreEqual("\u00d6000", this.StringEncoder.Encode("\u00f6"));
+                    Assert.Fail("Expected IllegalArgumentException not thrown");
+                }
+#pragma warning disable 168
+                catch (ArgumentException e)
+#pragma warning restore 168
+                {
+                    // expected
+                }
+            }
+            else
+            {
+                Assert.AreEqual("", this.StringEncoder.Encode("\u00f6"));
+            }
+        }
+    }
+}

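And a corresponding sketch for the classic Soundex encoder. Again, only members and values present in the test above are used; the console wrapper is illustrative.

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class SoundexUsageSketch
    {
        public static void Main()
        {
            var soundex = new Soundex();

            Console.WriteLine(soundex.Encode("testing"));                      // T235
            Console.WriteLine(soundex.GetSoundex("Williams"));                 // W452
            Console.WriteLine(Soundex.US_ENGLISH.GetSoundex("Williams"));      // W452

            // Similarity of two encodings; higher means closer (see TestDifference).
            Console.WriteLine(soundex.Difference("Smith", "Smythe"));          // 4
        }
    }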
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
new file mode 100644
index 0000000..8fd8b7f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/StringEncoderAbstractTest.cs
@@ -0,0 +1,164 @@
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public abstract class StringEncoderAbstractTest<T>
+        where T : IStringEncoder
+    {
+        protected T stringEncoder;
+
+        [SetUp]
+        public void SetUp()
+        {
+            stringEncoder = this.CreateStringEncoder();
+        }
+
+        public virtual void CheckEncoding(string expected, string source)
+        {
+            Assert.AreEqual(expected, this.StringEncoder.Encode(source), "Source: " + source);
+        }
+
+        protected virtual void CheckEncodings(string[][] data)
+        {
+            foreach (string[] element in data)
+            {
+                this.CheckEncoding(element[1], element[0]);
+            }
+        }
+
+        protected virtual void CheckEncodingVariations(string expected, string[] data)
+        {
+            foreach (string element in data)
+            {
+                this.CheckEncoding(expected, element);
+            }
+        }
+
+        protected abstract T CreateStringEncoder();
+
+        public virtual T StringEncoder
+        {
+            get { return this.stringEncoder; }
+        }
+
+        [Test]
+        public virtual void TestEncodeEmpty()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+            encoder.Encode("");
+            encoder.Encode(" ");
+            encoder.Encode("\t");
+        }
+
+        // LUCENENET specific - since strings are sealed in .NET, there
+        // is no point in implementing IEncoder or running these tests.
+        // Our version only accepts strings 
+        [Test]
+        public virtual void TestEncodeNull()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+            try
+            {
+                encoder.Encode(null);
+            }
+#pragma warning disable 168
+            catch (/*Encoder*/Exception ee)
+#pragma warning restore 168
+            {
+                // An exception should be thrown
+            }
+        }
+
+        //[Test]
+        //public virtual void TestEncodeWithInvalidObject()
+        //{
+        //    bool exceptionThrown = false;
+        //    try
+        //    {
+        //        IStringEncoder encoder = this.StringEncoder;
+        //        encoder.Encode(3.4f);
+        //    }
+        //    catch (Exception e)
+        //    {
+        //        exceptionThrown = true;
+        //    }
+        //    Assert.True(exceptionThrown, "An exception was not thrown when we tried to encode " + "a Float object");
+        //}
+
+        [Test]
+        public virtual void TestLocaleIndependence()
+        {
+            IStringEncoder encoder = this.StringEncoder;
+
+            string[] data = { "I", "i", };
+
+            CultureInfo orig = CultureInfo.CurrentCulture;
+            CultureInfo[] locales = { new CultureInfo("en"), new CultureInfo("tr"), CultureInfo.CurrentCulture };
+
+            try
+            {
+                foreach (string element in data)
+                {
+                    string @ref = null;
+                    for (int j = 0; j < locales.Length; j++)
+                    {
+                        //Locale.setDefault(locales[j]);
+#if NETSTANDARD
+                        CultureInfo.CurrentCulture = locales[j];
+#else
+                        Thread.CurrentThread.CurrentCulture = locales[j];
+#endif
+                        if (j <= 0)
+                        {
+                            @ref = encoder.Encode(element);
+                        }
+                        else
+                        {
+                            string cur = null;
+                            try
+                            {
+                                cur = encoder.Encode(element);
+                            }
+                            catch (Exception e)
+                            {
+                                Assert.Fail(CultureInfo.CurrentCulture.ToString() + ": " + e.Message);
+                            }
+                            Assert.AreEqual(@ref, cur, CultureInfo.CurrentCulture.ToString() + ": ");
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                //Locale.setDefault(orig);
+#if NETSTANDARD
+                CultureInfo.CurrentCulture = orig;
+#else
+                Thread.CurrentThread.CurrentCulture = orig;
+#endif
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
new file mode 100644
index 0000000..5c38e1f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.csproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{A2867797-0A5D-4878-8F59-58C399C9A4E4}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis.Phonetic</RootNamespace>
+    <AssemblyName>Lucene.Net.Tests.Analysis.Phonetic</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="DoubleMetaphoneFilterTest.cs" />
+    <Compile Include="Language\Bm\BeiderMorseEncoderTest.cs" />
+    <Compile Include="Language\Bm\CacheSubSequencePerformanceTest.cs" />
+    <Compile Include="Language\Bm\LanguageGuessingTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEnginePerformanceTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEngineRegressionTest.cs" />
+    <Compile Include="Language\Bm\PhoneticEngineTest.cs" />
+    <Compile Include="Language\Bm\RuleTest.cs" />
+    <Compile Include="Language\Caverphone1Test.cs" />
+    <Compile Include="Language\Caverphone2Test .cs" />
+    <Compile Include="Language\ColognePhoneticTest.cs" />
+    <Compile Include="Language\DaitchMokotoffSoundexTest.cs" />
+    <Compile Include="Language\DoubleMetaphone2Test.cs" />
+    <Compile Include="Language\DoubleMetaphoneTest.cs" />
+    <Compile Include="Language\MatchRatingApproachEncoderTest.cs" />
+    <Compile Include="Language\MetaphoneTest.cs" />
+    <Compile Include="Language\NysiisTest.cs" />
+    <Compile Include="Language\RefinedSoundexTest.cs" />
+    <Compile Include="Language\SoundexTest.cs" />
+    <Compile Include="Language\StringEncoderAbstractTest.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="TestBeiderMorseFilter.cs" />
+    <Compile Include="TestBeiderMorseFilterFactory.cs" />
+    <Compile Include="TestDoubleMetaphoneFilterFactory.cs" />
+    <Compile Include="TestPhoneticFilter.cs" />
+    <Compile Include="TestPhoneticFilterFactory.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.csproj">
+      <Project>{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}</Project>
+      <Name>Lucene.Net.Analysis.Phonetic</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj">
+      <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
+      <Name>Lucene.Net.TestFramework</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.Tests.Analysis.Phonetic.project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
new file mode 100644
index 0000000..8c631ab
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.project.json
@@ -0,0 +1,11 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "dependencies": {
+    "NUnit": "3.5.0"
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
new file mode 100644
index 0000000..16b7fef
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Lucene.Net.Tests.Analysis.Phonetic.xproj
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>1fe12ef7-4c89-4d49-bdd1-e49dc285f21b</ProjectGuid>
+    <RootNamespace>Lucene.Net.Tests.Analysis.Phonetic</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <ItemGroup>
+    <Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
+  </ItemGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..14e5b1c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Properties/AssemblyInfo.cs
@@ -0,0 +1,42 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Tests.Analysis.Phonetic")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("a2867797-0a5d-4878-8f59-58c399c9a4e4")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
new file mode 100644
index 0000000..cc0e897
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilter.cs
@@ -0,0 +1,132 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.TokenAttributes;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="BeiderMorseFilter"/>
+    /// </summary>
+    public class TestBeiderMorseFilter : BaseTokenStreamTestCase
+    {
+        private Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+        {
+            Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            return new TokenStreamComponents(tokenizer,
+                new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+        });
+
+
+        /** generic, "exact" configuration */
+        [Test]
+        public void TestBasicUsage()
+        {
+            AssertAnalyzesTo(analyzer, "Angelo",
+            new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo" },
+            new int[] { 0, 0, 0, 0, 0, 0 },
+            new int[] { 6, 6, 6, 6, 6, 6 },
+            new int[] { 1, 0, 0, 0, 0, 0 });
+
+
+            AssertAnalyzesTo(analyzer, "D'Angelo",
+                new String[] { "anZelo", "andZelo", "angelo", "anhelo", "anjelo", "anxelo",
+                  "danZelo", "dandZelo", "dangelo", "danhelo", "danjelo", "danxelo" },
+                new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+        }
+
+        /** restrict the output to a set of possible origin languages */
+        [Test]
+        public void TestLanguageSet()
+        {
+            LanguageSet languages = LanguageSet.From(new HashSet<String>() {
+                "italian", "greek", "spanish"
+            });
+            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer,
+                    new BeiderMorseFilter(tokenizer,
+                        new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
+            });
+
+            AssertAnalyzesTo(analyzer, "Angelo",
+                new String[] { "andZelo", "angelo", "anxelo" },
+                new int[] { 0, 0, 0, },
+                new int[] { 6, 6, 6, },
+                new int[] { 1, 0, 0, });
+        }
+
+        /** for convenience, if the input yields no output, we pass it thru as-is */
+        [Test]
+        public void TestNumbers()
+        {
+            AssertAnalyzesTo(analyzer, "1234",
+                new String[] { "1234" },
+                new int[] { 0 },
+                new int[] { 4 },
+                new int[] { 1 });
+        }
+
+        [Test]
+        public void TestRandom()
+        {
+            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
+            });
+
+            CheckOneTerm(a, "", "");
+        }
+
+        [Test]
+        public void TestCustomAttribute()
+        {
+            TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo"));
+            stream = new PatternKeywordMarkerFilter(stream, new Regex(".*"));
+            stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
+            IKeywordAttribute keyAtt = stream.AddAttribute<IKeywordAttribute>();
+            stream.Reset();
+            int i = 0;
+            while (stream.IncrementToken())
+            {
+                assertTrue(keyAtt.IsKeyword);
+                i++;
+            }
+            assertEquals(12, i);
+            stream.End();
+            stream.Dispose();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
new file mode 100644
index 0000000..5bdf1b7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestBeiderMorseFilterFactory.cs
@@ -0,0 +1,89 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simple tests for <see cref="BeiderMorseFilterFactory"/>
+    /// </summary>
+    public class TestBeiderMorseFilterFactory : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestBasics()
+        {
+            BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new Dictionary<String, String>());
+            TokenStream ts = factory.Create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+            AssertTokenStreamContents(ts,
+                new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        [Test]
+        public void TestLanguageSet()
+        {
+            IDictionary<String, String> args = new Dictionary<string, string>();
+            args.Put("languageSet", "polish");
+            BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
+            TokenStream ts = factory.Create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+            AssertTokenStreamContents(ts,
+                new String[] { "vDmbYrk", "vDmbirk", "vambYrk", "vambirk", "vimbYrk", "vimbirk" },
+                new int[] { 0, 0, 0, 0, 0, 0 },
+                new int[] { 8, 8, 8, 8, 8, 8 },
+                new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        [Test]
+        public void TestOptions()
+        {
+            IDictionary<String, String> args = new Dictionary<string, string>();
+            args.Put("nameType", "ASHKENAZI");
+            args.Put("ruleType", "EXACT");
+            BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(args);
+            TokenStream ts = factory.Create(new MockTokenizer(new StringReader("Weinberg"), MockTokenizer.WHITESPACE, false));
+            AssertTokenStreamContents(ts,
+                new String[] { "vajnberk" },
+                new int[] { 0 },
+                new int[] { 8 },
+                new int[] { 1 });
+        }
+
+        /** Test that bogus arguments result in exception */
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new BeiderMorseFilterFactory(new Dictionary<String, String>() {
+                    { "bogusArg", "bogusValue" }
+                });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
new file mode 100644
index 0000000..5ba337b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestDoubleMetaphoneFilterFactory.cs
@@ -0,0 +1,70 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestDoubleMetaphoneFilterFactory : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestDefaults()
+        {
+            DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(new Dictionary<String, String>());
+            TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
+
+            TokenStream filteredStream = factory.Create(inputStream);
+            assertEquals(typeof(DoubleMetaphoneFilter), filteredStream.GetType());
+            AssertTokenStreamContents(filteredStream, new String[] { "international", "ANTR" });
+        }
+
+        [Test]
+        public void TestSettingSizeAndInject()
+        {
+            IDictionary<string, string> parameters = new Dictionary<string, string>();
+            parameters["inject"] = "false";
+            parameters["maxCodeLength"] = "8";
+            DoubleMetaphoneFilterFactory factory = new DoubleMetaphoneFilterFactory(parameters);
+
+            TokenStream inputStream = new MockTokenizer(new StringReader("international"), MockTokenizer.WHITESPACE, false);
+
+            TokenStream filteredStream = factory.Create(inputStream);
+            assertEquals(typeof(DoubleMetaphoneFilter), filteredStream.GetType());
+            AssertTokenStreamContents(filteredStream, new String[] { "ANTRNXNL" });
+        }
+
+        /** Test that bogus arguments result in exception */
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new DoubleMetaphoneFilterFactory(new Dictionary<String, String>() {
+                    { "bogusArg", "bogusValue" }
+                });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
new file mode 100644
index 0000000..387765f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilter.cs
@@ -0,0 +1,122 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Phonetic.Language;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="PhoneticFilter"/>
+    /// </summary>
+    public class TestPhoneticFilter : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestAlgorithms()
+        {
+            assertAlgorithm(new Metaphone(), true, "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+            assertAlgorithm(new Metaphone(), false, "aaa bbb ccc easgasg",
+                new String[] { "A", "B", "KKK", "ESKS" });
+
+
+            assertAlgorithm(new DoubleMetaphone(), true, "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+            assertAlgorithm(new DoubleMetaphone(), false, "aaa bbb ccc easgasg",
+                new String[] { "A", "PP", "KK", "ASKS" });
+
+
+            assertAlgorithm(new Soundex(), true, "aaa bbb ccc easgasg",
+                new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+            assertAlgorithm(new Soundex(), false, "aaa bbb ccc easgasg",
+                new String[] { "A000", "B000", "C000", "E220" });
+
+
+            assertAlgorithm(new RefinedSoundex(), true, "aaa bbb ccc easgasg",
+                new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+            assertAlgorithm(new RefinedSoundex(), false, "aaa bbb ccc easgasg",
+                new String[] { "A0", "B1", "C3", "E034034" });
+
+
+            assertAlgorithm(new Caverphone2(), true, "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
+                    "TTA1111111", "Datha", "KLN1111111", "Carlene" });
+            assertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+        }
+
+
+        static void assertAlgorithm(IStringEncoder encoder, bool inject, String input,
+            String[] expected)
+        {
+            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+                new StringReader(input));
+            PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
+            AssertTokenStreamContents(filter, expected);
+        }
+
+        /** blast some random strings through the analyzer */
+        [Test]
+        public void TestRandomStrings()
+        {
+            IStringEncoder[] encoders = new IStringEncoder[] {
+                new Metaphone(), new DoubleMetaphone(), new Soundex()/*, new RefinedSoundex()*/, new Caverphone2()
+            };
+
+            foreach (IStringEncoder e in encoders)
+            {
+                Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+                {
+                    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                    return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
+                });
+
+                CheckRandomData(Random(), a, 1000 * RANDOM_MULTIPLIER);
+
+                Analyzer b = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+                {
+                    Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                    return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
+                });
+
+
+                CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER);
+            }
+        }
+
+        [Test]
+        public void TestEmptyTerm()
+        {
+            IStringEncoder[] encoders = new IStringEncoder[] {
+                new Metaphone(), new DoubleMetaphone(), new Soundex()/*, new RefinedSoundex()*/, new Caverphone2()
+            };
+            foreach (IStringEncoder e in encoders)
+            {
+                Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+                {
+                    Tokenizer tokenizer = new KeywordTokenizer(reader);
+                    return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, Random().nextBoolean()));
+                });
+
+                CheckOneTerm(a, "", "");
+            }
+        }
+    }
+}


[02/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
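
Because only the commons-codec "language" package was ported, the encoders
(Metaphone, DoubleMetaphone, Soundex, Caverphone2, etc.) live under
Lucene.Net.Analysis.Phonetic.Language and reach the analysis chain through
PhoneticFilter. A minimal sketch of that wiring, mirroring the tests in this
commit (LuceneVersion.LUCENE_48 is assumed here in place of the tests'
TEST_VERSION_CURRENT):

    // Sketch only: an analyzer that emits Metaphone codes alongside the original
    // terms (inject: true), built with the same helpers the tests use.
    Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
    {
        Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
        return new TokenStreamComponents(tokenizer,
            new PhoneticFilter(tokenizer, new Metaphone(), true));
    });

With inject left at true, both the original term and its phonetic code are
emitted, which is what the assertAlgorithm expectations in the factory tests
that follow show.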
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilterFactory.cs
new file mode 100644
index 0000000..1baedfb
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/TestPhoneticFilterFactory.cs
@@ -0,0 +1,228 @@
+using Lucene.Net.Analysis.Phonetic.Language;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class TestPhoneticFilterFactory : BaseTokenStreamTestCase
+    {
+        /**
+         * Case: default
+         */
+        [Test]
+        public void TestFactoryDefaults()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Metaphone");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertTrue(factory.GetEncoder() is Metaphone);
+            assertTrue(factory.inject); // default
+        }
+
+        [Test]
+        public void TestInjectFalse()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Metaphone");
+            args.Put(PhoneticFilterFactory.INJECT, "false");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertFalse(factory.inject);
+        }
+
+        [Test]
+        public void TestMaxCodeLength()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Metaphone");
+            args.Put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertEquals(2, ((Metaphone)factory.GetEncoder()).MaxCodeLen);
+        }
+
+        /**
+         * Case: Failures and Exceptions
+         */
+        [Test]
+        public void TestMissingEncoder()
+        {
+            try
+            {
+                new PhoneticFilterFactory(new Dictionary<String, String>());
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Configuration Error: missing parameter 'encoder'"));
+            }
+        }
+        [Test]
+        public void TestUnknownEncoder()
+        {
+            try
+            {
+                IDictionary<String, String> args = new Dictionary<String, String>();
+                args.Put("encoder", "XXX");
+                PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+                factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Error loading encoder"));
+            }
+        }
+
+        [Test]
+        public void TestUnknownEncoderReflection()
+        {
+            try
+            {
+                IDictionary<String, String> args = new Dictionary<String, String>();
+                args.Put("encoder", "org.apache.commons.codec.language.NonExistence");
+                PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+                factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Error loading encoder"));
+            }
+        }
+
+        /**
+         * Case: Reflection
+         */
+        [Test]
+        public void TestFactoryReflection()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Metaphone");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertTrue(factory.GetEncoder() is Metaphone);
+            assertTrue(factory.inject); // default
+        }
+
+        /** 
+         * we use "Caverphone2" as it is registered in the REGISTRY as Caverphone,
+         * so this effectively tests reflection without package name
+         */
+        [Test]
+        public void TestFactoryReflectionCaverphone2()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Caverphone2");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertTrue(factory.GetEncoder() is Caverphone2);
+            assertTrue(factory.inject); // default
+        }
+
+        [Test]
+        public void TestFactoryReflectionCaverphone()
+        {
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put(PhoneticFilterFactory.ENCODER, "Caverphone");
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            assertTrue(factory.GetEncoder() is Caverphone2);
+            assertTrue(factory.inject); // default
+        }
+
+        [Test]
+        public void TestAlgorithms()
+        {
+            assertAlgorithm("Metaphone", "true", "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
+            assertAlgorithm("Metaphone", "false", "aaa bbb ccc easgasg",
+                new String[] { "A", "B", "KKK", "ESKS" });
+
+
+            assertAlgorithm("DoubleMetaphone", "true", "aaa bbb ccc easgasg",
+                new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
+            assertAlgorithm("DoubleMetaphone", "false", "aaa bbb ccc easgasg",
+                new String[] { "A", "PP", "KK", "ASKS" });
+
+
+            assertAlgorithm("Soundex", "true", "aaa bbb ccc easgasg",
+                new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
+            assertAlgorithm("Soundex", "false", "aaa bbb ccc easgasg",
+                new String[] { "A000", "B000", "C000", "E220" });
+
+
+            assertAlgorithm("RefinedSoundex", "true", "aaa bbb ccc easgasg",
+                new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
+            assertAlgorithm("RefinedSoundex", "false", "aaa bbb ccc easgasg",
+                new String[] { "A0", "B1", "C3", "E034034" });
+
+
+            assertAlgorithm("Caverphone", "true", "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen",
+                "TTA1111111", "Datha", "KLN1111111", "Carlene" });
+            assertAlgorithm("Caverphone", "false", "Darda Karleen Datha Carlene",
+                new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+
+
+            assertAlgorithm("ColognePhonetic", "true", "Meier Schmitt Meir Schmidt",
+                new String[] { "67", "Meier", "862", "Schmitt",
+                    "67", "Meir", "862", "Schmidt" });
+            assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt",
+                new String[] { "67", "862", "67", "862" });
+        }
+
+        /** Test that bogus arguments result in exception */
+        [Test]
+        public void TestBogusArguments()
+        {
+            try
+            {
+                new PhoneticFilterFactory(new Dictionary<String, String>() {
+                    { "encoder", "Metaphone" },
+                    { "bogusArg", "bogusValue" }
+                });
+                fail();
+            }
+            catch (ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+
+        internal static void assertAlgorithm(String algName, String inject, String input,
+            String[] expected)
+        {
+            Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+            IDictionary<String, String> args = new Dictionary<String, String>();
+            args.Put("encoder", algName);
+            args.Put("inject", inject);
+            PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
+            factory.Inform(new ClasspathResourceLoader(factory.GetType()));
+            TokenStream stream = factory.Create(tokenizer);
+            AssertTokenStreamContents(stream, expected);
+        }
+    }
+}
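
The factory variant above is configured entirely through string arguments; a
minimal sketch of that path, using only the argument names and calls exercised
by the tests (the concrete values are illustrative):

    // Sketch only: build a PhoneticFilterFactory from string args, as the tests above do.
    IDictionary<string, string> args = new Dictionary<string, string>
    {
        { PhoneticFilterFactory.ENCODER, "Metaphone" },       // encoder resolved by name
        { PhoneticFilterFactory.INJECT, "false" },            // emit only the phonetic codes
        { PhoneticFilterFactory.MAX_CODE_LENGTH, "4" }        // forwarded to Metaphone.MaxCodeLen
    };
    PhoneticFilterFactory factory = new PhoneticFilterFactory(args);
    factory.Inform(new ClasspathResourceLoader(factory.GetType()));  // loads the named encoder
    TokenStream stream = factory.Create(tokenizer);                  // 'tokenizer' is any Tokenizer over the input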

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/project.json b/src/Lucene.Net.Tests.Analysis.Phonetic/project.json
new file mode 100644
index 0000000..7bad539
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/project.json
@@ -0,0 +1,45 @@
+{
+  "version": "4.8.0",
+  "title": "Lucene.Net.Tests.Analysis.Phonetic",
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [ "../CommonAssemblyInfo.cs" ]
+    },
+    "embed": {
+      "includeFiles": [
+      ]
+    }
+  },
+  "dependencies": {
+    "dotnet-test-nunit-teamcity": "3.4.0-beta-3",
+    "Lucene.Net.Analysis.Phonetic": "4.8.0",
+    "Lucene.Net.TestFramework": "4.8.0",
+    "NUnit": "3.5.0"
+  },
+  "testRunner": "nunit-teamcity",
+  "frameworks": {
+    "netcoreapp1.0": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "debugType": "portable",
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "Support/TestApiConsistency.cs"
+          ]
+        }
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "debugType": "full",
+        "define": [ "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  },
+
+  "runtimes": {
+    "win7-x86": {},
+    "win7-x64": {}
+  }
+}


[06/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
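
This part adds the DoubleMetaphoneFilter tests; the filter is constructed
directly with a maximum code length and an inject flag. A minimal standalone
sketch (LuceneVersion.LUCENE_48 stands in for the tests' TEST_VERSION_CURRENT):

    // Sketch only: maxCodeLength = 4, inject = false, so only the phonetic code
    // is emitted ("international" becomes "ANTR" in TestSize4FalseInject below).
    TokenStream stream = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("international"));
    TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);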
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
new file mode 100644
index 0000000..07e7f66
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/DoubleMetaphoneFilterTest.cs
@@ -0,0 +1,111 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class DoubleMetaphoneFilterTest : BaseTokenStreamTestCase
+    {
+        [Test]
+        public void TestSize4FalseInject()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
+            AssertTokenStreamContents(filter, new String[] { "ANTR" });
+        }
+
+        [Test]
+        public void TestSize4TrueInject()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 4, true);
+            AssertTokenStreamContents(filter, new String[] { "international", "ANTR" });
+        }
+        [Test]
+        public void TestAlternateInjectFalse()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Kuczewski"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 4, false);
+            AssertTokenStreamContents(filter, new String[] { "KSSK", "KXFS" });
+        }
+        [Test]
+        public void TestSize8FalseInject()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("international"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
+            AssertTokenStreamContents(filter, new String[] { "ANTRNXNL" });
+        }
+        [Test]
+        public void TestNonConvertableStringsWithInject()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 8, true);
+            AssertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
+        }
+
+        [Test]
+        public void TestNonConvertableStringsWithoutInject()
+        {
+            TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%&"));
+            TokenStream filter = new DoubleMetaphoneFilter(stream, 8, false);
+            AssertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&" });
+
+            // should have something after the stream
+            stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("12345 #$%@#^%& hello"));
+            filter = new DoubleMetaphoneFilter(stream, 8, false);
+            AssertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" });
+        }
+
+        [Test]
+        public void TestRandom()
+        {
+            int codeLen = TestUtil.NextInt(Random(), 1, 8);
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, false));
+            });
+
+            CheckRandomData(Random(), a, 1000 * RANDOM_MULTIPLIER);
+
+            Analyzer b = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, true));
+            });
+
+            CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        [Test]
+        public void TestEmptyTerm()
+        {
+            Analyzer a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, Random().nextBoolean()));
+            });
+
+            CheckOneTerm(a, "", "");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
new file mode 100644
index 0000000..bd3681b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/BeiderMorseEncoderTest.cs
@@ -0,0 +1,255 @@
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests BeiderMorseEncoder.
+    /// </summary>
+    public class BeiderMorseEncoderTest : StringEncoderAbstractTest<BeiderMorseEncoder>
+    {
+        private static readonly char[] TEST_CHARS = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' };
+
+        private void AssertNotEmpty(BeiderMorseEncoder bmpm, string value)
+        {
+            Assert.False(bmpm.Encode(value).Equals(""), value);
+        }
+
+        private BeiderMorseEncoder CreateGenericApproxEncoder()
+        {
+            BeiderMorseEncoder encoder = new BeiderMorseEncoder();
+            encoder.NameType = NameType.GENERIC;
+            encoder.RuleType = RuleType.APPROX;
+            return encoder;
+        }
+
+        protected override BeiderMorseEncoder CreateStringEncoder()
+        {
+            return new BeiderMorseEncoder();
+        }
+
+        /**
+         * Tests we do not blow up.
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestAllChars()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            for (char c = char.MinValue; c < char.MaxValue; c++)
+            {
+                bmpm.Encode(c.ToString());
+            }
+        }
+
+        [Test]
+        public void TestAsciiEncodeNotEmpty1Letter()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            for (char c = 'a'; c <= 'z'; c++)
+            {
+                string value = c.ToString();
+                string valueU = value.ToUpperInvariant();
+                AssertNotEmpty(bmpm, value);
+                AssertNotEmpty(bmpm, valueU);
+            }
+        }
+
+        [Test]
+        public void TestAsciiEncodeNotEmpty2Letters()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            for (char c1 = 'a'; c1 <= 'z'; c1++)
+            {
+                for (char c2 = 'a'; c2 <= 'z'; c2++)
+                {
+                    String value = new String(new char[] { c1, c2 });
+                    String valueU = value.ToUpperInvariant();
+                    AssertNotEmpty(bmpm, value);
+                    AssertNotEmpty(bmpm, valueU);
+                }
+            }
+        }
+
+        [Test]
+        public void TestEncodeAtzNotEmpty()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            //String[] names = { "ácz", "átz", "Ignácz", "Ignátz", "Ignác" };
+            String[] names = { "\u00e1cz", "\u00e1tz", "Ign\u00e1cz", "Ign\u00e1tz", "Ign\u00e1c" };
+            foreach (String name in names)
+            {
+                AssertNotEmpty(bmpm, name);
+            }
+        }
+
+        /**
+         * Tests https://issues.apache.org/jira/browse/CODEC-125?focusedCommentId=13071566&page=com.atlassian.jira.plugin.system.issuetabpanels:
+         * comment-tabpanel#comment-13071566
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestEncodeGna()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            bmpm.Encode("gna");
+        }
+
+        [Test]//@Test(expected = IllegalArgumentException.class)
+        public void TestInvalidLangIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() => Rule.GetInstance(NameType.GENERIC, RuleType.APPROX, "noSuchLanguage"));
+        }
+
+        [Test]//@Test(expected = IllegalStateException.class)
+        public void TestInvalidLangIllegalStateException()
+        {
+            Assert.Throws<InvalidOperationException>(() => Lang.LoadFromResource("thisIsAMadeUpResourceName", Languages.GetInstance(NameType.GENERIC)));
+        }
+
+        [Test]//@Test(expected = IllegalArgumentException.class)
+        public void TestInvalidLanguageIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() => Languages.GetInstance("thereIsNoSuchLanguage"));
+        }
+
+        [Test]//@Test(timeout = 10000L)
+        public void TestLongestEnglishSurname()
+        {
+            BeiderMorseEncoder bmpm = CreateGenericApproxEncoder();
+            bmpm.Encode("MacGhilleseatheanaich");
+        }
+
+        [Test]//@Test(expected = IndexOutOfBoundsException.class)
+        public void TestNegativeIndexForRuleMatchIndexOutOfBoundsException()
+        {
+            Assert.Throws<ArgumentOutOfRangeException>(() =>
+            {
+                Rule r = new Rule("a", "", "", new Phoneme("", Languages.ANY_LANGUAGE));
+                r.PatternAndContextMatches("bob", -1);
+            });
+        }
+
+        [Test]
+        public void TestOOM()
+        {
+            String phrase = "200697900'-->&#1913348150;</  bceaeef >aadaabcf\"aedfbff<!--\'-->?>cae"
+                       + "cfaaa><?&#<!--</script>&lang&fc;aadeaf?>>&bdquo<    cc =\"abff\"    /></   afe  >"
+                       + "<script><!-- f(';<    cf aefbeef = \"bfabadcf\" ebbfeedd = fccabeb >";
+
+            BeiderMorseEncoder encoder = new BeiderMorseEncoder();
+            encoder.NameType = NameType.GENERIC;
+            encoder.RuleType = RuleType.EXACT;
+            encoder.SetMaxPhonemes(10);
+
+            String phonemes = encoder.Encode(phrase);
+            Assert.True(phonemes.Length > 0);
+
+            String[] phonemeArr = new Regex("\\|").Split(phonemes);
+            Assert.True(phonemeArr.Length <= 10);
+        }
+
+        [Test]
+        public void TestSetConcat()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.IsConcat = false;
+            Assert.False(bmpm.IsConcat, "Should be able to set concat to false");
+        }
+
+        [Test]
+        public void TestSetNameTypeAsh()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.NameType = NameType.ASHKENAZI;
+            Assert.AreEqual(NameType.ASHKENAZI, bmpm.NameType, "Name type should have been set to ash");
+        }
+
+        [Test]
+        public void TestSetRuleTypeExact()
+        {
+            BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+            bmpm.RuleType = RuleType.EXACT;
+            Assert.AreEqual(RuleType.EXACT, bmpm.RuleType, "Rule type should have been set to exact");
+        }
+
+        [Test]//@Test(expected = IllegalArgumentException.class)
+        public void TestSetRuleTypeToRulesIllegalArgumentException()
+        {
+            Assert.Throws<ArgumentException>(() =>
+            {
+                BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
+                bmpm.RuleType = RuleType.RULES;
+            });
+        }
+
+        /**
+         * (Un)luckily, the worse performing test because of the data in {@link #TEST_CHARS}
+         *
+         * @throws EncoderException
+         */
+        [Test]/* timeout = 20000L */
+        public void TestSpeedCheck()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            StringBuilder stringBuffer = new StringBuilder();
+            stringBuffer.Append(TEST_CHARS[0]);
+            for (int i = 0, j = 1; i < 40; i++, j++)
+            {
+                if (j == TEST_CHARS.Length)
+                {
+                    j = 0;
+                }
+                bmpm.Encode(stringBuffer.ToString());
+                stringBuffer.Append(TEST_CHARS[j]);
+            }
+        }
+
+        [Test]
+        public void TestSpeedCheck2()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            String phrase = "ItstheendoftheworldasweknowitandIfeelfine";
+
+            for (int i = 1; i <= phrase.Length; i++)
+            {
+                bmpm.Encode(phrase.Substring(0, i));
+            }
+        }
+
+        [Test]
+        public void TestSpeedCheck3()
+        {
+            BeiderMorseEncoder bmpm = this.CreateGenericApproxEncoder();
+            String phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+
+            for (int i = 1; i <= phrase.Length; i++)
+            {
+                bmpm.Encode(phrase.Substring(0, i));
+            }
+        }
+    }
+}
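
The Beider-Morse encoder can also be used on its own, outside a TokenStream; a
minimal sketch using only members exercised by the test above:

    // Sketch only: standalone Beider-Morse encoding.
    BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
    bmpm.NameType = NameType.GENERIC;      // as in CreateGenericApproxEncoder above
    bmpm.RuleType = RuleType.APPROX;
    bmpm.SetMaxPhonemes(10);               // cap the number of phonemes produced
    string phonemes = bmpm.Encode("Angelo");  // '|'-separated list of phonetic forms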

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
new file mode 100644
index 0000000..45e9513
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/CacheSubSequencePerformanceTest.cs
@@ -0,0 +1,138 @@
+using Lucene.Net.Attributes;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class CacheSubSequencePerformanceTest
+    {
+        [Test, LongRunningTest]
+        public void Test()
+        {
+            //int times = 10000000;
+            int times = 100000; // LUCENENET: 10 million times would take several minutes to run - decreasing to 100,000
+            Console.WriteLine("Test with String : ");
+            Test("Angelo", times);
+            Console.WriteLine("Test with StringBuilder : ");
+            Test(new StringBuilder("Angelo"), times);
+            Console.WriteLine("Test with cached String : ");
+            Test(CacheSubSequence("Angelo").ToString(), times);
+            Console.WriteLine("Test with cached StringBuilder : ");
+            Test(CacheSubSequence(new StringBuilder("Angelo")).ToString(), times);
+        }
+
+        private void Test(string input, int times)
+        {
+            long beginTime = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < times; i++)
+            {
+                Test(input);
+            }
+            Console.WriteLine(DateTime.UtcNow.Ticks - beginTime + " millis");
+        }
+
+        private void Test(StringBuilder input, int times)
+        {
+            long beginTime = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < times; i++)
+            {
+                Test(input);
+            }
+            Console.WriteLine(DateTime.UtcNow.Ticks - beginTime + " millis");
+        }
+
+        private void Test(string input)
+        {
+            for (int i = 0; i < input.Length; i++)
+            {
+                for (int j = i; j <= input.Length; j++)
+                {
+                    input.Substring(i, (j - i));
+                }
+            }
+        }
+
+        private void Test(StringBuilder input)
+        {
+            for (int i = 0; i < input.Length; i++)
+            {
+                for (int j = i; j <= input.Length; j++)
+                {
+                    input.ToString(i, (j - i));
+                }
+            }
+        }
+
+        private class CachedCharSequence : ICharSequence
+        {
+            private readonly string[][] cache;
+            private readonly string cached;
+            public CachedCharSequence(string[][] cache, string cached)
+            {
+                this.cache = cache;
+                this.cached = cached;
+            }
+            public char this[int index]
+            {
+                get
+                {
+                    return cached[index];
+                }
+            }
+
+            public int Length
+            {
+                get
+                {
+                    return cached.Length;
+                }
+            }
+
+            public ICharSequence SubSequence(int start, int end)
+            {
+                if (start == end)
+                {
+                    return "".ToCharSequence();
+                }
+                string res = cache[start][end - 1];
+                if (res == null)
+                {
+                    res = cached.Substring(start, end - start);
+                    cache[start][end - 1] = res;
+                }
+                return res.ToCharSequence();
+            }
+        }
+
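+        // RectangularArrays.ReturnRectangularArray allocates a fully initialized jagged array
+        // (cached.Length x cached.Length) to back the substring cache.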
+        private ICharSequence CacheSubSequence(string cached)
+        {
+            string[][] cache = Support.RectangularArrays.ReturnRectangularArray<string>(cached.Length, cached.Length);
+            return new CachedCharSequence(cache, cached);
+        }
+
+        private ICharSequence CacheSubSequence(StringBuilder cached)
+        {
+            string[][] cache = Support.RectangularArrays.ReturnRectangularArray<string>(cached.Length, cached.Length);
+            return new CachedCharSequence(cache, cached.ToString());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
new file mode 100644
index 0000000..d50c6f7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/LanguageGuessingTest.cs
@@ -0,0 +1,84 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests guessLanguages API.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public class LanguageGuessingTest
+    {
+        private static string EXACT = "exact";
+        private static string ONE_OF = "one of";
+
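+        // Each entry is { name, expected language, exactness }. The test below only asserts that the
+        // expected language is among the guessed languages, so the exactness flag is informational here.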
+        public static List<object[]> Values =  Arrays.AsList(new object[][] {
+                new object[] { "Renault", "french", EXACT },
+                new object[] { "Mickiewicz", "polish", EXACT },
+                new object[] { "Thompson", "english", ONE_OF }, // this also hits german and greeklatin
+                new object[] { "Nu\u00f1ez", "spanish", EXACT }, // Nuñez
+                new object[] { "Carvalho", "portuguese", EXACT },
+                new object[] { "\u010capek", "czech", EXACT }, // Čapek
+                new object[] { "Sjneijder", "dutch", EXACT },
+                new object[] { "Klausewitz", "german", EXACT },
+                new object[] { "K\u00fc\u00e7\u00fck", "turkish", EXACT }, // Küçük
+                new object[] { "Giacometti", "italian", EXACT },
+                new object[] { "Nagy", "hungarian", EXACT },
+                new object[] { "Ceau\u015fescu", "romanian", EXACT }, // Ceauşescu
+                new object[] { "Angelopoulos", "greeklatin", EXACT },
+                new object[] { "\u0391\u03b3\u03b3\u03b5\u03bb\u03cc\u03c0\u03bf\u03c5\u03bb\u03bf\u03c2", "greek", EXACT }, // Αγγελόπουλος
+                new object[] { "\u041f\u0443\u0448\u043a\u0438\u043d", "cyrillic", EXACT }, // Пушкин
+                new object[] { "\u05db\u05d4\u05df", "hebrew", EXACT }, // כהן
+                new object[] { "\u00e1cz", "any", EXACT }, // ácz
+                new object[] { "\u00e1tz", "any", EXACT } // átz
+        });
+
+        //private readonly String exactness;
+
+        private readonly Lang lang = Lang.GetInstance(NameType.GENERIC);
+        //private readonly String language;
+        //private readonly String name;
+
+        //[TestCaseSource("Values")]
+        //public LanguageGuessingTest(String name, String language, String exactness)
+        //{
+        //    this.name = name;
+        //    this.language = language;
+        //    this.exactness = exactness;
+        //}
+
+        [Test]
+        [TestCaseSource("Values")]
+        public void TestLanguageGuessing(String name, String language, String exactness)
+        {
+            LanguageSet guesses = this.lang.GuessLanguages(name);
+
+            Assert.True(guesses.Contains(language),
+                "language predicted for name '" + name + "' is wrong: " + guesses + " should contain '" + language + "'"
+                    );
+
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
new file mode 100644
index 0000000..7b8b400
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEnginePerformanceTest.cs
@@ -0,0 +1,141 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /**
+     * Tests performance for {@link PhoneticEngine}.
+     * <p>
+     * See <a href="https://issues.apache.org/jira/browse/CODEC-174">[CODEC-174] Improve performance of Beider Morse
+     * encoder</a>.
+     * </p>
+     * <p>
+     * Results for November 7, 2013, project SVN revision 1539678.
+     * </p>
+     * <p>
+     * Environment:
+     * </p>
+     * <ul>
+     * <li>java version "1.7.0_45"</li>
+     * <li>Java(TM) SE Runtime Environment (build 1.7.0_45-b18)</li>
+     * <li>Java HotSpot(TM) 64-Bit Server VM (build 24.45-b08, mixed mode)</li>
+     * <li>OS name: "windows 7", version: "6.1", arch: "amd64", family: "windows")</li>
+     * </ul>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 33,039 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,297 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,857 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>31,561 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,665 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 32,215 millis.</li>
+     * </ol>
+     * <p>
+     * On this file's revision 1539678, with patch <a
+     * href="https://issues.apache.org/jira/secure/attachment/12611963/CODEC-174-change-rules-storage-to-Map.patch"
+     * >CODEC-174-change-rules-storage-to-Map</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 18,196 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,858 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,644 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,591 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,861 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,696 millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1539783.
+     * </p>
+     * <p>
+     * On this file's revision 1539783, with patch <a
+     * href="https://issues.apache.org/jira/secure/attachment/12611962/CODEC-174-delete-subsequence-cache.patch"
+     * >CODEC-174-delete-subsequence-cache.patch</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,547 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,501 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,528 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 17,110 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,910 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 16,969 millis.</li>
+     * </ol>
+     * <p>
+     * Patch not applied.
+     * </p>
+     * <p>
+     * On this file's revision 1539787, with patch <a
+     * href="https://issues.apache.org/jira/secure/attachment/12612178/CODEC-174-reuse-set-in-PhonemeBuilder.patch"
+     * >CODEC-174-reuse-set-in-PhonemeBuilder.patch</a>:
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,724 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,451 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,742 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>13,186 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,600 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 16,405 millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1539788.
+     * </p>
+     * <p>
+     * Before patch https://issues.apache.org/jira/secure/attachment/12613371/CODEC-174-refactor-restrictTo-method-in-SomeLanguages.patch
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,133 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,064 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>12,838 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 12,970 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,122 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 13,293 millis.</li>
+     * </ol>
+     * <p>
+     * After patch https://issues.apache.org/jira/secure/attachment/12613371/CODEC-174-refactor-restrictTo-method-in-SomeLanguages.patch
+     * </p>
+     * <ol>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,576 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,506 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,361 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': <b>11,142 millis.</b></li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,430 millis.</li>
+     * <li>Time for encoding 80,000 times the input 'Angelo': 11,297 millis.</li>
+     * </ol>
+     * <p>
+     * Patch applied, committed revision 1541234.
+     * </p>
+     */
+    public class PhoneticEnginePerformanceTest
+    {
+        private static readonly int LOOP = 80000;
+
+        [Test]
+        public void Test()
+        {
+            PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
+            String input = "Angelo";
+            long startTicks = DateTime.UtcNow.Ticks;
+            for (int i = 0; i < LOOP; i++)
+            {
+                engine.Encode(input);
+            }
+            // Ticks are 100-nanosecond units; convert the elapsed time to milliseconds before reporting.
+            long totalMillis = (DateTime.UtcNow.Ticks - startTicks) / TimeSpan.TicksPerMillisecond;
+            Console.WriteLine(String.Format("Time for encoding {0} times the input '{1}': {2} millis.", LOOP, input, totalMillis));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
new file mode 100644
index 0000000..cb9a40d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineRegressionTest.cs
@@ -0,0 +1,234 @@
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="PhoneticEngine"/> and <see cref="LanguageSet"/> in ways very similar to code found in solr-3.6.0.
+    /// <para/>
+    /// since 1.7
+    /// </summary>
+    public class PhoneticEngineRegressionTest
+    {
+        [Test]
+        public void TestSolrGENERIC()
+        {
+            IDictionary<String, String> args;
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "GENERIC");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|angelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anZelo|andZelo|angelo|anhelo|anjelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|angelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "(agilo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongilo|oniilo|onilo|onxilo|onzilo)-(dagilo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongilo|doniilo|donilo|donxilo|donzilo)");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "angilo|anxilo|anzilo|ongilo|onxilo|onzilo");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+        }
+
+        [Test]
+        public void TestSolrASHKENAZI()
+        {
+            IDictionary<String, String> args;
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|angelo|anhelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "angelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|angelo|anhelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "dandZelo|dangelo|danhelo|danxelo");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "angelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "AnSelO|AngElO|AngzelO|AnkselO");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "ASHKENAZI");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "AnElO|AnSelO|AngElO|AngzelO|AnkselO|AnzelO");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "dAnElO|dAnSelO|dAngElO|dAngzelO|dAnkselO|dAnzelO");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "AnSelO|AngElO|AngzelO|AnkselO");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+        }
+
+        [Test]
+        public void TestSolrSEPHARDIC()
+        {
+            IDictionary<String, String> args;
+
+            // concat is true, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anZelo|andZelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "anZelo|andZelo|anxelo");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "andZelo|anxelo");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is EXACT
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("ruleType", "EXACT");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anZelo|andZelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "danZelo|dandZelo|danxelo");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "andZelo|anxelo");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+
+            // concat is true, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            Assert.AreEqual(Encode(args, true, "D'Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, true, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            Assert.AreEqual(Encode(args, true, "1234"), "");
+
+            // concat is false, ruleType is APPROX
+            args = new SortedDictionary<String, String>();
+            args.Put("nameType", "SEPHARDIC");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("ruleType", "APPROX");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            Assert.AreEqual(Encode(args, false, "D'Angelo"), "danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu");
+            args.Put("languageSet", "italian,greek,spanish");
+            Assert.AreEqual(Encode(args, false, "Angelo"), "anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu");
+            Assert.AreEqual(Encode(args, false, "1234"), "");
+        }
+
+        /**
+         * This code is similar in style to code found in Solr:
+         * solr/core/src/java/org/apache/solr/analysis/BeiderMorseFilterFactory.java
+         *
+         * Making a JUnit test out of it to protect Solr from possible future
+         * regressions in Commons-Codec.
+         */
+        private static String Encode(IDictionary<String, String> args, bool concat, String input)
+        {
+            LanguageSet languageSet;
+            PhoneticEngine engine;
+
+            // PhoneticEngine = NameType + RuleType + concat
+            // we use common-codec's defaults: GENERIC + APPROX + true
+            String nameTypeArg;
+            args.TryGetValue("nameType", out nameTypeArg);
+            NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : (NameType)Enum.Parse(typeof(NameType), nameTypeArg, true);
+
+            String ruleTypeArg;
+            args.TryGetValue("ruleType", out ruleTypeArg);
+            RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : (RuleType)Enum.Parse(typeof(RuleType), ruleTypeArg, true);
+
+            engine = new PhoneticEngine(nameType, ruleType, concat);
+
+            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
+            String languageSetArg;
+            args.TryGetValue("languageSet", out languageSetArg);
+            if (languageSetArg == null || languageSetArg.Equals("auto", StringComparison.Ordinal))
+            {
+                languageSet = null;
+            }
+            else
+            {
+                languageSet = LanguageSet.From(new HashSet<String>(Arrays.AsList(languageSetArg.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries))));
+            }
+
+            /*
+                org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (lines 96-98) does this:
+
+                encoded = (languages == null)
+                    ? engine.encode(termAtt.toString())
+                    : engine.encode(termAtt.toString(), languages);
+
+                Hence our approach, below:
+            */
+            if (languageSet == null)
+            {
+                return engine.Encode(input);
+            }
+            else
+            {
+                return engine.Encode(input, languageSet);
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
new file mode 100644
index 0000000..281fc45
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/PhoneticEngineTest.cs
@@ -0,0 +1,89 @@
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    public class PhoneticEngineTest
+    {
+        private static readonly int TEN = 10;
+
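+        // Each entry is { name, expected phonetic encoding, NameType, RuleType, concat, maxPhonemes }
+        // (the same order as the commented-out constructor parameters below).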
+        public static List<Object[]> Values = new List<object[]> { new Object[] { "Renault", "rinD|rinDlt|rina|rinalt|rino|rinolt|rinu|rinult", NameType.GENERIC, RuleType.APPROX, true, TEN },
+                            new Object[] { "Renault", "rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult", NameType.ASHKENAZI, RuleType.APPROX, true, TEN },
+                            new Object[] { "Renault", "rYnDlt", NameType.ASHKENAZI, RuleType.APPROX, true, 1 },
+                            new Object[] { "Renault", "rinDlt", NameType.SEPHARDIC, RuleType.APPROX, true, TEN },
+                            new Object[] { "SntJohn-Smith", "sntjonsmit", NameType.GENERIC, RuleType.EXACT, true, TEN },
+                            new Object[] { "d'ortley", "(ortlaj|ortlej)-(dortlaj|dortlej)", NameType.GENERIC, RuleType.EXACT, true, TEN },
+                            new Object[] {
+                                "van helsing",
+                                "(elSink|elsink|helSink|helsink|helzink|xelsink)-(banhelsink|fanhelsink|fanhelzink|vanhelsink|vanhelzink|vanjelsink)",
+                                NameType.GENERIC,
+                                RuleType.EXACT,
+                                false, TEN } };
+
+        //    private readonly bool concat;
+        //private readonly String name;
+        //private readonly NameType nameType;
+        //private readonly String phoneticExpected;
+        //private readonly RuleType ruleType;
+        //private readonly int maxPhonemes;
+
+        //    public PhoneticEngineTest(String name, String phoneticExpected, NameType nameType,
+        //                              RuleType ruleType, bool concat, int maxPhonemes)
+        //    {
+        //        this.name = name;
+        //        this.phoneticExpected = phoneticExpected;
+        //        this.nameType = nameType;
+        //        this.ruleType = ruleType;
+        //        this.concat = concat;
+        //        this.maxPhonemes = maxPhonemes;
+        //    }
+
+        [Test]//@Test(timeout = 10000L)
+        [TestCaseSource("Values")]
+        public void TestEncode(String name, String phoneticExpected, NameType nameType,
+                                      RuleType ruleType, bool concat, int maxPhonemes)
+        {
+            PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
+
+            String phoneticActual = engine.Encode(name);
+
+            //System.err.println("expecting: " + this.phoneticExpected);
+            //System.err.println("actual:    " + phoneticActual);
+            Assert.AreEqual(phoneticExpected, phoneticActual, "phoneme incorrect");
+
+            if (concat)
+            {
+                String[] split = new Regex("\\|").Split(phoneticActual);
+                Assert.True(split.Length <= maxPhonemes);
+            }
+            else
+            {
+                String[] words = phoneticActual.Split(new string[] { "-" }, StringSplitOptions.RemoveEmptyEntries);
+                foreach (String word in words)
+                {
+                    String[] split = new Regex("\\|").Split(word);
+                    Assert.True(split.Length <= maxPhonemes);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
new file mode 100644
index 0000000..fd2e8a2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Bm/RuleTest.cs
@@ -0,0 +1,163 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests Rule.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public class RuleTest
+    {
+        //    private static class NegativeIntegerBaseMatcher : BaseMatcher<Integer> {
+        //        @Override
+        //    public void describeTo(final Description description)
+        //    {
+        //        description.appendText("value should be negative");
+        //    }
+
+        //    @Override
+        //    public boolean matches(final Object item)
+        //    {
+        //        return ((Integer)item).intValue() < 0;
+        //    }
+        //}
+
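+        // Builds two groups of phonemes with no language restriction; each group is listed in ascending
+        // Phoneme.COMPARER order, which TestPhonemeComparedToLaterIsNegative relies on.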
+        private Phoneme[][] MakePhonemes()
+        {
+            String[][] words = {
+               new string[] { "rinD", "rinDlt", "rina", "rinalt", "rino", "rinolt", "rinu", "rinult" },
+               new string[] { "dortlaj", "dortlej", "ortlaj", "ortlej", "ortlej-dortlaj" } };
+            Phoneme[][] phonemes = new Phoneme[words.Length][];
+
+            for (int i = 0; i < words.Length; i++)
+            {
+                String[] words_i = words[i];
+                Phoneme[] phonemes_i = phonemes[i] = new Phoneme[words_i.Length];
+                for (int j = 0; j < words_i.Length; j++)
+                {
+                    phonemes_i[j] = new Phoneme(words_i[j], Languages.NO_LANGUAGES);
+                }
+            }
+
+            return phonemes;
+        }
+
+        [Test]
+        public void TestPhonemeComparedToLaterIsNegative()
+        {
+            foreach (Phoneme[] phs in MakePhonemes())
+            {
+                for (int i = 0; i < phs.Length; i++)
+                {
+                    for (int j = i + 1; j < phs.Length; j++)
+                    {
+                        int c = Phoneme.COMPARER.Compare(phs[i], phs[j]);
+
+                        Assert.True(c < 0,
+                                "Comparing " + phs[i].GetPhonemeText() + " to " + phs[j].GetPhonemeText() + " should be negative");
+                    }
+                }
+            }
+        }
+
+        [Test]
+        public void TestPhonemeComparedToSelfIsZero()
+        {
+            foreach (Phoneme[] phs in MakePhonemes())
+            {
+                foreach (Phoneme ph in phs)
+                {
+                    Assert.AreEqual(0,
+                            Phoneme.COMPARER.Compare(ph, ph),
+                            "Phoneme compared to itself should be zero: " + ph.GetPhonemeText());
+                }
+            }
+        }
+
+        [Test]
+        public void TestSubSequenceWorks()
+        {
+            // AppendableCharSequence is private to Rule. We can only make it through a Phoneme.
+
+            Phoneme a = new Phoneme("a", null);
+            Phoneme b = new Phoneme("b", null);
+            Phoneme cd = new Phoneme("cd", null);
+            Phoneme ef = new Phoneme("ef", null);
+            Phoneme ghi = new Phoneme("ghi", null);
+            Phoneme jkl = new Phoneme("jkl", null);
+
+            Assert.AreEqual('a', a.GetPhonemeText()[0]);
+            Assert.AreEqual('b', b.GetPhonemeText()[0]);
+            Assert.AreEqual('c', cd.GetPhonemeText()[0]);
+            Assert.AreEqual('d', cd.GetPhonemeText()[1]);
+            Assert.AreEqual('e', ef.GetPhonemeText()[0]);
+            Assert.AreEqual('f', ef.GetPhonemeText()[1]);
+            Assert.AreEqual('g', ghi.GetPhonemeText()[0]);
+            Assert.AreEqual('h', ghi.GetPhonemeText()[1]);
+            Assert.AreEqual('i', ghi.GetPhonemeText()[2]);
+            Assert.AreEqual('j', jkl.GetPhonemeText()[0]);
+            Assert.AreEqual('k', jkl.GetPhonemeText()[1]);
+            Assert.AreEqual('l', jkl.GetPhonemeText()[2]);
+
+            Phoneme a_b = new Phoneme(a, b);
+            Assert.AreEqual('a', a_b.GetPhonemeText()[0]);
+            Assert.AreEqual('b', a_b.GetPhonemeText()[1]);
+            Assert.AreEqual("ab", a_b.GetPhonemeText().Substring(0, 2 - 0).toString());
+            Assert.AreEqual("a", a_b.GetPhonemeText().Substring(0, 1 - 0).toString());
+            Assert.AreEqual("b", a_b.GetPhonemeText().Substring(1, 2 - 1).toString());
+
+            Phoneme cd_ef = new Phoneme(cd, ef);
+            Assert.AreEqual('c', cd_ef.GetPhonemeText()[0]);
+            Assert.AreEqual('d', cd_ef.GetPhonemeText()[1]);
+            Assert.AreEqual('e', cd_ef.GetPhonemeText()[2]);
+            Assert.AreEqual('f', cd_ef.GetPhonemeText()[3]);
+            Assert.AreEqual("c", cd_ef.GetPhonemeText().Substring(0, 1 - 0).toString());
+            Assert.AreEqual("d", cd_ef.GetPhonemeText().Substring(1, 2 - 1).toString());
+            Assert.AreEqual("e", cd_ef.GetPhonemeText().Substring(2, 3 - 2).toString());
+            Assert.AreEqual("f", cd_ef.GetPhonemeText().Substring(3, 4 - 3).toString());
+            Assert.AreEqual("cd", cd_ef.GetPhonemeText().Substring(0, 2 - 0).toString());
+            Assert.AreEqual("de", cd_ef.GetPhonemeText().Substring(1, 3 - 1).toString());
+            Assert.AreEqual("ef", cd_ef.GetPhonemeText().Substring(2, 4 - 2).toString());
+            Assert.AreEqual("cde", cd_ef.GetPhonemeText().Substring(0, 3 - 0).toString());
+            Assert.AreEqual("def", cd_ef.GetPhonemeText().Substring(1, 4 - 1).toString());
+            Assert.AreEqual("cdef", cd_ef.GetPhonemeText().Substring(0, 4 - 0).toString());
+
+            var test = new Phoneme(a, b);
+            Phoneme a_b_cd = new Phoneme(test, cd);
+            Assert.AreEqual('a', a_b_cd.GetPhonemeText()[0]);
+            Assert.AreEqual('b', a_b_cd.GetPhonemeText()[1]);
+            Assert.AreEqual('c', a_b_cd.GetPhonemeText()[2]);
+            Assert.AreEqual('d', a_b_cd.GetPhonemeText()[3]);
+            Assert.AreEqual("a", a_b_cd.GetPhonemeText().Substring(0, 1 - 0).toString());
+            Assert.AreEqual("b", a_b_cd.GetPhonemeText().Substring(1, 2 - 1).toString());
+            Assert.AreEqual("c", a_b_cd.GetPhonemeText().Substring(2, 3 - 2).toString());
+            Assert.AreEqual("d", a_b_cd.GetPhonemeText().Substring(3, 4 - 3).toString());
+            Assert.AreEqual("ab", a_b_cd.GetPhonemeText().Substring(0, 2 - 0).toString());
+            Assert.AreEqual("bc", a_b_cd.GetPhonemeText().Substring(1, 3 - 1).toString());
+            Assert.AreEqual("cd", a_b_cd.GetPhonemeText().Substring(2, 4 - 2).toString());
+            Assert.AreEqual("abc", a_b_cd.GetPhonemeText().Substring(0, 3 - 0).toString());
+            Assert.AreEqual("bcd", a_b_cd.GetPhonemeText().Substring(1, 4 - 1).toString());
+            Assert.AreEqual("abcd", a_b_cd.GetPhonemeText().Substring(0, 4 - 0).toString());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
new file mode 100644
index 0000000..9112ed4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone1Test.cs
@@ -0,0 +1,109 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests Caverphone1.
+    /// </summary>
+    public class Caverphone1Test : StringEncoderAbstractTest<Caverphone1>
+    {
+        protected override Caverphone1 CreateStringEncoder()
+        {
+            return new Caverphone1();
+        }
+
+        /**
+         * Tests example adapted from version 2.0  http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * AT1111 words: add, aid, at, art, eat, earth, head, hit, hot, hold, hard, heart, it, out, old
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedCommonCodeAT1111()
+        {
+            this.CheckEncodingVariations("AT1111", new String[]{
+            "add",
+            "aid",
+            "at",
+            "art",
+            "eat",
+            "earth",
+            "head",
+            "hit",
+            "hot",
+            "hold",
+            "hard",
+            "heart",
+            "it",
+            "out",
+            "old"});
+        }
+
+        [Test]
+        public void TestEndMb()
+        {
+            String[][] data = { new string[] { "mb", "M11111" }, new string[] { "mbmb", "MPM111" } };
+            this.CheckEncodings(data);
+        }
+
+        /**
+         * Tests some examples from version 2.0 http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestIsCaverphoneEquals()
+        {
+            Caverphone1 caverphone = new Caverphone1();
+            Assert.False(caverphone.IsEncodeEqual("Peter", "Stevenson"), "Caverphone encodings should not be equal");
+            Assert.True(caverphone.IsEncodeEqual("Peter", "Peady"), "Caverphone encodings should be equal");
+        }
+
+        /**
+         * Tests example from http://caversham.otago.ac.nz/files/working/ctp060902.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestSpecificationV1Examples()
+        {
+            String[][] data = { new string[] { "David", "TFT111" }, new string[] { "Whittle", "WTL111" } };
+            this.CheckEncodings(data);
+        }
+
+        /**
+         * Tests examples from http://en.wikipedia.org/wiki/Caverphone
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestWikipediaExamples()
+        {
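+            // Caverphone 1.0 codes are always six characters, right-padded with '1'.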
+            String[][] data = { new string[] { "Lee", "L11111" }, new string[] { "Thompson", "TMPSN1" } };
+            this.CheckEncodings(data);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs
new file mode 100644
index 0000000..4ec1daa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/Caverphone2Test .cs	
@@ -0,0 +1,375 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests Caverphone2.
+    /// </summary>
+    public class Caverphone2Test : StringEncoderAbstractTest<Caverphone2>
+    {
+        protected override Caverphone2 CreateStringEncoder()
+        {
+            return new Caverphone2();
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * AT11111111 words: add, aid, at, art, eat, earth, head, hit, hot, hold, hard, heart, it, out, old
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedCommonCodeAT11111111()
+        {
+            this.CheckEncodingVariations("AT11111111", new String[]{
+            "add",
+            "aid",
+            "at",
+            "art",
+            "eat",
+            "earth",
+            "head",
+            "hit",
+            "hot",
+            "hold",
+            "hard",
+            "heart",
+            "it",
+            "out",
+            "old"});
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedExamples()
+        {
+            String[][] data = { new string[] { "Stevenson", "STFNSN1111" }, new string[] { "Peter", "PTA1111111" } };
+            this.CheckEncodings(data);
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameKLN1111111()
+        {
+            this.CheckEncodingVariations("KLN1111111", new String[]{
+            "Cailean",
+            "Calan",
+            "Calen",
+            "Callahan",
+            "Callan",
+            "Callean",
+            "Carleen",
+            "Carlen",
+            "Carlene",
+            "Carlin",
+            "Carline",
+            "Carlyn",
+            "Carlynn",
+            "Carlynne",
+            "Charlean",
+            "Charleen",
+            "Charlene",
+            "Charline",
+            "Cherlyn",
+            "Chirlin",
+            "Clein",
+            "Cleon",
+            "Cline",
+            "Cohleen",
+            "Colan",
+            "Coleen",
+            "Colene",
+            "Colin",
+            "Colleen",
+            "Collen",
+            "Collin",
+            "Colline",
+            "Colon",
+            "Cullan",
+            "Cullen",
+            "Cullin",
+            "Gaelan",
+            "Galan",
+            "Galen",
+            "Garlan",
+            "Garlen",
+            "Gaulin",
+            "Gayleen",
+            "Gaylene",
+            "Giliane",
+            "Gillan",
+            "Gillian",
+            "Glen",
+            "Glenn",
+            "Glyn",
+            "Glynn",
+            "Gollin",
+            "Gorlin",
+            "Kalin",
+            "Karlan",
+            "Karleen",
+            "Karlen",
+            "Karlene",
+            "Karlin",
+            "Karlyn",
+            "Kaylyn",
+            "Keelin",
+            "Kellen",
+            "Kellene",
+            "Kellyann",
+            "Kellyn",
+            "Khalin",
+            "Kilan",
+            "Kilian",
+            "Killen",
+            "Killian",
+            "Killion",
+            "Klein",
+            "Kleon",
+            "Kline",
+            "Koerlin",
+            "Kylen",
+            "Kylynn",
+            "Quillan",
+            "Quillon",
+            "Qulllon",
+            "Xylon"});
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameTN11111111()
+        {
+            this.CheckEncodingVariations("TN11111111", new String[]{
+            "Dan",
+            "Dane",
+            "Dann",
+            "Darn",
+            "Daune",
+            "Dawn",
+            "Ddene",
+            "Dean",
+            "Deane",
+            "Deanne",
+            "DeeAnn",
+            "Deeann",
+            "Deeanne",
+            "Deeyn",
+            "Den",
+            "Dene",
+            "Denn",
+            "Deonne",
+            "Diahann",
+            "Dian",
+            "Diane",
+            "Diann",
+            "Dianne",
+            "Diannne",
+            "Dine",
+            "Dion",
+            "Dione",
+            "Dionne",
+            "Doane",
+            "Doehne",
+            "Don",
+            "Donn",
+            "Doone",
+            "Dorn",
+            "Down",
+            "Downe",
+            "Duane",
+            "Dun",
+            "Dunn",
+            "Duyne",
+            "Dyan",
+            "Dyane",
+            "Dyann",
+            "Dyanne",
+            "Dyun",
+            "Tan",
+            "Tann",
+            "Teahan",
+            "Ten",
+            "Tenn",
+            "Terhune",
+            "Thain",
+            "Thaine",
+            "Thane",
+            "Thanh",
+            "Thayne",
+            "Theone",
+            "Thin",
+            "Thorn",
+            "Thorne",
+            "Thun",
+            "Thynne",
+            "Tien",
+            "Tine",
+            "Tjon",
+            "Town",
+            "Towne",
+            "Turne",
+            "Tyne"});
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedRandomNameTTA1111111()
+        {
+            this.CheckEncodingVariations("TTA1111111", new String[]{
+            "Darda",
+            "Datha",
+            "Dedie",
+            "Deedee",
+            "Deerdre",
+            "Deidre",
+            "Deirdre",
+            "Detta",
+            "Didi",
+            "Didier",
+            "Dido",
+            "Dierdre",
+            "Dieter",
+            "Dita",
+            "Ditter",
+            "Dodi",
+            "Dodie",
+            "Dody",
+            "Doherty",
+            "Dorthea",
+            "Dorthy",
+            "Doti",
+            "Dotti",
+            "Dottie",
+            "Dotty",
+            "Doty",
+            "Doughty",
+            "Douty",
+            "Dowdell",
+            "Duthie",
+            "Tada",
+            "Taddeo",
+            "Tadeo",
+            "Tadio",
+            "Tati",
+            "Teador",
+            "Tedda",
+            "Tedder",
+            "Teddi",
+            "Teddie",
+            "Teddy",
+            "Tedi",
+            "Tedie",
+            "Teeter",
+            "Teodoor",
+            "Teodor",
+            "Terti",
+            "Theda",
+            "Theodor",
+            "Theodore",
+            "Theta",
+            "Thilda",
+            "Thordia",
+            "Tilda",
+            "Tildi",
+            "Tildie",
+            "Tildy",
+            "Tita",
+            "Tito",
+            "Tjader",
+            "Toddie",
+            "Toddy",
+            "Torto",
+            "Tuddor",
+            "Tudor",
+            "Turtle",
+            "Tuttle",
+            "Tutto"});
+        }
+
+        /**
+         * See http://caversham.otago.ac.nz/files/working/ctp150804.pdf
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestCaverphoneRevisitedRandomWords()
+        {
+            this.CheckEncodingVariations("RTA1111111", new String[] { "rather", "ready", "writer" });
+            this.CheckEncoding("SSA1111111", "social");
+            this.CheckEncodingVariations("APA1111111", new String[] { "able", "appear" });
+        }
+
+        [Test]
+        public void TestEndMb()
+        {
+            String[][] data = { new string[] { "mb", "M111111111" }, new string[] { "mbmb", "MPM1111111" } };
+            this.CheckEncodings(data);
+        }
+
+        // Caverphone Revisited
+        [Test]
+        public void TestIsCaverphoneEquals()
+        {
+            Caverphone2 caverphone = new Caverphone2();
+            Assert.False(caverphone.IsEncodeEqual("Peter", "Stevenson"), "Caverphone encodings should not be equal");
+            Assert.True(caverphone.IsEncodeEqual("Peter", "Peady"), "Caverphone encodings should be equal");
+        }
+
+        [Test]
+        public void TestSpecificationExamples()
+        {
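+            // Caverphone 2.0 codes are always ten characters, right-padded with '1'.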
+            String[][] data = {
+                new string[] { "Peter", "PTA1111111"},
+                new string[] { "ready", "RTA1111111"},
+                new string[] { "social", "SSA1111111"},
+                new string[] { "able", "APA1111111"},
+                new string[] { "Tedder", "TTA1111111"},
+                new string[] { "Karleen", "KLN1111111"},
+                new string[] { "Dyun", "TN11111111"}
+            };
+            this.CheckEncodings(data);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
new file mode 100644
index 0000000..46b14ff
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/ColognePhoneticTest.cs
@@ -0,0 +1,171 @@
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the <see cref="ColognePhonetic"/> class.
+    /// </summary>
+    public class ColognePhoneticTest : StringEncoderAbstractTest<ColognePhonetic>
+    {
+        protected override ColognePhonetic CreateStringEncoder()
+        {
+            return new ColognePhonetic();
+        }
+
+        [Test]
+        public void TestAabjoe()
+        {
+            this.CheckEncoding("01", "Aabjoe");
+        }
+
+        [Test]
+        public void TestAaclan()
+        {
+            this.CheckEncoding("0856", "Aaclan");
+        }
+
+        /**
+         * Tests [CODEC-122]
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestAychlmajrForCodec122()
+        {
+            this.CheckEncoding("04567", "Aychlmajr");
+        }
+
+        [Test]
+        public void TestEdgeCases()
+        {
+            String[][] data = {
+            new string[] { "a", "0"},
+            new string[] { "e", "0"},
+            new string[] { "i", "0"},
+            new string[] { "o", "0"},
+            new string[] { "u", "0"},
+            new string[] { "\u00E4", "0"}, // a-umlaut
+            new string[] { "\u00F6", "0"}, // o-umlaut
+            new string[] { "\u00FC", "0"}, // u-umlaut
+            new string[] { "aa", "0"},
+            new string[] { "ha", "0"},
+            new string[] { "h", ""},
+            new string[] { "aha", "0"},
+            new string[] { "b", "1"},
+            new string[] { "p", "1"},
+            new string[] { "ph", "3"},
+            new string[] { "f", "3"},
+            new string[] { "v", "3"},
+            new string[] { "w", "3"},
+            new string[] { "g", "4"},
+            new string[] { "k", "4"},
+            new string[] { "q", "4"},
+            new string[] { "x", "48"},
+            new string[] { "ax", "048"},
+            new string[] { "cx", "48"},
+            new string[] { "l", "5"},
+            new string[] { "cl", "45"},
+            new string[] { "acl", "085"},
+            new string[] { "mn", "6"},
+            new string[] { "r", "7"}
+            };
+            this.CheckEncodings(data);
+        }
+
+        [Test]
+        public void TestExamples()
+        {
+            String[][] data = {
+            new string[] { "m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
+            new string[] { "schmidt", "862"},
+            new string[] { "schneider", "8627"},
+            new string[] { "fischer", "387"},
+            new string[] { "weber", "317"},
+            new string[] { "wagner", "3467"},
+            new string[] { "becker", "147"},
+            new string[] { "hoffmann", "0366"},
+            new string[] { "sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
+            new string[] { "Breschnew", "17863"},
+            new string[] { "Wikipedia", "3412"},
+            new string[] { "peter", "127"},
+            new string[] { "pharma", "376"},
+            new string[] { "m\u00f6nchengladbach", "664645214"}, // mönchengladbach
+            new string[] { "deutsch", "28"},
+            new string[] { "deutz", "28"},
+            new string[] { "hamburg", "06174"},
+            new string[] { "hannover", "0637"},
+            new string[] { "christstollen", "478256"},
+            new string[] { "Xanthippe", "48621"},
+            new string[] { "Zacharias", "8478"},
+            new string[] { "Holzbau", "0581"},
+            new string[] { "matsch", "68"},
+            new string[] { "matz", "68"},
+            new string[] { "Arbeitsamt", "071862"},
+            new string[] { "Eberhard", "01772"},
+            new string[] { "Eberhardt", "01772"},
+            new string[] { "heithabu", "021"}
+            };
+            this.CheckEncodings(data);
+        }
+
+        [Test]
+        public void TestHyphen()
+        {
+            String[][] data = {
+                new string[] { "bergisch-gladbach", "174845214"},
+                new string[] { "M\u00fcller-L\u00fcdenscheidt", "65752682"}
+            }; // Müller-Lüdenscheidt
+            this.CheckEncodings(data);
+        }
+
+        [Test]
+        public void TestIsEncodeEquals()
+        {
+            String[][] data = {
+            new string[] {"Meyer", "M\u00fcller"}, // Müller
+            new string[] {"Meyer", "Mayr"},
+            new string[] {"house", "house"},
+            new string[] {"House", "house"},
+            new string[] {"Haus", "house"},
+            new string[] {"ganz", "Gans"},
+            new string[] {"ganz", "G\u00e4nse"}, // Gänse
+            new string[] {"Miyagi", "Miyako"}};
+            foreach (String[] element in data)
+            {
+                this.StringEncoder.IsEncodeEqual(element[1], element[0]);
+            }
+        }
+
+        [Test]
+        public void TestVariationsMella()
+        {
+            String[] data = { "mella", "milah", "moulla", "mellah", "muehle", "mule" };
+            this.CheckEncodingVariations("65", data);
+        }
+
+        [Test]
+        public void TestVariationsMeyer()
+        {
+            String[] data = { "Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major" };
+            this.CheckEncodingVariations("67", data);
+        }
+    }
+}
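
A minimal usage sketch of the Kölner Phonetik encoder under test above, assuming Encode(string) is exposed through the IStringEncoder contract; the expected codes are taken from the TestExamples data:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class ColognePhoneticSketch
    {
        public static void Main()
        {
            ColognePhonetic cologne = new ColognePhonetic();

            Console.WriteLine(cologne.Encode("schmidt"));      // "862"  (TestExamples)
            Console.WriteLine(cologne.Encode("Wikipedia"));    // "3412" (TestExamples)
            Console.WriteLine(cologne.Encode("m\u00DCller"));  // "657"  (TestExamples, "mÜller")
        }
    }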

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
new file mode 100644
index 0000000..84bb5d3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DaitchMokotoffSoundexTest.cs
@@ -0,0 +1,176 @@
+// commons-codec version compatibility level: 1.10
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="DaitchMokotoffSoundex"/>.
+    /// <para/>
+    /// since 1.10
+    /// </summary>
+    public class DaitchMokotoffSoundexTest : StringEncoderAbstractTest<DaitchMokotoffSoundex>
+    {
+        protected override DaitchMokotoffSoundex CreateStringEncoder()
+        {
+            return new DaitchMokotoffSoundex();
+        }
+
+        private string GetSoundex(string source)
+        {
+            return StringEncoder.GetSoundex(source);
+        }
+
+        private string Encode(string source)
+        {
+            return StringEncoder.Encode(source);
+        }
+
+        [Test]
+        public void TestAccentedCharacterFolding()
+        {
+            Assert.AreEqual("294795", GetSoundex("Straßburg"));
+            Assert.AreEqual("294795", GetSoundex("Strasburg"));
+
+            Assert.AreEqual("095600", GetSoundex("Éregon"));
+            Assert.AreEqual("095600", GetSoundex("Eregon"));
+        }
+
+        [Test]
+        public void TestAdjacentCodes()
+        {
+            // AKSSOL
+            // A-KS-S-O-L
+            // 0-54-4---8 -> wrong
+            // 0-54-----8 -> correct
+            Assert.AreEqual("054800", GetSoundex("AKSSOL"));
+
+            // GERSCHFELD
+            // G-E-RS-CH-F-E-L-D
+            // 5--4/94-5/4-7-8-3 -> wrong
+            // 5--4/94-5/--7-8-3 -> correct
+            Assert.AreEqual("547830|545783|594783|594578", GetSoundex("GERSCHFELD"));
+        }
+
+        [Test]
+        public void TestEncodeBasic()
+        {
+            // same as above, but without branching
+            Assert.AreEqual("097400", Encode("AUERBACH"));
+            Assert.AreEqual("097400", Encode("OHRBACH"));
+            Assert.AreEqual("874400", Encode("LIPSHITZ"));
+            Assert.AreEqual("874400", Encode("LIPPSZYC"));
+            Assert.AreEqual("876450", Encode("LEWINSKY"));
+            Assert.AreEqual("876450", Encode("LEVINSKI"));
+            Assert.AreEqual("486740", Encode("SZLAMAWICZ"));
+            Assert.AreEqual("486740", Encode("SHLAMOVITZ"));
+        }
+
+        [Test]
+        public void TestEncodeIgnoreApostrophes()
+        {
+            this.CheckEncodingVariations("079600", new String[] { "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien",
+                "OBri'en", "OBrie'n", "OBrien'" });
+        }
+
+        /**
+         * Test data from http://www.myatt.demon.co.uk/sxalg.htm
+         *
+         * @throws EncoderException
+         */
+        [Test]
+        public void TestEncodeIgnoreHyphens()
+        {
+            this.CheckEncodingVariations("565463", new String[] { "KINGSMITH", "-KINGSMITH", "K-INGSMITH", "KI-NGSMITH",
+                "KIN-GSMITH", "KING-SMITH", "KINGS-MITH", "KINGSM-ITH", "KINGSMI-TH", "KINGSMIT-H", "KINGSMITH-" });
+        }
+
+        [Test]
+        public void TestEncodeIgnoreTrimmable()
+        {
+            Assert.AreEqual("746536", Encode(" \t\n\r Washington \t\n\r "));
+            Assert.AreEqual("746536", Encode("Washington"));
+        }
+
+        /**
+         * Examples from http://www.jewishgen.org/infofiles/soundex.html
+         */
+        [Test]
+        public void TestSoundexBasic()
+        {
+            Assert.AreEqual("583600", GetSoundex("GOLDEN"));
+            Assert.AreEqual("087930", GetSoundex("Alpert"));
+            Assert.AreEqual("791900", GetSoundex("Breuer"));
+            Assert.AreEqual("579000", GetSoundex("Haber"));
+            Assert.AreEqual("665600", GetSoundex("Mannheim"));
+            Assert.AreEqual("664000", GetSoundex("Mintz"));
+            Assert.AreEqual("370000", GetSoundex("Topf"));
+            Assert.AreEqual("586660", GetSoundex("Kleinmann"));
+            Assert.AreEqual("769600", GetSoundex("Ben Aron"));
+
+            Assert.AreEqual("097400|097500", GetSoundex("AUERBACH"));
+            Assert.AreEqual("097400|097500", GetSoundex("OHRBACH"));
+            Assert.AreEqual("874400", GetSoundex("LIPSHITZ"));
+            Assert.AreEqual("874400|874500", GetSoundex("LIPPSZYC"));
+            Assert.AreEqual("876450", GetSoundex("LEWINSKY"));
+            Assert.AreEqual("876450", GetSoundex("LEVINSKI"));
+            Assert.AreEqual("486740", GetSoundex("SZLAMAWICZ"));
+            Assert.AreEqual("486740", GetSoundex("SHLAMOVITZ"));
+        }
+
+        /**
+         * Examples from http://www.avotaynu.com/soundex.htm
+         */
+        [Test]
+        public void TestSoundexBasic2()
+        {
+            Assert.AreEqual("467000|567000", GetSoundex("Ceniow"));
+            Assert.AreEqual("467000", GetSoundex("Tsenyuv"));
+            Assert.AreEqual("587400|587500", GetSoundex("Holubica"));
+            Assert.AreEqual("587400", GetSoundex("Golubitsa"));
+            Assert.AreEqual("746480|794648", GetSoundex("Przemysl"));
+            Assert.AreEqual("746480", GetSoundex("Pshemeshil"));
+            Assert.AreEqual("944744|944745|944754|944755|945744|945745|945754|945755", GetSoundex("Rosochowaciec"));
+            Assert.AreEqual("945744", GetSoundex("Rosokhovatsets"));
+        }
+
+        /**
+         * Examples from http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex
+         */
+        [Test]
+        public void TestSoundexBasic3()
+        {
+            Assert.AreEqual("734000|739400", GetSoundex("Peters"));
+            Assert.AreEqual("734600|739460", GetSoundex("Peterson"));
+            Assert.AreEqual("645740", GetSoundex("Moskowitz"));
+            Assert.AreEqual("645740", GetSoundex("Moskovitz"));
+            Assert.AreEqual("154600|145460|454600|445460", GetSoundex("Jackson"));
+            Assert.AreEqual("154654|154645|154644|145465|145464|454654|454645|454644|445465|445464",
+                    GetSoundex("Jackson-Jackson"));
+        }
+
+        [Test]
+        public void TestSpecialRomanianCharacters()
+        {
+            Assert.AreEqual("364000|464000", GetSoundex("ţamas")); // t-cedilla
+            Assert.AreEqual("364000|464000", GetSoundex("țamas")); // t-comma
+        }
+    }
+}
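
A short sketch contrasting the two entry points exercised above: GetSoundex returns every branching code joined by '|', while Encode produces a single code without branching. The values are taken from TestSoundexBasic and TestEncodeBasic:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class DaitchMokotoffSketch
    {
        public static void Main()
        {
            DaitchMokotoffSoundex dm = new DaitchMokotoffSoundex();

            // GetSoundex returns every branch, separated by '|' (TestSoundexBasic).
            Console.WriteLine(dm.GetSoundex("AUERBACH")); // "097400|097500"

            // Encode produces a single code without branching (TestEncodeBasic).
            Console.WriteLine(dm.Encode("AUERBACH"));     // "097400"
        }
    }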


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Nysiis.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Nysiis.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Nysiis.cs
new file mode 100644
index 0000000..a80d4f4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Nysiis.cs
@@ -0,0 +1,370 @@
+// commons-codec version compatibility level: 1.9
+using System;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate similar names, but can also be used as a
+    /// general-purpose scheme to find words with similar phonemes.
+    /// </summary>
+    /// <remarks>
+    /// NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm.
+    /// <para/>
+    /// Algorithm description:
+    /// <list type="number">
+    ///     <item>
+    ///         <term>Transcode first characters of name</term>
+    ///         <description>
+    ///             <list type="number">
+    ///                 <item><description>MAC ->   MCC</description></item>
+    ///                 <item><description>KN  ->   NN</description></item>
+    ///                 <item><description>K   ->   C</description></item>
+    ///                 <item><description>PH  ->   FF</description></item>
+    ///                 <item><description>PF  ->   FF</description></item>
+    ///                 <item><description>SCH ->   SSS</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Transcode last characters of name</term>
+    ///         <description>
+    ///             <list type="number">
+    ///                 <item><description>EE, IE          ->   Y</description></item>
+    ///                 <item><description>DT,RT,RD,NT,ND  ->   D</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>First character of key = first character of name</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>Transcode remaining characters by following these rules, incrementing by one character each time</term>
+    ///         <description>
+    ///             <list type="number">
+    ///                 <item><description>EV  ->   AF  else A,E,I,O,U -> A</description></item>
+    ///                 <item><description>Q   ->   G</description></item>
+    ///                 <item><description>Z   ->   S</description></item>
+    ///                 <item><description>M   ->   N</description></item>
+    ///                 <item><description>KN  ->   N   else K -> C</description></item>
+    ///                 <item><description>SCH ->   SSS</description></item>
+    ///                 <item><description>PH  ->   FF</description></item>
+    ///                 <item><description>H   ->   If previous or next is nonvowel, previous</description></item>
+    ///                 <item><description>W   ->   If previous is vowel, previous</description></item>
+    ///                 <item><description>Add current to key if current != last key character</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>If last character is S, remove it</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>If last characters are AY, replace with Y</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>If last character is A, remove it</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>Collapse all strings of repeated characters</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>Add original first character of name as first character of key</term>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// See: <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a>
+    /// <para/>
+    /// See: <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a>
+    /// <para/>
+    /// since 1.7
+    /// </remarks>
+    /// <seealso cref="Soundex"/>
+    public class Nysiis : IStringEncoder
+    {
+        private static readonly char[] CHARS_A = new char[] { 'A' };
+        private static readonly char[] CHARS_AF = new char[] { 'A', 'F' };
+        private static readonly char[] CHARS_C = new char[] { 'C' };
+        private static readonly char[] CHARS_FF = new char[] { 'F', 'F' };
+        private static readonly char[] CHARS_G = new char[] { 'G' };
+        private static readonly char[] CHARS_N = new char[] { 'N' };
+        private static readonly char[] CHARS_NN = new char[] { 'N', 'N' };
+        private static readonly char[] CHARS_S = new char[] { 'S' };
+        private static readonly char[] CHARS_SSS = new char[] { 'S', 'S', 'S' };
+
+        private static readonly Regex PAT_MAC = new Regex("^MAC", RegexOptions.Compiled);
+        private static readonly Regex PAT_KN = new Regex("^KN", RegexOptions.Compiled);
+        private static readonly Regex PAT_K = new Regex("^K", RegexOptions.Compiled);
+        private static readonly Regex PAT_PH_PF = new Regex("^(PH|PF)", RegexOptions.Compiled);
+        private static readonly Regex PAT_SCH = new Regex("^SCH", RegexOptions.Compiled);
+        private static readonly Regex PAT_EE_IE = new Regex("(EE|IE)$", RegexOptions.Compiled);
+        private static readonly Regex PAT_DT_ETC = new Regex("(DT|RT|RD|NT|ND)$", RegexOptions.Compiled);
+
+        private static readonly char SPACE = ' ';
+        private static readonly int TRUE_LENGTH = 6;
+
+        /// <summary>
+        /// Tests if the given character is a vowel.
+        /// </summary>
+        /// <param name="c">The character to test.</param>
+        /// <returns><c>true</c> if the character is a vowel, <c>false</c> otherwise.</returns>
+        private static bool IsVowel(char c)
+        {
+            return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U';
+        }
+
+        /// <summary>
+        /// Transcodes the remaining parts of the string. The method operates on a sliding window, looking at 4 characters at
+        /// a time: [i-1, i, i+1, i+2].
+        /// </summary>
+        /// <param name="prev">The previous character.</param>
+        /// <param name="curr">The current character.</param>
+        /// <param name="next">The next character.</param>
+        /// <param name="aNext">The after next character</param>
+        /// <returns>A transcoded array of characters, starting from the current position.</returns>
+        private static char[] TranscodeRemaining(char prev, char curr, char next, char aNext)
+        {
+            // 1. EV -> AF
+            if (curr == 'E' && next == 'V')
+            {
+                return CHARS_AF;
+            }
+
+            // A, E, I, O, U -> A
+            if (IsVowel(curr))
+            {
+                return CHARS_A;
+            }
+
+            // 2. Q -> G, Z -> S, M -> N
+            if (curr == 'Q')
+            {
+                return CHARS_G;
+            }
+            else if (curr == 'Z')
+            {
+                return CHARS_S;
+            }
+            else if (curr == 'M')
+            {
+                return CHARS_N;
+            }
+
+            // 3. KN -> NN else K -> C
+            if (curr == 'K')
+            {
+                if (next == 'N')
+                {
+                    return CHARS_NN;
+                }
+                else
+                {
+                    return CHARS_C;
+                }
+            }
+
+            // 4. SCH -> SSS
+            if (curr == 'S' && next == 'C' && aNext == 'H')
+            {
+                return CHARS_SSS;
+            }
+
+            // PH -> FF
+            if (curr == 'P' && next == 'H')
+            {
+                return CHARS_FF;
+            }
+
+            // 5. H -> If previous or next is a non vowel, previous.
+            if (curr == 'H' && (!IsVowel(prev) || !IsVowel(next)))
+            {
+                return new char[] { prev };
+            }
+
+            // 6. W -> If previous is vowel, previous.
+            if (curr == 'W' && IsVowel(prev))
+            {
+                return new char[] { prev };
+            }
+
+            return new char[] { curr };
+        }
+
+        /// <summary>Indicates the strict mode.</summary>
+        private readonly bool strict;
+
+        /// <summary>
+        /// Creates an instance of the <see cref="Nysiis"/> encoder with strict mode (original form),
+        /// i.e. encoded strings have a maximum length of 6.
+        /// </summary>
+        public Nysiis()
+            : this(true)
+        {
+        }
+
+        /// <summary>
+        /// Creates an instance of the <see cref="Nysiis"/> encoder with the specified strict mode:
+        /// <list type="bullet">
+        ///     <item><term><c>true</c>:</term><description>encoded strings have a maximum length of 6</description></item>
+        ///     <item><term><c>false</c>:</term><description>encoded strings may have arbitrary length</description></item>
+        /// </list>
+        /// </summary>
+        /// <param name="strict">The strict mode.</param>
+        public Nysiis(bool strict)
+        {
+            this.strict = strict;
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the NYSIIS algorithm. This method is provided in order to satisfy the requirements of the
+        //     * Encoder interface, and will throw an {@link EncoderException} if the supplied object is not of type
+        //     * {@link String}.
+        //     *
+        //     * @param obj
+        //     *            Object to encode
+        //     * @return An object (or a {@link String}) containing the NYSIIS code which corresponds to the given String.
+        //     * @throws EncoderException
+        //     *            if the parameter supplied is not of a {@link String}
+        //     * @throws IllegalArgumentException
+        //     *            if a character is not mapped
+        //     */
+        //    @Override
+        //public object Encode(object obj) 
+        //    {
+        //    if (!(obj is String)) {
+        //            throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String");
+        //        }
+        //    return this.nysiis((String) obj);
+        //    }
+
+        /// <summary>
+        /// Encodes a string using the NYSIIS algorithm.
+        /// </summary>
+        /// <param name="str">A string object to encode.</param>
+        /// <returns>A <see cref="Nysiis"/> code corresponding to the string supplied.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        public virtual string Encode(string str)
+        {
+            return this.GetNysiis(str);
+        }
+
+        /// <summary>
+        /// Indicates the strict mode for this <see cref="Nysiis"/> encoder.
+        /// <c>true</c> if the encoder is configured for strict mode, <c>false</c> otherwise.
+        /// </summary>
+        public virtual bool IsStrict
+        {
+            get { return this.strict; }
+        }
+
+        /// <summary>
+        /// Retrieves the NYSIIS code for a given string.
+        /// </summary>
+        /// <param name="str">String to encode using the NYSIIS algorithm.</param>
+        /// <returns>A NYSIIS code for the string supplied.</returns>
+        public virtual string GetNysiis(string str)
+        {
+            if (str == null)
+            {
+                return null;
+            }
+
+            // Use the same clean rules as Soundex
+            str = SoundexUtils.Clean(str);
+
+            if (str.Length == 0)
+            {
+                return str;
+            }
+
+            // Translate first characters of name:
+            // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
+            str = PAT_MAC.Replace(str, "MCC", 1);
+            str = PAT_KN.Replace(str, "NN", 1);
+            str = PAT_K.Replace(str, "C", 1);
+            str = PAT_PH_PF.Replace(str, "FF", 1);
+            str = PAT_SCH.Replace(str, "SSS", 1);
+
+            // Translate last characters of name:
+            // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
+            str = PAT_EE_IE.Replace(str, "Y", 1);
+            str = PAT_DT_ETC.Replace(str, "D", 1);
+
+            // First character of key = first character of name.
+            StringBuilder key = new StringBuilder(str.Length);
+            key.Append(str[0]);
+
+            // Transcode remaining characters, incrementing by one character each time
+            char[] chars = str.ToCharArray();
+            int len = chars.Length;
+
+            for (int i = 1; i < len; i++)
+            {
+                char next = i < len - 1 ? chars[i + 1] : SPACE;
+                char aNext = i < len - 2 ? chars[i + 2] : SPACE;
+                char[] transcoded = TranscodeRemaining(chars[i - 1], chars[i], next, aNext);
+                System.Array.Copy(transcoded, 0, chars, i, transcoded.Length);
+
+                // only append the current char to the key if it is different from the last one
+                if (chars[i] != chars[i - 1])
+                {
+                    key.Append(chars[i]);
+                }
+            }
+
+            if (key.Length > 1)
+            {
+                char lastChar = key[key.Length - 1];
+
+                // If last character is S, remove it.
+                if (lastChar == 'S')
+                {
+                    //key.deleteCharAt(key.length() - 1);
+                    key.Remove(key.Length - 1, 1);
+                    lastChar = key[key.Length - 1];
+                }
+
+                if (key.Length > 2)
+                {
+                    char last2Char = key[key.Length - 2];
+                    // If last characters are AY, replace with Y.
+                    if (last2Char == 'A' && lastChar == 'Y')
+                    {
+                        //key.deleteCharAt(key.length() - 2);
+                        key.Remove(key.Length - 2, 1);
+                    }
+                }
+
+                // If last character is A, remove it.
+                if (lastChar == 'A')
+                {
+                    //key.deleteCharAt(key.length() - 1);
+                    key.Remove(key.Length - 1, 1);
+                }
+            }
+
+            string result = key.ToString();
+            return this.IsStrict ? result.Substring(0, Math.Min(TRUE_LENGTH, result.Length)) : result;
+        }
+    }
+}
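
A minimal usage sketch of the NYSIIS encoder above, showing the strict/non-strict distinction described in the constructor documentation. The input name is illustrative only and no particular output is asserted:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class NysiisSketch
    {
        public static void Main()
        {
            Nysiis strict = new Nysiis();       // strict mode: codes are truncated to 6 characters
            Nysiis relaxed = new Nysiis(false); // non-strict: codes may have arbitrary length

            string name = "MACINTOSH";          // illustrative input only; no output asserted here
            Console.WriteLine(strict.Encode(name));
            Console.WriteLine(relaxed.Encode(name));
            Console.WriteLine(strict.IsStrict); // True
        }
    }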

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/RefinedSoundex.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/RefinedSoundex.cs b/src/Lucene.Net.Analysis.Phonetic/Language/RefinedSoundex.cs
new file mode 100644
index 0000000..e0f9071
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/RefinedSoundex.cs
@@ -0,0 +1,202 @@
+// commons-codec version compatibility level: 1.9
+using System.Globalization;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Refined Soundex value. A refined soundex code is
+    /// optimized for spell checking words. Soundex method originally developed by
+    /// <c>Margaret Odell</c> and <c>Robert Russell</c>.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// </summary>
+    public class RefinedSoundex : IStringEncoder
+    {
+        /// <summary>
+        /// since 1.4
+        /// </summary>
+        public static readonly string US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";
+
+        /// <summary>
+        /// RefinedSoundex is *refined* for a number of reasons one being that the
+        /// mappings have been altered. This implementation contains default
+        /// mappings for US English.
+        /// </summary>
+        private static readonly char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.ToCharArray();
+
+        /// <summary>
+        /// Every letter of the alphabet is "mapped" to a numerical value. This char
+        /// array holds the values to which each letter is mapped. This
+        /// implementation contains a default map for US_ENGLISH.
+        /// </summary>
+        private readonly char[] soundexMapping;
+
+        /// <summary>
+        /// This static variable contains an instance of the RefinedSoundex using
+        /// the US_ENGLISH mapping.
+        /// </summary>
+        public static readonly RefinedSoundex US_ENGLISH = new RefinedSoundex();
+
+        /// <summary>
+        /// Creates an instance of the <see cref="RefinedSoundex"/> object using the default US
+        /// English mapping.
+        /// </summary>
+        public RefinedSoundex()
+        {
+            this.soundexMapping = US_ENGLISH_MAPPING;
+        }
+
+        /// <summary>
+        /// Creates a refined soundex instance using a custom mapping. This
+        /// constructor can be used to customize the mapping, and/or possibly
+        /// provide an internationalized mapping for a non-Western character set.
+        /// </summary>
+        /// <param name="mapping">Mapping array to use when finding the corresponding code for a given character.</param>
+        public RefinedSoundex(char[] mapping)
+        {
+            this.soundexMapping = new char[mapping.Length];
+            System.Array.Copy(mapping, 0, this.soundexMapping, 0, mapping.Length);
+        }
+
+        /// <summary>
+        /// Creates a refined Soundex instance using a custom mapping. This constructor can be used to customize the mapping,
+        /// and/or possibly provide an internationalized mapping for a non-Western character set.
+        /// </summary>
+        /// <param name="mapping">Mapping string to use when finding the corresponding code for a given character.</param>
+        public RefinedSoundex(string mapping)
+        {
+            this.soundexMapping = mapping.ToCharArray();
+        }
+
+        /// <summary>
+        /// Returns the number of characters in the two encoded strings that are the
+        /// same. This return value ranges from 0 to the length of the shortest
+        /// encoded string: 0 indicates little or no similarity, and 4 out of 4 (for
+        /// example) indicates strong similarity or identical values. For refined
+        /// Soundex, the return value can be greater than 4.
+        /// <para/>
+        /// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
+        ///     MS T-SQL DIFFERENCE</a>
+        /// <para/>
+        /// since 1.3
+        /// </summary>
+        /// <param name="s1">A string that will be encoded and compared.</param>
+        /// <param name="s2">A string that will be encoded and compared.</param>
+        /// <returns>The number of characters in the two encoded strings that are the same from 0 to to the length of the shortest encoded string.</returns>
+        /// <seealso cref="SoundexUtils.Difference(IStringEncoder, string, string)"/>
+        public virtual int Difference(string s1, string s2)
+        {
+            return SoundexUtils.Difference(this, s1, s2);
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the refined soundex algorithm. This method is
+        //     * provided in order to satisfy the requirements of the Encoder interface,
+        //     * and will throw an EncoderException if the supplied object is not of type
+        //     * java.lang.String.
+        //     *
+        //     * @param obj
+        //     *                  Object to encode
+        //     * @return An object (or type java.lang.String) containing the refined
+        //     *             soundex code which corresponds to the String supplied.
+        //     * @throws EncoderException
+        //     *                  if the parameter supplied is not of type java.lang.String
+        //     */
+        //    @Override
+        //public virtual object Encode(object obj) 
+        //    {
+        //    if (!(obj is String)) {
+        //            throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String");
+        //        }
+        //    return soundex((String) obj);
+        //    }
+
+        /// <summary>
+        /// Encodes a string using the refined soundex algorithm.
+        /// </summary>
+        /// <param name="str">A string object to encode.</param>
+        /// <returns>A Soundex code corresponding to the string supplied.</returns>
+        public virtual string Encode(string str)
+        {
+            return GetSoundex(str);
+        }
+
+        /// <summary>
+        /// Returns the mapping code for a given character. The mapping codes are
+        /// maintained in an internal char array named soundexMapping, and the
+        /// default values of these mappings are US English.
+        /// </summary>
+        /// <param name="c"><see cref="char"/> to get mapping for.</param>
+        /// <returns>A character (really a numeral) to return for the given <see cref="char"/>.</returns>
+        internal char GetMappingCode(char c)
+        {
+            if (!char.IsLetter(c))
+            {
+                return (char)0;
+            }
+            return this.soundexMapping[char.ToUpperInvariant(c) - 'A'];
+        }
+
+        /// <summary>
+        /// Retrieves the Refined Soundex code for a given string.
+        /// </summary>
+        /// <param name="str">String to encode using the Refined Soundex algorithm.</param>
+        /// <returns>A soundex code for the string supplied.</returns>
+        public virtual string GetSoundex(string str)
+        {
+            if (str == null)
+            {
+                return null;
+            }
+            str = SoundexUtils.Clean(str);
+            if (str.Length == 0)
+            {
+                return str;
+            }
+
+            StringBuilder sBuf = new StringBuilder();
+            sBuf.Append(str[0]);
+
+            char last, current;
+            last = '*';
+
+            for (int i = 0; i < str.Length; i++)
+            {
+
+                current = GetMappingCode(str[i]);
+                if (current == last)
+                {
+                    continue;
+                }
+                else if (current != 0)
+                {
+                    sBuf.Append(current);
+                }
+
+                last = current;
+
+            }
+
+            return sBuf.ToString();
+        }
+    }
+}
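
A small sketch of the refined Soundex API defined above, using the shared US_ENGLISH instance. No concrete codes are asserted here, since no RefinedSoundex test data appears in this part of the commit:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class RefinedSoundexSketch
    {
        public static void Main()
        {
            // The encoding keeps the original first letter and appends the refined digit codes.
            Console.WriteLine(RefinedSoundex.US_ENGLISH.GetSoundex("testing"));

            // Difference counts matching positions in the two encodings; for refined Soundex
            // the result can exceed 4 (see the Difference documentation above).
            Console.WriteLine(RefinedSoundex.US_ENGLISH.Difference("testing", "testers"));
        }
    }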

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Soundex.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Soundex.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Soundex.cs
new file mode 100644
index 0000000..abb70c3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Soundex.cs
@@ -0,0 +1,318 @@
+// commons-codec version compatibility level: 1.10
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a
+    /// general-purpose scheme to find words with similar phonemes.
+    /// <para/>
+    /// This class is thread-safe.
+    /// Although not strictly immutable (the <see cref="maxLength"/> field is mutable), that field is not actually used.
+    /// </summary>
+    public class Soundex : IStringEncoder
+    {
+        /// <summary>
+        /// The marker character used to indicate a silent (ignored) character.
+        /// These are ignored except when they appear as the first character.
+        /// <para/>
+        /// Note: the <see cref="US_ENGLISH_MAPPING_STRING"/> does not use this mechanism
+        /// because changing it might break existing code. Mappings that don't contain
+        /// a silent marker code are treated as though H and W are silent.
+        /// <para/>
+        /// To override this, use the <see cref="Soundex(string, bool)"/> constructor.
+        /// <para/>
+        /// since 1.11
+        /// </summary>
+        public static readonly char SILENT_MARKER = '-';
+
+        /// <summary>
+        /// This is a default mapping of the 26 letters used in US English. A value of <c>0</c> for a letter position
+        /// means do not encode, but treat as a separator when it occurs between consonants with the same code.
+        /// <para/>
+        /// (This constant is provided as both an implementation convenience and to allow documentation to pick
+        /// up the value for the constant values page.)
+        /// <para/>
+        /// <b>Note that letters H and W are treated specially.</b>
+        /// They are ignored (after the first letter) and don't act as separators
+        /// between consonants with the same code.
+        /// </summary>
+        /// <seealso cref="US_ENGLISH_MAPPING"/>
+        //                                                      ABCDEFGHIJKLMNOPQRSTUVWXYZ
+        public static readonly string US_ENGLISH_MAPPING_STRING = "01230120022455012623010202";
+
+        /// <summary>
+        /// This is a default mapping of the 26 letters used in US English. A value of <c>0</c> for a letter position
+        /// means do not encode.
+        /// </summary>
+        /// <seealso cref="Soundex.Soundex(char[])"/>
+        private static readonly char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.ToCharArray();
+
+        /// <summary>
+        /// An instance of Soundex using the US_ENGLISH_MAPPING mapping.
+        /// This treats H and W as silent letters.
+        /// Apart from when they appear as the first letter, they are ignored.
+        /// They don't act as separators between duplicate codes.
+        /// </summary>
+        /// <seealso cref="US_ENGLISH_MAPPING"/>
+        /// <seealso cref="US_ENGLISH_MAPPING_STRING"/>
+        public static readonly Soundex US_ENGLISH = new Soundex();
+
+        /// <summary>
+        /// An instance of Soundex using the Simplified Soundex mapping, as described here:
+        /// http://west-penwith.org.uk/misc/soundex.htm
+        /// <para/>
+        /// This treats H and W the same as vowels (AEIOUY).
+        /// Such letters aren't encoded (after the first), but they do
+        /// act as separators when dropping duplicate codes.
+        /// The mapping is otherwise the same as for <see cref="US_ENGLISH"/>.
+        /// <para/>
+        /// since 1.11
+        /// </summary>
+        public static readonly Soundex US_ENGLISH_SIMPLIFIED = new Soundex(US_ENGLISH_MAPPING_STRING, false);
+
+        /// <summary>
+        /// An instance of Soundex using the mapping as per the Genealogy site:
+        /// http://www.genealogy.com/articles/research/00000060.html
+        /// <para/>
+        /// This treats vowels (AEIOUY), H and W as silent letters.
+        /// Such letters are ignored (after the first) and do not
+        /// act as separators when dropping duplicate codes.
+        /// <para/>
+        /// The codes for consonants are otherwise the same as for 
+        /// <see cref="US_ENGLISH_MAPPING_STRING"/> and <see cref="US_ENGLISH_SIMPLIFIED"/>.
+        /// <para/>
+        /// since 1.11
+        /// </summary>
+        public static readonly Soundex US_ENGLISH_GENEALOGY = new Soundex("-123-12--22455-12623-1-2-2");
+        //                                                              ABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+        /// <summary>
+        /// The maximum length of a Soundex code - Soundex codes are only four characters by definition.
+        /// </summary>
+        [Obsolete("This feature is not needed since the encoding size must be constant. Will be removed in 2.0.")]
+        private int maxLength = 4;
+
+        /// <summary>
+        /// Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
+        /// letter is mapped. This implementation contains a default map for US_ENGLISH
+        /// </summary>
+        private readonly char[] soundexMapping;
+
+        /// <summary>
+        /// Should H and W be treated specially?
+        /// <para/>
+        /// In versions of the code prior to 1.11,
+        /// the code always treated H and W as silent (ignored) letters.
+        /// If this field is false, H and W are no longer special-cased.
+        /// </summary>
+        private readonly bool specialCaseHW;
+
+        /// <summary>
+        /// Creates an instance using <see cref="US_ENGLISH_MAPPING"/>.
+        /// </summary>
+        /// <seealso cref="Soundex.Soundex(char[])"/>
+        /// <seealso cref="US_ENGLISH_MAPPING"/>
+        public Soundex()
+        {
+            this.soundexMapping = US_ENGLISH_MAPPING;
+            this.specialCaseHW = true;
+        }
+
+        /// <summary>
+        /// Creates a soundex instance using the given mapping. This constructor can be used to provide an internationalized
+        /// mapping for a non-Western character set.
+        /// <para/>
+        /// Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each
+        /// letter is mapped. This implementation contains a default map for <see cref="US_ENGLISH"/>.
+        /// <para/>
+        /// If the mapping contains an instance of <see cref="SILENT_MARKER"/> then H and W are not given special treatment.
+        /// </summary>
+        /// <param name="mapping"> Mapping array to use when finding the corresponding code for a given character.</param>
+        public Soundex(char[] mapping)
+        {
+            this.soundexMapping = new char[mapping.Length];
+            System.Array.Copy(mapping, 0, this.soundexMapping, 0, mapping.Length);
+            this.specialCaseHW = !HasMarker(this.soundexMapping);
+        }
+
+        private bool HasMarker(char[] mapping)
+        {
+            foreach (char ch in mapping)
+            {
+                if (ch == SILENT_MARKER)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping,
+        /// and/or possibly provide an internationalized mapping for a non-Western character set.
+        /// <para/>
+        /// If the mapping contains an instance of <see cref="SILENT_MARKER"/> then H and W are not given special treatment.
+        /// <para/>
+        /// since 1.4
+        /// </summary>
+        /// <param name="mapping">Mapping string to use when finding the corresponding code for a given character.</param>
+        public Soundex(string mapping)
+        {
+            this.soundexMapping = mapping.ToCharArray();
+            this.specialCaseHW = !HasMarker(this.soundexMapping);
+        }
+
+        /// <summary>
+        /// Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping,
+        /// and/or possibly provide an internationalized mapping for a non-Western character set.
+        /// <para/>
+        /// since 1.11
+        /// </summary>
+        /// <param name="mapping">Mapping string to use when finding the corresponding code for a given character.</param>
+        /// <param name="specialCaseHW">if true, then </param>
+        public Soundex(string mapping, bool specialCaseHW)
+        {
+            this.soundexMapping = mapping.ToCharArray();
+            this.specialCaseHW = specialCaseHW;
+        }
+
+        /// <summary>
+        /// Encodes the strings and returns the number of characters in the two encoded strings that are the same. This
+        /// return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or
+        /// identical values.
+        /// <para/>
+        /// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS
+        /// T-SQL DIFFERENCE </a>
+        /// <para/>
+        /// since 1.3
+        /// </summary>
+        /// <param name="s1">A string that will be encoded and compared.</param>
+        /// <param name="s2">A string that will be encoded and compared.</param>
+        /// <returns>The number of characters in the two encoded strings that are the same from 0 to 4.</returns>
+        /// <seealso cref="SoundexUtils.Difference(IStringEncoder, string, string)"/>
+        public virtual int Difference(string s1, string s2)
+        {
+            return SoundexUtils.Difference(this, s1, s2);
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of
+        //     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
+        //     *
+        //     * @param obj
+        //     *                  Object to encode
+        //     * @return An object (or type java.lang.String) containing the soundex code which corresponds to the String
+        //     *             supplied.
+        //     * @throws EncoderException
+        //     *                  if the parameter supplied is not of type java.lang.String
+        //     * @throws IllegalArgumentException
+        //     *                  if a character is not mapped
+        //     */
+        //public virtual Object encode(object obj) 
+        //    {
+        //    if (!(obj is string)) {
+        //            throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String");
+        //        }
+        //    return soundex((string) obj);
+        //    }
+
+        /// <summary>
+        /// Encodes a string using the soundex algorithm.
+        /// </summary>
+        /// <param name="str">A string to encode.</param>
+        /// <returns>A Soundex code corresponding to the string supplied.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        public virtual string Encode(string str)
+        {
+            return GetSoundex(str);
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum length. Standard Soundex codes are four characters long.
+        /// </summary>
+        [Obsolete("This feature is not needed since the encoding size must be constant. Will be removed in 2.0.")]
+        public virtual int MaxLength
+        {
+            get { return this.maxLength; }
+            set { this.maxLength = value; }
+        }
+
+        /// <summary>
+        ///  Maps the given upper-case character to its Soundex code.
+        /// </summary>
+        /// <param name="ch">An upper-case character.</param>
+        /// <returns>A Soundex code.</returns>
+        /// <exception cref="ArgumentException">Thrown if <paramref name="ch"/> is not mapped.</exception>
+        private char Map(char ch)
+        {
+            int index = ch - 'A';
+            if (index < 0 || index >= this.soundexMapping.Length)
+            {
+                throw new ArgumentException("The character is not mapped: " + ch + " (index=" + index + ")");
+            }
+            return this.soundexMapping[index];
+        }
+
+        /// <summary>
+        /// Retrieves the Soundex code for a given string.
+        /// </summary>
+        /// <param name="str">String to encode using the Soundex algorithm.</param>
+        /// <returns>A soundex code for the string supplied.</returns>
+        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
+        public virtual string GetSoundex(string str)
+        {
+            if (str == null)
+            {
+                return null;
+            }
+            str = SoundexUtils.Clean(str);
+            if (str.Length == 0)
+            {
+                return str;
+            }
+            char[] output = { '0', '0', '0', '0' };
+            int count = 0;
+            char first = str[0];
+            output[count++] = first;
+            char lastDigit = Map(first); // previous digit
+            for (int i = 1; i < str.Length && count < output.Length; i++)
+            {
+                char ch = str[i];
+                if ((this.specialCaseHW) && (ch == 'H' || ch == 'W'))
+                { // these are ignored completely
+                    continue;
+                }
+                char digit = Map(ch);
+                if (digit == SILENT_MARKER)
+                {
+                    continue;
+                }
+                if (digit != '0' && digit != lastDigit)
+                { // don't store vowels or repeats
+                    output[count++] = digit;
+                }
+                lastDigit = digit;
+            }
+            return new string(output);
+        }
+    }
+}
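
A brief sketch of the three prebuilt Soundex variants defined above, which differ only in how the letters H and W are treated. "Robert" is the textbook Soundex example (classically "R163" under the standard American mapping); the other outputs are not asserted here:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class SoundexSketch
    {
        public static void Main()
        {
            // Default mapping: H and W are silent (ignored after the first letter).
            Console.WriteLine(Soundex.US_ENGLISH.GetSoundex("Robert"));             // classically "R163"

            // Simplified mapping: H and W are not coded but still separate duplicate codes.
            Console.WriteLine(Soundex.US_ENGLISH_SIMPLIFIED.GetSoundex("Ashcraft"));

            // Genealogy mapping: vowels, H and W are ignored and never act as separators.
            Console.WriteLine(Soundex.US_ENGLISH_GENEALOGY.GetSoundex("Ashcraft"));
        }
    }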

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/SoundexUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/SoundexUtils.cs b/src/Lucene.Net.Analysis.Phonetic/Language/SoundexUtils.cs
new file mode 100644
index 0000000..e6079c2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/SoundexUtils.cs
@@ -0,0 +1,123 @@
+// commons-codec version compatibility level: 1.9
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Utility methods for <see cref="Soundex"/> and <see cref="RefinedSoundex"/> classes.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.3
+    /// </summary>
+    internal sealed class SoundexUtils
+    {
+        /// <summary>
+        /// Cleans up the input string before Soundex processing by only returning
+        /// upper case letters.
+        /// </summary>
+        /// <param name="str">The string to clean.</param>
+        /// <returns>A clean string.</returns>
+        public static string Clean(string str)
+        {
+            if (str == null || str.Length == 0)
+            {
+                return str;
+            }
+            int len = str.Length;
+            char[] chars = new char[len];
+            int count = 0;
+            for (int i = 0; i < len; i++)
+            {
+                if (char.IsLetter(str[i]))
+                {
+                    chars[count++] = str[i];
+                }
+            }
+            if (count == len)
+            {
+                return new CultureInfo("en").TextInfo.ToUpper(str);
+            }
+            return new CultureInfo("en").TextInfo.ToUpper(new string(chars, 0, count));
+        }
+
+        /// <summary>
+        /// Encodes the Strings and returns the number of characters in the two
+        /// encoded Strings that are the same.
+        /// <list type="bullet">
+        ///     <item><description>
+        ///         For Soundex, this return value ranges from 0 through 4: 0 indicates
+        ///         little or no similarity, and 4 indicates strong similarity or identical
+        ///         values.
+        ///     </description></item>
+        ///     <item><description>For refined Soundex, the return value can be greater than 4.</description></item>
+        /// </list>
+        /// <para/>
+        /// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
+        /// MS T-SQL DIFFERENCE</a>
+        /// </summary>
+        /// <param name="encoder">The encoder to use to encode the strings.</param>
+        /// <param name="s1">A string that will be encoded and compared.</param>
+        /// <param name="s2">A string that will be encoded and compared.</param>
+        /// <returns>The number of characters in the two Soundex encoded strings that are the same.</returns>
+        /// <seealso cref="DifferenceEncoded(string, string)"/>
+        public static int Difference(IStringEncoder encoder, string s1, string s2)
+        {
+            return DifferenceEncoded(encoder.Encode(s1), encoder.Encode(s2));
+        }
+
+        /// <summary>
+        /// Returns the number of characters in the two Soundex encoded strings that
+        /// are the same.
+        /// <list type="bullet">
+        ///     <item><description>
+        ///         For Soundex, this return value ranges from 0 through 4: 0 indicates
+        ///         little or no similarity, and 4 indicates strong similarity or identical
+        ///         values.
+        ///     </description></item>
+        ///     <item><description>For refined Soundex, the return value can be greater than 4.</description></item>
+        /// </list>
+        /// <para/>
+        /// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
+        /// MS T-SQL DIFFERENCE</a>
+        /// </summary>
+        /// <param name="es1">An encoded string.</param>
+        /// <param name="es2">An encoded string.</param>
+        /// <returns>The number of characters in the two Soundex encoded strings that are the same.</returns>
+        public static int DifferenceEncoded(string es1, string es2)
+        {
+            if (es1 == null || es2 == null)
+            {
+                return 0;
+            }
+            int lengthToMatch = Math.Min(es1.Length, es2.Length);
+            int diff = 0;
+            for (int i = 0; i < lengthToMatch; i++)
+            {
+                if (es1[i] == es2[i])
+                {
+                    diff++;
+                }
+            }
+            return diff;
+        }
+    }
+}
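
For reference, a minimal sketch of what Difference/DifferenceEncoded compute. SoundexUtils is internal to the assembly, so the sketch goes through the public Soundex encoder (assuming its parameterless commons-codec constructor is ported as-is) and counts matching positions by hand; the Robert/Rupert values follow the classic T-SQL DIFFERENCE example:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    internal static class SoundexDifferenceDemo
    {
        public static void Main()
        {
            var soundex = new Soundex();
            string a = soundex.Encode("Robert");   // "R163"
            string b = soundex.Encode("Rupert");   // "R163"
            int lengthToMatch = Math.Min(a.Length, b.Length);
            int same = 0;
            for (int i = 0; i < lengthToMatch; i++)
            {
                if (a[i] == b[i]) same++;          // characters that agree position-by-position
            }
            Console.WriteLine(same);               // 4 => strong similarity (identical codes)
        }
    }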

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/StringEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/StringEncoder.cs b/src/Lucene.Net.Analysis.Phonetic/Language/StringEncoder.cs
new file mode 100644
index 0000000..b4137a4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/StringEncoder.cs
@@ -0,0 +1,35 @@
+// commons-codec version compatibility level: 1.9
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Defines common encoding methods for <see cref="string"/> encoders.
+    /// </summary>
+    public interface IStringEncoder
+    {
+        /// <summary>
+        /// Encodes a <see cref="string"/> and returns a <see cref="string"/>.
+        /// </summary>
+        /// <param name="source">the <see cref="string"/> to encode</param>
+        /// <returns>the encoded <see cref="string"/></returns>
+        // LUCENENET specific - EncoderException not ported, as it was only thrown on a conversion from object to string type
+        // <exception cref="EncoderException">thrown if there is an error condition during the encoding process.</exception>
+        string Encode(string source);
+    }
+}
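
The interface is intentionally tiny; a hypothetical implementation (UpperCaseEncoder is illustrative only, not part of this commit) is enough to plug into the PhoneticFilter added later in this commit:

    using Lucene.Net.Analysis.Phonetic.Language;

    // Illustrative only: maps each term to its upper-case form.
    public sealed class UpperCaseEncoder : IStringEncoder
    {
        public string Encode(string source)
        {
            return source == null ? null : source.ToUpperInvariant();
        }
    }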

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/dmrules.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/dmrules.txt b/src/Lucene.Net.Analysis.Phonetic/Language/dmrules.txt
new file mode 100644
index 0000000..db8367d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/dmrules.txt
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Format
+// "pattern" "replacement at start of word" "replacement before a vowel" "replacement in other cases"
+
+// Vowels
+
+"a" "0" "" ""
+"e" "0" "" ""
+"i" "0" "" ""
+"o" "0" "" ""
+"u" "0" "" ""
+
+// Consonants
+
+"b" "7" "7" "7"
+"d" "3" "3" "3"
+"f" "7" "7" "7"
+"g" "5" "5" "5"
+"h" "5" "5" ""
+"k" "5" "5" "5"
+"l" "8" "8" "8"
+"m" "6" "6" "6"
+"n" "6" "6" "6"
+"p" "7" "7" "7"
+"q" "5" "5" "5"
+"r" "9" "9" "9"
+"s" "4" "4" "4"
+"t" "3" "3" "3"
+"v" "7" "7" "7"
+"w" "7" "7" "7"
+"x" "5" "54" "54"
+"y" "1" "" ""
+"z" "4" "4" "4"
+
+// Romanian t-cedilla and t-comma should be equivalent
+"ţ" "3|4" "3|4" "3|4"
+"ț" "3|4" "3|4" "3|4"
+
+// Polish characters (e-ogonek and a-ogonek): default case branch either not coded or 6
+"ę" "" "" "|6"
+"ą" "" "" "|6"
+
+// Other terms
+
+"schtsch" "2" "4" "4"
+"schtsh" "2" "4" "4"
+"schtch" "2" "4" "4"
+"shtch" "2" "4" "4"
+"shtsh" "2" "4" "4"
+"stsch" "2" "4" "4"
+"ttsch" "4" "4" "4"
+"zhdzh" "2" "4" "4"
+"shch" "2" "4" "4"
+"scht" "2" "43" "43"
+"schd" "2" "43" "43"
+"stch" "2" "4" "4"
+"strz" "2" "4" "4"
+"strs" "2" "4" "4"
+"stsh" "2" "4" "4"
+"szcz" "2" "4" "4"
+"szcs" "2" "4" "4"
+"ttch" "4" "4" "4"
+"tsch" "4" "4" "4"
+"ttsz" "4" "4" "4"
+"zdzh" "2" "4" "4"
+"zsch" "4" "4" "4"
+"chs" "5" "54" "54"
+"csz" "4" "4" "4"
+"czs" "4" "4" "4"
+"drz" "4" "4" "4"
+"drs" "4" "4" "4"
+"dsh" "4" "4" "4"
+"dsz" "4" "4" "4"
+"dzh" "4" "4" "4"
+"dzs" "4" "4" "4"
+"sch" "4" "4" "4"
+"sht" "2" "43" "43"
+"szt" "2" "43" "43"
+"shd" "2" "43" "43"
+"szd" "2" "43" "43"
+"tch" "4" "4" "4"
+"trz" "4" "4" "4"
+"trs" "4" "4" "4"
+"tsh" "4" "4" "4"
+"tts" "4" "4" "4"
+"ttz" "4" "4" "4"
+"tzs" "4" "4" "4"
+"tsz" "4" "4" "4"
+"zdz" "2" "4" "4"
+"zhd" "2" "43" "43"
+"zsh" "4" "4" "4"
+"ai" "0" "1" ""
+"aj" "0" "1" ""
+"ay" "0" "1" ""
+"au" "0" "7" ""
+"cz" "4" "4" "4"
+"cs" "4" "4" "4"
+"ds" "4" "4" "4"
+"dz" "4" "4" "4"
+"dt" "3" "3" "3"
+"ei" "0" "1" ""
+"ej" "0" "1" ""
+"ey" "0" "1" ""
+"eu" "1" "1" ""
+"fb" "7" "7" "7"
+"ia" "1" "" ""
+"ie" "1" "" ""
+"io" "1" "" ""
+"iu" "1" "" ""
+"ks" "5" "54" "54"
+"kh" "5" "5" "5"
+"mn" "66" "66" "66"
+"nm" "66" "66" "66"
+"oi" "0" "1" ""
+"oj" "0" "1" ""
+"oy" "0" "1" ""
+"pf" "7" "7" "7"
+"ph" "7" "7" "7"
+"sh" "4" "4" "4"
+"sc" "2" "4" "4"
+"st" "2" "43" "43"
+"sd" "2" "43" "43"
+"sz" "4" "4" "4"
+"th" "3" "3" "3"
+"ts" "4" "4" "4"
+"tc" "4" "4" "4"
+"tz" "4" "4" "4"
+"ui" "0" "1" ""
+"uj" "0" "1" ""
+"uy" "0" "1" ""
+"ue" "0" "1" ""
+"zd" "2" "43" "43"
+"zh" "4" "4" "4"
+"zs" "4" "4" "4"
+
+// Branching cases
+
+"c" "4|5" "4|5" "4|5"
+"ch" "4|5" "4|5" "4|5"
+"ck" "5|45" "5|45" "5|45"
+"rs" "4|94" "4|94" "4|94"
+"rz" "4|94" "4|94" "4|94"
+"j" "1|4" "|4" "|4"
+
+
+// ASCII foldings
+
+ß=s
+à=a
+á=a
+â=a
+ã=a
+ä=a
+å=a
+æ=a
+ç=c
+è=e
+é=e
+ê=e
+ë=e
+ì=i
+í=i
+î=i
+ï=i
+ð=d
+ñ=n
+ò=o
+ó=o
+ô=o
+õ=o
+ö=o
+ø=o
+ù=u
+ú=u
+û=u
+ü=u
+ý=y
+þ=b
+ÿ=y
+ć=c
+ł=l
+ś=s
+ż=z
+ź=z
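
These rules ("pattern", then the replacement at the start of a word, before a vowel, and elsewhere, with '|' marking branching alternatives) drive the DaitchMokotoffSoundex encoder added in this commit. A small sketch, assuming the port keeps commons-codec's parameterless constructor and that the class implements IStringEncoder like the other encoders in this namespace:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    internal static class DaitchMokotoffDemo
    {
        public static void Main()
        {
            var dm = new DaitchMokotoffSoundex();    // assumed parameterless constructor
            Console.WriteLine(dm.Encode("Schwarz")); // prints the six-digit Daitch-Mokotoff code built from the rules above
        }
    }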

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.csproj b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.csproj
new file mode 100644
index 0000000..2a60aff
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.csproj
@@ -0,0 +1,225 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis.Phonetic</RootNamespace>
+    <AssemblyName>Lucene.Net.Analysis.Phonetic</AssemblyName>
+    <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Net.Http" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="BeiderMorseFilter.cs" />
+    <Compile Include="BeiderMorseFilterFactory.cs" />
+    <Compile Include="DoubleMetaphoneFilter.cs" />
+    <Compile Include="DoubleMetaphoneFilterFactory.cs" />
+    <Compile Include="Language\AbstractCaverphone .cs" />
+    <Compile Include="Language\Bm\BeiderMorseEncoder.cs" />
+    <Compile Include="Language\Bm\Lang.cs" />
+    <Compile Include="Language\Bm\Languages.cs" />
+    <Compile Include="Language\Bm\NameType.cs" />
+    <Compile Include="Language\Bm\PhoneticEngine.cs" />
+    <Compile Include="Language\Bm\ResourceConstants.cs" />
+    <Compile Include="Language\Bm\Rule.cs" />
+    <Compile Include="Language\Bm\RuleType.cs" />
+    <Compile Include="Language\Caverphone1.cs" />
+    <Compile Include="Language\Caverphone2.cs" />
+    <Compile Include="Language\ColognePhonetic.cs" />
+    <Compile Include="Language\DaitchMokotoffSoundex.cs" />
+    <Compile Include="Language\DoubleMetaphone.cs" />
+    <Compile Include="Language\MatchRatingApproachEncoder.cs" />
+    <Compile Include="Language\Metaphone.cs" />
+    <Compile Include="Language\Nysiis.cs" />
+    <Compile Include="Language\RefinedSoundex.cs" />
+    <Compile Include="Language\Soundex.cs" />
+    <Compile Include="Language\SoundexUtils.cs" />
+    <Compile Include="Language\StringEncoder.cs" />
+    <Compile Include="PhoneticFilter.cs" />
+    <Compile Include="PhoneticFilterFactory.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="..\CommonAssemblyInfo.cs">
+      <Link>Properties\CommonAssemblyInfo.cs</Link>
+    </Compile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+      <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+      <Name>Lucene.Net.Analysis.Common</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Language\Bm\ash_approx_any.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_english.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_french.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_german.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_approx_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_any.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_common.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_english.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_french.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_german.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_exact_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_hebrew_common.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_languages.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_any.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_english.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_french.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_german.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\ash_rules_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_any.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_arabic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_czech.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_dutch.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_english.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_french.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_german.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_greek.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_greeklatin.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_approx_turkish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_any.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_arabic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_common.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_czech.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_dutch.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_english.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_french.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_german.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_greek.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_greeklatin.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_exact_turkish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_hebrew_common.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_languages.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_any.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_arabic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_cyrillic.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_czech.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_dutch.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_english.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_french.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_german.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_greek.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_greeklatin.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_hungarian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_polish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_romanian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_russian.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\gen_rules_turkish.txt" />
+    <EmbeddedResource Include="Language\Bm\lang.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_any.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_french.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_approx_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_any.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_approx_common.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_common.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_french.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_exact_spanish.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_hebrew_common.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_languages.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_any.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_french.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_hebrew.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_italian.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_portuguese.txt" />
+    <EmbeddedResource Include="Language\Bm\sep_rules_spanish.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="Language\dmrules.txt" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.project.json b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.project.json
new file mode 100644
index 0000000..86d1c12
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.project.json
@@ -0,0 +1,8 @@
+{
+  "runtimes": {
+    "win": {}
+  },
+  "frameworks": {
+    "net451": {}
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.xproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.xproj b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.xproj
new file mode 100644
index 0000000..321b9b2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Lucene.Net.Analysis.Phonetic.xproj
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0.25420" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">14.0.25420</VisualStudioVersion>
+    <VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.Props" Condition="'$(VSToolsPath)' != ''" />
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>56b2ffb7-6870-4420-8bc7-187adf5341d9</ProjectGuid>
+    <RootNamespace>Lucene.Net.Analysis.Phonetic</RootNamespace>
+    <BaseIntermediateOutputPath Condition="'$(BaseIntermediateOutputPath)'=='' ">.\obj</BaseIntermediateOutputPath>
+    <OutputPath Condition="'$(OutputPath)'=='' ">.\bin\</OutputPath>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <SchemaVersion>2.0</SchemaVersion>
+  </PropertyGroup>
+  <Import Project="$(VSToolsPath)\DotNet\Microsoft.DotNet.targets" Condition="'$(VSToolsPath)' != ''" />
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs
new file mode 100644
index 0000000..c5d2886
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs
@@ -0,0 +1,109 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language;
+using Lucene.Net.Analysis.TokenAttributes;
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Create tokens for phonetic matches.
+    /// See the Language namespace.
+    /// </summary>
+    public sealed class PhoneticFilter : TokenFilter
+    {
+        /// <summary>true if encoded tokens should be added as synonyms</summary>
+        private bool inject = true;
+        /// <summary>phonetic encoder</summary>
+        private IStringEncoder encoder = null;
+        /// <summary>captured state, non-null when <c>inject=true</c> and a token is buffered</summary>
+        private State save = null;
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posAtt;
+
+        /// <summary>
+        /// Creates a <see cref="PhoneticFilter"/> with the specified encoder, either
+        /// adding encoded forms as synonyms (<c>inject=true</c>) or
+        /// replacing the original tokens with them.
+        /// </summary>
+        public PhoneticFilter(TokenStream input, IStringEncoder encoder, bool inject)
+            : base(input)
+        {
+            this.encoder = encoder;
+            this.inject = inject;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.posAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (save != null)
+            {
+                // clearAttributes();  // not currently necessary
+                RestoreState(save);
+                save = null;
+                return true;
+            }
+
+            if (!m_input.IncrementToken()) return false;
+
+            // pass through zero-length terms
+            if (termAtt.Length == 0) return true;
+
+            string value = termAtt.ToString();
+            string phonetic = null;
+            try
+            {
+                string v = encoder.Encode(value);
+                if (v.Length > 0 && !value.Equals(v))
+                {
+                    phonetic = v;
+                }
+            }
+            catch (Exception) { /* ignored */ } // just use the direct text
+
+            if (phonetic == null) return true;
+
+            if (!inject)
+            {
+                // just modify this token
+                termAtt.SetEmpty().Append(phonetic);
+                return true;
+            }
+
+            // We need to return both the original and the phonetic tokens.
+            // To avoid an orig=captureState(), change_to_phonetic(), saved=captureState(), restoreState(orig) sequence,
+            // we return the phonetic alternative first.
+
+            int origOffset = posAtt.PositionIncrement;
+            posAtt.PositionIncrement = 0;
+            save = CaptureState();
+
+            posAtt.PositionIncrement = origOffset;
+            termAtt.SetEmpty().Append(phonetic);
+            return true;
+        }
+
+        public override void Reset()
+        {
+            m_input.Reset();
+            save = null;
+        }
+    }
+}
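
A minimal usage sketch (the WhitespaceTokenizer/Metaphone wiring is illustrative, not taken from this commit): with inject=true the filter emits the phonetic code first and then the buffered original term at the same position, as IncrementToken above shows.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Phonetic;
    using Lucene.Net.Analysis.Phonetic.Language;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    internal static class PhoneticFilterDemo
    {
        public static void Main()
        {
            using (TextReader reader = new StringReader("Smith Smythe"))
            {
                Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
                TokenStream stream = new PhoneticFilter(source, new Metaphone(), inject: true);
                ICharTermAttribute termAtt = stream.GetAttribute<ICharTermAttribute>();
                stream.Reset();
                while (stream.IncrementToken())
                {
                    // With Metaphone defaults this typically prints: SM0, Smith, SM0, Smythe
                    Console.WriteLine(termAtt.ToString());
                }
                stream.End();
                stream.Dispose();
            }
        }
    }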

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/PhoneticFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/PhoneticFilterFactory.cs b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilterFactory.cs
new file mode 100644
index 0000000..8af2e5f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilterFactory.cs
@@ -0,0 +1,187 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Reflection;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="PhoneticFilter"/>.
+    /// <para/>
+    /// Create tokens based on phonetic encoders from the Language namespace.
+    /// <para/>
+    /// This takes one required argument, "encoder", and the rest are optional:
+    /// <list type="bullet">
+    ///     <item>
+    ///         <term>encoder</term>
+    ///         <description>
+    ///         required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
+    ///         or "ColognePhonetic" (case insensitive). If the encoder isn't one of these, it is resolved as a class name:
+    ///         used as-is if it already contains a '.', otherwise looked up in the same namespace as these others.
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>inject</term>
+    ///         <description>
+    ///         (default=true) add the encoded tokens to the stream as synonyms of the originals, at the same position (position increment 0)
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>maxCodeLength</term>
+    ///         <description>
+    ///         The maximum length of the phonetic codes, as defined by the encoder. If the encoder doesn't
+    ///         support it, specifying this parameter is an error.
+    ///         </description>
+    ///     </item>
+    /// </list>
+    /// 
+    /// <code>
+    /// &lt;fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.PhoneticFilterFactory" encoder="DoubleMetaphone" inject="true"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    /// <seealso cref="PhoneticFilter"/>
+    public class PhoneticFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        /// <summary>parameter name: either a short name or a full class name</summary>
+        public static readonly string ENCODER = "encoder";
+        /// <summary>parameter name: true if encoded tokens should be added as synonyms</summary>
+        public static readonly string INJECT = "inject"; // boolean
+        /// <summary>parameter name: restricts the length of the phonetic code</summary>
+        public static readonly string MAX_CODE_LENGTH = "maxCodeLength";
+        private static readonly string PACKAGE_CONTAINING_ENCODERS = "Lucene.Net.Analysis.Phonetic.Language.";
+
+        //Effectively constants; uppercase keys
+        private static readonly IDictionary<string, Type> registry = new Dictionary<string, Type>(6);
+
+        static PhoneticFilterFactory()
+        {
+            registry["DoubleMetaphone".ToUpperInvariant()] = typeof(DoubleMetaphone);
+            registry["Metaphone".ToUpperInvariant()] = typeof(Metaphone);
+            registry["Soundex".ToUpperInvariant()] = typeof(Soundex);
+            registry["RefinedSoundex".ToUpperInvariant()] = typeof(RefinedSoundex);
+            registry["Caverphone".ToUpperInvariant()] = typeof(Caverphone2);
+            registry["ColognePhonetic".ToUpperInvariant()] = typeof(ColognePhonetic);
+        }
+
+        internal bool inject; //accessed by the test
+        private readonly string name;
+        private readonly int? maxCodeLength;
+        private Type clazz = null;
+        private MethodInfo setMaxCodeLenMethod = null;
+
+        /// <summary>Creates a new <see cref="PhoneticFilterFactory"/>.</summary>
+        public PhoneticFilterFactory(IDictionary<string, string> args)
+                : base(args)
+        {
+            inject = GetBoolean(args, INJECT, true);
+            name = Require(args, ENCODER);
+            string v = Get(args, MAX_CODE_LENGTH);
+            if (v != null)
+            {
+                maxCodeLength = int.Parse(v, CultureInfo.InvariantCulture);
+            }
+            else
+            {
+                maxCodeLength = null;
+            }
+            if (!(args.Count == 0))
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            registry.TryGetValue(name.ToUpperInvariant(), out clazz);
+            if (clazz == null)
+            {
+                clazz = ResolveEncoder(name, loader);
+            }
+
+            if (maxCodeLength != null)
+            {
+                try
+                {
+                    setMaxCodeLenMethod = clazz.GetMethod("set_MaxCodeLen");
+                }
+                catch (Exception e)
+                {
+                    throw new ArgumentException("Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH, e);
+                }
+            }
+
+            GetEncoder();//trigger initialization for potential problems to be thrown now
+        }
+
+        private Type ResolveEncoder(string name, IResourceLoader loader)
+        {
+            string lookupName = name;
+            if (name.IndexOf('.') == -1)
+            {
+                lookupName = PACKAGE_CONTAINING_ENCODERS + name;
+            }
+            try
+            {
+                return loader.NewInstance<IStringEncoder>(lookupName).GetType();
+            }
+            catch (Exception e)
+            {
+                throw new ArgumentException("Error loading encoder '" + name + "': must be full class name or one of " + Collections.ToString(registry.Keys), e);
+            }
+        }
+
+        /// <summary>Must be thread-safe.</summary>
+        protected internal virtual IStringEncoder GetEncoder()
+        {
+            // Unfortunately, Commons-Codec doesn't offer any thread-safety guarantees, so we must play it safe and instantiate
+            // every time.  A simple benchmark showed this as negligible.
+            try
+            {
+                IStringEncoder encoder = (IStringEncoder)Activator.CreateInstance(clazz);
+                // Try to set the maxCodeLength
+                if (maxCodeLength != null && setMaxCodeLenMethod != null)
+                {
+                    setMaxCodeLenMethod.Invoke(encoder, new object[] { maxCodeLength });
+                }
+                return encoder;
+            }
+            catch (Exception e)
+            {
+                Exception t = (e is TargetInvocationException) ? e.InnerException : e;
+                throw new ArgumentException("Error initializing encoder: " + name + " / " + clazz, t);
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new PhoneticFilter(input, GetEncoder(), inject);
+        }
+    }
+}
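
For completeness, a sketch of driving the factory from code rather than a Solr schema. The args dictionary mirrors the parameters documented above; the IResourceLoader instance is assumed to be supplied by the hosting code and is only needed to resolve encoder names that are not in the built-in registry:

    using System.Collections.Generic;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Phonetic;
    using Lucene.Net.Analysis.Util;

    internal static class PhoneticFilterFactoryDemo
    {
        public static TokenStream Wrap(TokenStream input, IResourceLoader loader)
        {
            var args = new Dictionary<string, string>
            {
                { "encoder", "DoubleMetaphone" }, // required
                { "inject", "true" },             // keep originals, add codes as synonyms
                { "maxCodeLength", "6" }          // only legal for encoders exposing MaxCodeLen
            };
            var factory = new PhoneticFilterFactory(args);
            factory.Inform(loader);               // resolves the encoder and validates maxCodeLength
            return factory.Create(input);
        }
    }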

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs b/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..b7cd03f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Properties/AssemblyInfo.cs
@@ -0,0 +1,48 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following 
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Analysis.Phonetic")]
+[assembly: AssemblyDescription(
+    "Analyzer for indexing phonetic signatures (for sounds-alike search) " +
+    "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.Analysis.Phonetic")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible 
+// to COM components.  If you need to access a type in this assembly from 
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("dafe3b64-616a-4a2f-90e5-1f135e8a9af5")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Analysis.Phonetic")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/project.json b/src/Lucene.Net.Analysis.Phonetic/project.json
new file mode 100644
index 0000000..460721b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/project.json
@@ -0,0 +1,54 @@
+{
+  "version": "4.8.0",
+  "title": "Lucene.Net.Analysis.Phonetic",
+  "description": "Analyzer for indexing phonetic signatures (for sounds-alike search) for the Lucene.Net full-text search engine library from The Apache Software Foundation.",
+  "authors": [ "The Apache Software Foundation" ],
+  "packOptions": {
+    "projectUrl": "http://lucenenet.apache.org/",
+    "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+    "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+    "owners": [ "The Apache Software Foundation" ],
+    "repository": { "url": "https://github.com/apache/lucenenet" },
+    "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query", "soundex", "double", "metaphone", "sounds", "like", "beider", "morse", "cologne", "caverphone", "nysiis", "match", "rating" ]
+  },
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [ "../CommonAssemblyInfo.cs" ]
+    },
+    "embed": {
+      "include": [
+        "Language/Bm/ash_*.txt",
+        "Language/Bm/gen_*.txt",
+        "Language/Bm/sep_*.txt"
+      ],
+      "includeFiles": [
+        "Language/Bm/lang.txt",
+        "Language/dmrules.txt"
+      ]
+    },
+    "nowarn": [ "1591", "1573" ]
+  },
+  "dependencies": {
+    "icu.net": "54.1.1-alpha",
+    "Lucene.Net": "4.8.0",
+    "Lucene.Net.Analysis.Common": "4.8.0"
+  },
+  "frameworks": {
+    "netstandard1.5": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "debugType": "portable",
+        "define": [ "NETSTANDARD" ]
+      },
+      "dependencies": {
+        "NETStandard.Library": "1.6.0"
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "debugType": "full",
+        "define": [ "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  }
+}


[05/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphone2Test.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphone2Test.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphone2Test.cs
new file mode 100644
index 0000000..da00973
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphone2Test.cs
@@ -0,0 +1,1291 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="DoubleMetaphone"/>.
+    /// <para/>
+    /// The test data was extracted from Stephen Woodbridge's <a
+    /// href="http://swoodbridge.com/DoubleMetaPhone/surnames.txt">PHP test program</a>.
+    /// </summary>
+    public class DoubleMetaphone2Test : StringEncoderAbstractTest<DoubleMetaphone>
+    {
+        private static readonly int ALTERNATE_INDEX = 2;
+
+        private static readonly int PRIMARY_INDEX = 1;
+
+        // Test values and their expected primary & alternate Double Metaphone encodings
+        private static readonly string[][]
+        TEST_DATA = {
+        new string[] {"ALLERTON", "ALRT", "ALRT"},
+        new string[] {"Acton", "AKTN", "AKTN"},
+        new string[] {"Adams", "ATMS", "ATMS"},
+        new string[] {"Aggar", "AKR", "AKR"},
+        new string[] {"Ahl", "AL", "AL"},
+        new string[] {"Aiken", "AKN", "AKN"},
+        new string[] {"Alan", "ALN", "ALN"},
+        new string[] {"Alcock", "ALKK", "ALKK"},
+        new string[] {"Alden", "ALTN", "ALTN"},
+        new string[] {"Aldham", "ALTM", "ALTM"},
+        new string[] {"Allen", "ALN", "ALN"},
+        new string[] {"Allerton", "ALRT", "ALRT"},
+        new string[] {"Alsop", "ALSP", "ALSP"},
+        new string[] {"Alwein", "ALN", "ALN"},
+        new string[] {"Ambler", "AMPL", "AMPL"},
+        new string[] {"Andevill", "ANTF", "ANTF"},
+        new string[] {"Andrews", "ANTR", "ANTR"},
+        new string[] {"Andreyco", "ANTR", "ANTR"},
+        new string[] {"Andriesse", "ANTR", "ANTR"},
+        new string[] {"Angier", "ANJ", "ANJR"},
+        new string[] {"Annabel", "ANPL", "ANPL"},
+        new string[] {"Anne", "AN", "AN"},
+        new string[] {"Anstye", "ANST", "ANST"},
+        new string[] {"Appling", "APLN", "APLN"},
+        new string[] {"Apuke", "APK", "APK"},
+        new string[] {"Arnold", "ARNL", "ARNL"},
+        new string[] {"Ashby", "AXP", "AXP"},
+        new string[] {"Astwood", "ASTT", "ASTT"},
+        new string[] {"Atkinson", "ATKN", "ATKN"},
+        new string[] {"Audley", "ATL", "ATL"},
+        new string[] {"Austin", "ASTN", "ASTN"},
+        new string[] {"Avenal", "AFNL", "AFNL"},
+        new string[] {"Ayer", "AR", "AR"},
+        new string[] {"Ayot", "AT", "AT"},
+        new string[] {"Babbitt", "PPT", "PPT"},
+        new string[] {"Bachelor", "PXLR", "PKLR"},
+        new string[] {"Bachelour", "PXLR", "PKLR"},
+        new string[] {"Bailey", "PL", "PL"},
+        new string[] {"Baivel", "PFL", "PFL"},
+        new string[] {"Baker", "PKR", "PKR"},
+        new string[] {"Baldwin", "PLTN", "PLTN"},
+        new string[] {"Balsley", "PLSL", "PLSL"},
+        new string[] {"Barber", "PRPR", "PRPR"},
+        new string[] {"Barker", "PRKR", "PRKR"},
+        new string[] {"Barlow", "PRL", "PRLF"},
+        new string[] {"Barnard", "PRNR", "PRNR"},
+        new string[] {"Barnes", "PRNS", "PRNS"},
+        new string[] {"Barnsley", "PRNS", "PRNS"},
+        new string[] {"Barouxis", "PRKS", "PRKS"},
+        new string[] {"Bartlet", "PRTL", "PRTL"},
+        new string[] {"Basley", "PSL", "PSL"},
+        new string[] {"Basset", "PST", "PST"},
+        new string[] {"Bassett", "PST", "PST"},
+        new string[] {"Batchlor", "PXLR", "PXLR"},
+        new string[] {"Bates", "PTS", "PTS"},
+        new string[] {"Batson", "PTSN", "PTSN"},
+        new string[] {"Bayes", "PS", "PS"},
+        new string[] {"Bayley", "PL", "PL"},
+        new string[] {"Beale", "PL", "PL"},
+        new string[] {"Beauchamp", "PXMP", "PKMP"},
+        new string[] {"Beauclerc", "PKLR", "PKLR"},
+        new string[] {"Beech", "PK", "PK"},
+        new string[] {"Beers", "PRS", "PRS"},
+        new string[] {"Beke", "PK", "PK"},
+        new string[] {"Belcher", "PLXR", "PLKR"},
+        new string[] {"benign", "PNN", "PNKN"},
+        new string[] {"Benjamin", "PNJM", "PNJM"},
+        new string[] {"Benningham", "PNNK", "PNNK"},
+        new string[] {"Bereford", "PRFR", "PRFR"},
+        new string[] {"Bergen", "PRJN", "PRKN"},
+        new string[] {"Berkeley", "PRKL", "PRKL"},
+        new string[] {"Berry", "PR", "PR"},
+        new string[] {"Besse", "PS", "PS"},
+        new string[] {"Bessey", "PS", "PS"},
+        new string[] {"Bessiles", "PSLS", "PSLS"},
+        new string[] {"Bigelow", "PJL", "PKLF"},
+        new string[] {"Bigg", "PK", "PK"},
+        new string[] {"Bigod", "PKT", "PKT"},
+        new string[] {"Billings", "PLNK", "PLNK"},
+        new string[] {"Bimper", "PMPR", "PMPR"},
+        new string[] {"Binker", "PNKR", "PNKR"},
+        new string[] {"Birdsill", "PRTS", "PRTS"},
+        new string[] {"Bishop", "PXP", "PXP"},
+        new string[] {"Black", "PLK", "PLK"},
+        new string[] {"Blagge", "PLK", "PLK"},
+        new string[] {"Blake", "PLK", "PLK"},
+        new string[] {"Blanck", "PLNK", "PLNK"},
+        new string[] {"Bledsoe", "PLTS", "PLTS"},
+        new string[] {"Blennerhasset", "PLNR", "PLNR"},
+        new string[] {"Blessing", "PLSN", "PLSN"},
+        new string[] {"Blewett", "PLT", "PLT"},
+        new string[] {"Bloctgoed", "PLKT", "PLKT"},
+        new string[] {"Bloetgoet", "PLTK", "PLTK"},
+        new string[] {"Bloodgood", "PLTK", "PLTK"},
+        new string[] {"Blossom", "PLSM", "PLSM"},
+        new string[] {"Blount", "PLNT", "PLNT"},
+        new string[] {"Bodine", "PTN", "PTN"},
+        new string[] {"Bodman", "PTMN", "PTMN"},
+        new string[] {"BonCoeur", "PNKR", "PNKR"},
+        new string[] {"Bond", "PNT", "PNT"},
+        new string[] {"Boscawen", "PSKN", "PSKN"},
+        new string[] {"Bosworth", "PSR0", "PSRT"},
+        new string[] {"Bouchier", "PX", "PKR"},
+        new string[] {"Bowne", "PN", "PN"},
+        new string[] {"Bradbury", "PRTP", "PRTP"},
+        new string[] {"Bradder", "PRTR", "PRTR"},
+        new string[] {"Bradford", "PRTF", "PRTF"},
+        new string[] {"Bradstreet", "PRTS", "PRTS"},
+        new string[] {"Braham", "PRHM", "PRHM"},
+        new string[] {"Brailsford", "PRLS", "PRLS"},
+        new string[] {"Brainard", "PRNR", "PRNR"},
+        new string[] {"Brandish", "PRNT", "PRNT"},
+        new string[] {"Braun", "PRN", "PRN"},
+        new string[] {"Brecc", "PRK", "PRK"},
+        new string[] {"Brent", "PRNT", "PRNT"},
+        new string[] {"Brenton", "PRNT", "PRNT"},
+        new string[] {"Briggs", "PRKS", "PRKS"},
+        new string[] {"Brigham", "PRM", "PRM"},
+        new string[] {"Brobst", "PRPS", "PRPS"},
+        new string[] {"Brome", "PRM", "PRM"},
+        new string[] {"Bronson", "PRNS", "PRNS"},
+        new string[] {"Brooks", "PRKS", "PRKS"},
+        new string[] {"Brouillard", "PRLR", "PRLR"},
+        new string[] {"Brown", "PRN", "PRN"},
+        new string[] {"Browne", "PRN", "PRN"},
+        new string[] {"Brownell", "PRNL", "PRNL"},
+        new string[] {"Bruley", "PRL", "PRL"},
+        new string[] {"Bryant", "PRNT", "PRNT"},
+        new string[] {"Brzozowski", "PRSS", "PRTS"},
+        new string[] {"Buide", "PT", "PT"},
+        new string[] {"Bulmer", "PLMR", "PLMR"},
+        new string[] {"Bunker", "PNKR", "PNKR"},
+        new string[] {"Burden", "PRTN", "PRTN"},
+        new string[] {"Burge", "PRJ", "PRK"},
+        new string[] {"Burgoyne", "PRKN", "PRKN"},
+        new string[] {"Burke", "PRK", "PRK"},
+        new string[] {"Burnett", "PRNT", "PRNT"},
+        new string[] {"Burpee", "PRP", "PRP"},
+        new string[] {"Bursley", "PRSL", "PRSL"},
+        new string[] {"Burton", "PRTN", "PRTN"},
+        new string[] {"Bushnell", "PXNL", "PXNL"},
+        new string[] {"Buss", "PS", "PS"},
+        new string[] {"Buswell", "PSL", "PSL"},
+        new string[] {"Butler", "PTLR", "PTLR"},
+        new string[] {"Calkin", "KLKN", "KLKN"},
+        new string[] {"Canada", "KNT", "KNT"},
+        new string[] {"Canmore", "KNMR", "KNMR"},
+        new string[] {"Canney", "KN", "KN"},
+        new string[] {"Capet", "KPT", "KPT"},
+        new string[] {"Card", "KRT", "KRT"},
+        new string[] {"Carman", "KRMN", "KRMN"},
+        new string[] {"Carpenter", "KRPN", "KRPN"},
+        new string[] {"Cartwright", "KRTR", "KRTR"},
+        new string[] {"Casey", "KS", "KS"},
+        new string[] {"Catterfield", "KTRF", "KTRF"},
+        new string[] {"Ceeley", "SL", "SL"},
+        new string[] {"Chambers", "XMPR", "XMPR"},
+        new string[] {"Champion", "XMPN", "XMPN"},
+        new string[] {"Chapman", "XPMN", "XPMN"},
+        new string[] {"Chase", "XS", "XS"},
+        new string[] {"Cheney", "XN", "XN"},
+        new string[] {"Chetwynd", "XTNT", "XTNT"},
+        new string[] {"Chevalier", "XFL", "XFLR"},
+        new string[] {"Chillingsworth", "XLNK", "XLNK"},
+        new string[] {"Christie", "KRST", "KRST"},
+        new string[] {"Chubbuck", "XPK", "XPK"},
+        new string[] {"Church", "XRX", "XRK"},
+        new string[] {"Clark", "KLRK", "KLRK"},
+        new string[] {"Clarke", "KLRK", "KLRK"},
+        new string[] {"Cleare", "KLR", "KLR"},
+        new string[] {"Clement", "KLMN", "KLMN"},
+        new string[] {"Clerke", "KLRK", "KLRK"},
+        new string[] {"Clibben", "KLPN", "KLPN"},
+        new string[] {"Clifford", "KLFR", "KLFR"},
+        new string[] {"Clivedon", "KLFT", "KLFT"},
+        new string[] {"Close", "KLS", "KLS"},
+        new string[] {"Clothilde", "KL0L", "KLTL"},
+        new string[] {"Cobb", "KP", "KP"},
+        new string[] {"Coburn", "KPRN", "KPRN"},
+        new string[] {"Coburne", "KPRN", "KPRN"},
+        new string[] {"Cocke", "KK", "KK"},
+        new string[] {"Coffin", "KFN", "KFN"},
+        new string[] {"Coffyn", "KFN", "KFN"},
+        new string[] {"Colborne", "KLPR", "KLPR"},
+        new string[] {"Colby", "KLP", "KLP"},
+        new string[] {"Cole", "KL", "KL"},
+        new string[] {"Coleman", "KLMN", "KLMN"},
+        new string[] {"Collier", "KL", "KLR"},
+        new string[] {"Compton", "KMPT", "KMPT"},
+        new string[] {"Cone", "KN", "KN"},
+        new string[] {"Cook", "KK", "KK"},
+        new string[] {"Cooke", "KK", "KK"},
+        new string[] {"Cooper", "KPR", "KPR"},
+        new string[] {"Copperthwaite", "KPR0", "KPRT"},
+        new string[] {"Corbet", "KRPT", "KRPT"},
+        new string[] {"Corell", "KRL", "KRL"},
+        new string[] {"Corey", "KR", "KR"},
+        new string[] {"Corlies", "KRLS", "KRLS"},
+        new string[] {"Corneliszen", "KRNL", "KRNL"},
+        new string[] {"Cornelius", "KRNL", "KRNL"},
+        new string[] {"Cornwallis", "KRNL", "KRNL"},
+        new string[] {"Cosgrove", "KSKR", "KSKR"},
+        new string[] {"Count of Brionne", "KNTF", "KNTF"},
+        new string[] {"Covill", "KFL", "KFL"},
+        new string[] {"Cowperthwaite", "KPR0", "KPRT"},
+        new string[] {"Cowperwaite", "KPRT", "KPRT"},
+        new string[] {"Crane", "KRN", "KRN"},
+        new string[] {"Creagmile", "KRKM", "KRKM"},
+        new string[] {"Crew", "KR", "KRF"},
+        new string[] {"Crispin", "KRSP", "KRSP"},
+        new string[] {"Crocker", "KRKR", "KRKR"},
+        new string[] {"Crockett", "KRKT", "KRKT"},
+        new string[] {"Crosby", "KRSP", "KRSP"},
+        new string[] {"Crump", "KRMP", "KRMP"},
+        new string[] {"Cunningham", "KNNK", "KNNK"},
+        new string[] {"Curtis", "KRTS", "KRTS"},
+        new string[] {"Cutha", "K0", "KT"},
+        new string[] {"Cutter", "KTR", "KTR"},
+        new string[] {"D'Aubigny", "TPN", "TPKN"},
+        new string[] {"DAVIS", "TFS", "TFS"},
+        new string[] {"Dabinott", "TPNT", "TPNT"},
+        new string[] {"Dacre", "TKR", "TKR"},
+        new string[] {"Daggett", "TKT", "TKT"},
+        new string[] {"Danvers", "TNFR", "TNFR"},
+        new string[] {"Darcy", "TRS", "TRS"},
+        new string[] {"Davis", "TFS", "TFS"},
+        new string[] {"Dawn", "TN", "TN"},
+        new string[] {"Dawson", "TSN", "TSN"},
+        new string[] {"Day", "T", "T"},
+        new string[] {"Daye", "T", "T"},
+        new string[] {"DeGrenier", "TKRN", "TKRN"},
+        new string[] {"Dean", "TN", "TN"},
+        new string[] {"Deekindaugh", "TKNT", "TKNT"},
+        new string[] {"Dennis", "TNS", "TNS"},
+        new string[] {"Denny", "TN", "TN"},
+        new string[] {"Denton", "TNTN", "TNTN"},
+        new string[] {"Desborough", "TSPR", "TSPR"},
+        new string[] {"Despenser", "TSPN", "TSPN"},
+        new string[] {"Deverill", "TFRL", "TFRL"},
+        new string[] {"Devine", "TFN", "TFN"},
+        new string[] {"Dexter", "TKST", "TKST"},
+        new string[] {"Dillaway", "TL", "TL"},
+        new string[] {"Dimmick", "TMK", "TMK"},
+        new string[] {"Dinan", "TNN", "TNN"},
+        new string[] {"Dix", "TKS", "TKS"},
+        new string[] {"Doggett", "TKT", "TKT"},
+        new string[] {"Donahue", "TNH", "TNH"},
+        new string[] {"Dorfman", "TRFM", "TRFM"},
+        new string[] {"Dorris", "TRS", "TRS"},
+        new string[] {"Dow", "T", "TF"},
+        new string[] {"Downey", "TN", "TN"},
+        new string[] {"Downing", "TNNK", "TNNK"},
+        new string[] {"Dowsett", "TST", "TST"},
+        new string[] {"Duck?", "TK", "TK"},
+        new string[] {"Dudley", "TTL", "TTL"},
+        new string[] {"Duffy", "TF", "TF"},
+        new string[] {"Dunn", "TN", "TN"},
+        new string[] {"Dunsterville", "TNST", "TNST"},
+        new string[] {"Durrant", "TRNT", "TRNT"},
+        new string[] {"Durrin", "TRN", "TRN"},
+        new string[] {"Dustin", "TSTN", "TSTN"},
+        new string[] {"Duston", "TSTN", "TSTN"},
+        new string[] {"Eames", "AMS", "AMS"},
+        new string[] {"Early", "ARL", "ARL"},
+        new string[] {"Easty", "AST", "AST"},
+        new string[] {"Ebbett", "APT", "APT"},
+        new string[] {"Eberbach", "APRP", "APRP"},
+        new string[] {"Eberhard", "APRR", "APRR"},
+        new string[] {"Eddy", "AT", "AT"},
+        new string[] {"Edenden", "ATNT", "ATNT"},
+        new string[] {"Edwards", "ATRT", "ATRT"},
+        new string[] {"Eglinton", "AKLN", "ALNT"},
+        new string[] {"Eliot", "ALT", "ALT"},
+        new string[] {"Elizabeth", "ALSP", "ALSP"},
+        new string[] {"Ellis", "ALS", "ALS"},
+        new string[] {"Ellison", "ALSN", "ALSN"},
+        new string[] {"Ellot", "ALT", "ALT"},
+        new string[] {"Elny", "ALN", "ALN"},
+        new string[] {"Elsner", "ALSN", "ALSN"},
+        new string[] {"Emerson", "AMRS", "AMRS"},
+        new string[] {"Empson", "AMPS", "AMPS"},
+        new string[] {"Est", "AST", "AST"},
+        new string[] {"Estabrook", "ASTP", "ASTP"},
+        new string[] {"Estes", "ASTS", "ASTS"},
+        new string[] {"Estey", "AST", "AST"},
+        new string[] {"Evans", "AFNS", "AFNS"},
+        new string[] {"Fallowell", "FLL", "FLL"},
+        new string[] {"Farnsworth", "FRNS", "FRNS"},
+        new string[] {"Feake", "FK", "FK"},
+        new string[] {"Feke", "FK", "FK"},
+        new string[] {"Fellows", "FLS", "FLS"},
+        new string[] {"Fettiplace", "FTPL", "FTPL"},
+        new string[] {"Finney", "FN", "FN"},
+        new string[] {"Fischer", "FXR", "FSKR"},
+        new string[] {"Fisher", "FXR", "FXR"},
+        new string[] {"Fisk", "FSK", "FSK"},
+        new string[] {"Fiske", "FSK", "FSK"},
+        new string[] {"Fletcher", "FLXR", "FLXR"},
+        new string[] {"Folger", "FLKR", "FLJR"},
+        new string[] {"Foliot", "FLT", "FLT"},
+        new string[] {"Folyot", "FLT", "FLT"},
+        new string[] {"Fones", "FNS", "FNS"},
+        new string[] {"Fordham", "FRTM", "FRTM"},
+        new string[] {"Forstner", "FRST", "FRST"},
+        new string[] {"Fosten", "FSTN", "FSTN"},
+        new string[] {"Foster", "FSTR", "FSTR"},
+        new string[] {"Foulke", "FLK", "FLK"},
+        new string[] {"Fowler", "FLR", "FLR"},
+        new string[] {"Foxwell", "FKSL", "FKSL"},
+        new string[] {"Fraley", "FRL", "FRL"},
+        new string[] {"Franceys", "FRNS", "FRNS"},
+        new string[] {"Franke", "FRNK", "FRNK"},
+        new string[] {"Frascella", "FRSL", "FRSL"},
+        new string[] {"Frazer", "FRSR", "FRSR"},
+        new string[] {"Fredd", "FRT", "FRT"},
+        new string[] {"Freeman", "FRMN", "FRMN"},
+        new string[] {"French", "FRNX", "FRNK"},
+        new string[] {"Freville", "FRFL", "FRFL"},
+        new string[] {"Frey", "FR", "FR"},
+        new string[] {"Frick", "FRK", "FRK"},
+        new string[] {"Frier", "FR", "FRR"},
+        new string[] {"Froe", "FR", "FR"},
+        new string[] {"Frorer", "FRRR", "FRRR"},
+        new string[] {"Frost", "FRST", "FRST"},
+        new string[] {"Frothingham", "FR0N", "FRTN"},
+        new string[] {"Fry", "FR", "FR"},
+        new string[] {"Gaffney", "KFN", "KFN"},
+        new string[] {"Gage", "KJ", "KK"},
+        new string[] {"Gallion", "KLN", "KLN"},
+        new string[] {"Gallishan", "KLXN", "KLXN"},
+        new string[] {"Gamble", "KMPL", "KMPL"},
+        new string[] {"garage", "KRJ", "KRK"},
+        new string[] {"Garbrand", "KRPR", "KRPR"},
+        new string[] {"Gardner", "KRTN", "KRTN"},
+        new string[] {"Garrett", "KRT", "KRT"},
+        new string[] {"Gassner", "KSNR", "KSNR"},
+        new string[] {"Gater", "KTR", "KTR"},
+        new string[] {"Gaunt", "KNT", "KNT"},
+        new string[] {"Gayer", "KR", "KR"},
+        new string[] {"George", "JRJ", "KRK"},
+        new string[] {"Gerken", "KRKN", "JRKN"},
+        new string[] {"Gerritsen", "KRTS", "JRTS"},
+        new string[] {"Gibbs", "KPS", "JPS"},
+        new string[] {"Giffard", "JFRT", "KFRT"},
+        new string[] {"Gilbert", "KLPR", "JLPR"},
+        new string[] {"Gill", "KL", "JL"},
+        new string[] {"Gilman", "KLMN", "JLMN"},
+        new string[] {"Glass", "KLS", "KLS"},
+        new string[] {"Goddard\\Gifford", "KTRT", "KTRT"},
+        new string[] {"Godfrey", "KTFR", "KTFR"},
+        new string[] {"Godwin", "KTN", "KTN"},
+        new string[] {"Goodale", "KTL", "KTL"},
+        new string[] {"Goodnow", "KTN", "KTNF"},
+        new string[] {"Gorham", "KRM", "KRM"},
+        new string[] {"Goseline", "KSLN", "KSLN"},
+        new string[] {"Gott", "KT", "KT"},
+        new string[] {"Gould", "KLT", "KLT"},
+        new string[] {"Grafton", "KRFT", "KRFT"},
+        new string[] {"Grant", "KRNT", "KRNT"},
+        new string[] {"Gray", "KR", "KR"},
+        new string[] {"Green", "KRN", "KRN"},
+        new string[] {"Griffin", "KRFN", "KRFN"},
+        new string[] {"Grill", "KRL", "KRL"},
+        new string[] {"Grim", "KRM", "KRM"},
+        new string[] {"Grisgonelle", "KRSK", "KRSK"},
+        new string[] {"Gross", "KRS", "KRS"},
+        new string[] {"Guba", "KP", "KP"},
+        new string[] {"Gybbes", "KPS", "JPS"},
+        new string[] {"Haburne", "HPRN", "HPRN"},
+        new string[] {"Hackburne", "HKPR", "HKPR"},
+        new string[] {"Haddon?", "HTN", "HTN"},
+        new string[] {"Haines", "HNS", "HNS"},
+        new string[] {"Hale", "HL", "HL"},
+        new string[] {"Hall", "HL", "HL"},
+        new string[] {"Hallet", "HLT", "HLT"},
+        new string[] {"Hallock", "HLK", "HLK"},
+        new string[] {"Halstead", "HLST", "HLST"},
+        new string[] {"Hammond", "HMNT", "HMNT"},
+        new string[] {"Hance", "HNS", "HNS"},
+        new string[] {"Handy", "HNT", "HNT"},
+        new string[] {"Hanson", "HNSN", "HNSN"},
+        new string[] {"Harasek", "HRSK", "HRSK"},
+        new string[] {"Harcourt", "HRKR", "HRKR"},
+        new string[] {"Hardy", "HRT", "HRT"},
+        new string[] {"Harlock", "HRLK", "HRLK"},
+        new string[] {"Harris", "HRS", "HRS"},
+        new string[] {"Hartley", "HRTL", "HRTL"},
+        new string[] {"Harvey", "HRF", "HRF"},
+        new string[] {"Harvie", "HRF", "HRF"},
+        new string[] {"Harwood", "HRT", "HRT"},
+        new string[] {"Hathaway", "H0", "HT"},
+        new string[] {"Haukeness", "HKNS", "HKNS"},
+        new string[] {"Hawkes", "HKS", "HKS"},
+        new string[] {"Hawkhurst", "HKRS", "HKRS"},
+        new string[] {"Hawkins", "HKNS", "HKNS"},
+        new string[] {"Hawley", "HL", "HL"},
+        new string[] {"Heald", "HLT", "HLT"},
+        new string[] {"Helsdon", "HLST", "HLST"},
+        new string[] {"Hemenway", "HMN", "HMN"},
+        new string[] {"Hemmenway", "HMN", "HMN"},
+        new string[] {"Henck", "HNK", "HNK"},
+        new string[] {"Henderson", "HNTR", "HNTR"},
+        new string[] {"Hendricks", "HNTR", "HNTR"},
+        new string[] {"Hersey", "HRS", "HRS"},
+        new string[] {"Hewes", "HS", "HS"},
+        new string[] {"Heyman", "HMN", "HMN"},
+        new string[] {"Hicks", "HKS", "HKS"},
+        new string[] {"Hidden", "HTN", "HTN"},
+        new string[] {"Higgs", "HKS", "HKS"},
+        new string[] {"Hill", "HL", "HL"},
+        new string[] {"Hills", "HLS", "HLS"},
+        new string[] {"Hinckley", "HNKL", "HNKL"},
+        new string[] {"Hipwell", "HPL", "HPL"},
+        new string[] {"Hobart", "HPRT", "HPRT"},
+        new string[] {"Hoben", "HPN", "HPN"},
+        new string[] {"Hoffmann", "HFMN", "HFMN"},
+        new string[] {"Hogan", "HKN", "HKN"},
+        new string[] {"Holmes", "HLMS", "HLMS"},
+        new string[] {"Hoo", "H", "H"},
+        new string[] {"Hooker", "HKR", "HKR"},
+        new string[] {"Hopcott", "HPKT", "HPKT"},
+        new string[] {"Hopkins", "HPKN", "HPKN"},
+        new string[] {"Hopkinson", "HPKN", "HPKN"},
+        new string[] {"Hornsey", "HRNS", "HRNS"},
+        new string[] {"Houckgeest", "HKJS", "HKKS"},
+        new string[] {"Hough", "H", "H"},
+        new string[] {"Houstin", "HSTN", "HSTN"},
+        new string[] {"How", "H", "HF"},
+        new string[] {"Howe", "H", "H"},
+        new string[] {"Howland", "HLNT", "HLNT"},
+        new string[] {"Hubner", "HPNR", "HPNR"},
+        new string[] {"Hudnut", "HTNT", "HTNT"},
+        new string[] {"Hughes", "HS", "HS"},
+        new string[] {"Hull", "HL", "HL"},
+        new string[] {"Hulme", "HLM", "HLM"},
+        new string[] {"Hume", "HM", "HM"},
+        new string[] {"Hundertumark", "HNTR", "HNTR"},
+        new string[] {"Hundley", "HNTL", "HNTL"},
+        new string[] {"Hungerford", "HNKR", "HNJR"},
+        new string[] {"Hunt", "HNT", "HNT"},
+        new string[] {"Hurst", "HRST", "HRST"},
+        new string[] {"Husbands", "HSPN", "HSPN"},
+        new string[] {"Hussey", "HS", "HS"},
+        new string[] {"Husted", "HSTT", "HSTT"},
+        new string[] {"Hutchins", "HXNS", "HXNS"},
+        new string[] {"Hutchinson", "HXNS", "HXNS"},
+        new string[] {"Huttinger", "HTNK", "HTNJ"},
+        new string[] {"Huybertsen", "HPRT", "HPRT"},
+        new string[] {"Iddenden", "ATNT", "ATNT"},
+        new string[] {"Ingraham", "ANKR", "ANKR"},
+        new string[] {"Ives", "AFS", "AFS"},
+        new string[] {"Jackson", "JKSN", "AKSN"},
+        new string[] {"Jacob", "JKP", "AKP"},
+        new string[] {"Jans", "JNS", "ANS"},
+        new string[] {"Jenkins", "JNKN", "ANKN"},
+        new string[] {"Jewett", "JT", "AT"},
+        new string[] {"Jewitt", "JT", "AT"},
+        new string[] {"Johnson", "JNSN", "ANSN"},
+        new string[] {"Jones", "JNS", "ANS"},
+        new string[] {"Josephine", "JSFN", "HSFN"},
+        new string[] {"Judd", "JT", "AT"},
+        new string[] {"June", "JN", "AN"},
+        new string[] {"Kamarowska", "KMRS", "KMRS"},
+        new string[] {"Kay", "K", "K"},
+        new string[] {"Kelley", "KL", "KL"},
+        new string[] {"Kelly", "KL", "KL"},
+        new string[] {"Keymber", "KMPR", "KMPR"},
+        new string[] {"Keynes", "KNS", "KNS"},
+        new string[] {"Kilham", "KLM", "KLM"},
+        new string[] {"Kim", "KM", "KM"},
+        new string[] {"Kimball", "KMPL", "KMPL"},
+        new string[] {"King", "KNK", "KNK"},
+        new string[] {"Kinsey", "KNS", "KNS"},
+        new string[] {"Kirk", "KRK", "KRK"},
+        new string[] {"Kirton", "KRTN", "KRTN"},
+        new string[] {"Kistler", "KSTL", "KSTL"},
+        new string[] {"Kitchen", "KXN", "KXN"},
+        new string[] {"Kitson", "KTSN", "KTSN"},
+        new string[] {"Klett", "KLT", "KLT"},
+        new string[] {"Kline", "KLN", "KLN"},
+        new string[] {"Knapp", "NP", "NP"},
+        new string[] {"Knight", "NT", "NT"},
+        new string[] {"Knote", "NT", "NT"},
+        new string[] {"Knott", "NT", "NT"},
+        new string[] {"Knox", "NKS", "NKS"},
+        new string[] {"Koeller", "KLR", "KLR"},
+        new string[] {"La Pointe", "LPNT", "LPNT"},
+        new string[] {"LaPlante", "LPLN", "LPLN"},
+        new string[] {"Laimbeer", "LMPR", "LMPR"},
+        new string[] {"Lamb", "LMP", "LMP"},
+        new string[] {"Lambertson", "LMPR", "LMPR"},
+        new string[] {"Lancto", "LNKT", "LNKT"},
+        new string[] {"Landry", "LNTR", "LNTR"},
+        new string[] {"Lane", "LN", "LN"},
+        new string[] {"Langendyck", "LNJN", "LNKN"},
+        new string[] {"Langer", "LNKR", "LNJR"},
+        new string[] {"Langford", "LNKF", "LNKF"},
+        new string[] {"Lantersee", "LNTR", "LNTR"},
+        new string[] {"Laquer", "LKR", "LKR"},
+        new string[] {"Larkin", "LRKN", "LRKN"},
+        new string[] {"Latham", "LTM", "LTM"},
+        new string[] {"Lathrop", "L0RP", "LTRP"},
+        new string[] {"Lauter", "LTR", "LTR"},
+        new string[] {"Lawrence", "LRNS", "LRNS"},
+        new string[] {"Leach", "LK", "LK"},
+        new string[] {"Leager", "LKR", "LJR"},
+        new string[] {"Learned", "LRNT", "LRNT"},
+        new string[] {"Leavitt", "LFT", "LFT"},
+        new string[] {"Lee", "L", "L"},
+        new string[] {"Leete", "LT", "LT"},
+        new string[] {"Leggett", "LKT", "LKT"},
+        new string[] {"Leland", "LLNT", "LLNT"},
+        new string[] {"Leonard", "LNRT", "LNRT"},
+        new string[] {"Lester", "LSTR", "LSTR"},
+        new string[] {"Lestrange", "LSTR", "LSTR"},
+        new string[] {"Lethem", "L0M", "LTM"},
+        new string[] {"Levine", "LFN", "LFN"},
+        new string[] {"Lewes", "LS", "LS"},
+        new string[] {"Lewis", "LS", "LS"},
+        new string[] {"Lincoln", "LNKL", "LNKL"},
+        new string[] {"Lindsey", "LNTS", "LNTS"},
+        new string[] {"Linher", "LNR", "LNR"},
+        new string[] {"Lippet", "LPT", "LPT"},
+        new string[] {"Lippincott", "LPNK", "LPNK"},
+        new string[] {"Lockwood", "LKT", "LKT"},
+        new string[] {"Loines", "LNS", "LNS"},
+        new string[] {"Lombard", "LMPR", "LMPR"},
+        new string[] {"Long", "LNK", "LNK"},
+        new string[] {"Longespee", "LNJS", "LNKS"},
+        new string[] {"Look", "LK", "LK"},
+        new string[] {"Lounsberry", "LNSP", "LNSP"},
+        new string[] {"Lounsbury", "LNSP", "LNSP"},
+        new string[] {"Louthe", "L0", "LT"},
+        new string[] {"Loveyne", "LFN", "LFN"},
+        new string[] {"Lowe", "L", "L"},
+        new string[] {"Ludlam", "LTLM", "LTLM"},
+        new string[] {"Lumbard", "LMPR", "LMPR"},
+        new string[] {"Lund", "LNT", "LNT"},
+        new string[] {"Luno", "LN", "LN"},
+        new string[] {"Lutz", "LTS", "LTS"},
+        new string[] {"Lydia", "LT", "LT"},
+        new string[] {"Lynne", "LN", "LN"},
+        new string[] {"Lyon", "LN", "LN"},
+        new string[] {"MacAlpin", "MKLP", "MKLP"},
+        new string[] {"MacBricc", "MKPR", "MKPR"},
+        new string[] {"MacCrinan", "MKRN", "MKRN"},
+        new string[] {"MacKenneth", "MKN0", "MKNT"},
+        new string[] {"MacMael nam Bo", "MKML", "MKML"},
+        new string[] {"MacMurchada", "MKMR", "MKMR"},
+        new string[] {"Macomber", "MKMP", "MKMP"},
+        new string[] {"Macy", "MS", "MS"},
+        new string[] {"Magnus", "MNS", "MKNS"},
+        new string[] {"Mahien", "MHN", "MHN"},
+        new string[] {"Malmains", "MLMN", "MLMN"},
+        new string[] {"Malory", "MLR", "MLR"},
+        new string[] {"Mancinelli", "MNSN", "MNSN"},
+        new string[] {"Mancini", "MNSN", "MNSN"},
+        new string[] {"Mann", "MN", "MN"},
+        new string[] {"Manning", "MNNK", "MNNK"},
+        new string[] {"Manter", "MNTR", "MNTR"},
+        new string[] {"Marion", "MRN", "MRN"},
+        new string[] {"Marley", "MRL", "MRL"},
+        new string[] {"Marmion", "MRMN", "MRMN"},
+        new string[] {"Marquart", "MRKR", "MRKR"},
+        new string[] {"Marsh", "MRX", "MRX"},
+        new string[] {"Marshal", "MRXL", "MRXL"},
+        new string[] {"Marshall", "MRXL", "MRXL"},
+        new string[] {"Martel", "MRTL", "MRTL"},
+        new string[] {"Martha", "MR0", "MRT"},
+        new string[] {"Martin", "MRTN", "MRTN"},
+        new string[] {"Marturano", "MRTR", "MRTR"},
+        new string[] {"Marvin", "MRFN", "MRFN"},
+        new string[] {"Mary", "MR", "MR"},
+        new string[] {"Mason", "MSN", "MSN"},
+        new string[] {"Maxwell", "MKSL", "MKSL"},
+        new string[] {"Mayhew", "MH", "MHF"},
+        new string[] {"McAllaster", "MKLS", "MKLS"},
+        new string[] {"McAllister", "MKLS", "MKLS"},
+        new string[] {"McConnell", "MKNL", "MKNL"},
+        new string[] {"McFarland", "MKFR", "MKFR"},
+        new string[] {"McIlroy", "MSLR", "MSLR"},
+        new string[] {"McNair", "MKNR", "MKNR"},
+        new string[] {"McNair-Landry", "MKNR", "MKNR"},
+        new string[] {"McRaven", "MKRF", "MKRF"},
+        new string[] {"Mead", "MT", "MT"},
+        new string[] {"Meade", "MT", "MT"},
+        new string[] {"Meck", "MK", "MK"},
+        new string[] {"Melton", "MLTN", "MLTN"},
+        new string[] {"Mendenhall", "MNTN", "MNTN"},
+        new string[] {"Mering", "MRNK", "MRNK"},
+        new string[] {"Merrick", "MRK", "MRK"},
+        new string[] {"Merry", "MR", "MR"},
+        new string[] {"Mighill", "ML", "ML"},
+        new string[] {"Miller", "MLR", "MLR"},
+        new string[] {"Milton", "MLTN", "MLTN"},
+        new string[] {"Mohun", "MHN", "MHN"},
+        new string[] {"Montague", "MNTK", "MNTK"},
+        new string[] {"Montboucher", "MNTP", "MNTP"},
+        new string[] {"Moore", "MR", "MR"},
+        new string[] {"Morrel", "MRL", "MRL"},
+        new string[] {"Morrill", "MRL", "MRL"},
+        new string[] {"Morris", "MRS", "MRS"},
+        new string[] {"Morton", "MRTN", "MRTN"},
+        new string[] {"Moton", "MTN", "MTN"},
+        new string[] {"Muir", "MR", "MR"},
+        new string[] {"Mulferd", "MLFR", "MLFR"},
+        new string[] {"Mullins", "MLNS", "MLNS"},
+        new string[] {"Mulso", "MLS", "MLS"},
+        new string[] {"Munger", "MNKR", "MNJR"},
+        new string[] {"Munt", "MNT", "MNT"},
+        new string[] {"Murchad", "MRXT", "MRKT"},
+        new string[] {"Murdock", "MRTK", "MRTK"},
+        new string[] {"Murray", "MR", "MR"},
+        new string[] {"Muskett", "MSKT", "MSKT"},
+        new string[] {"Myers", "MRS", "MRS"},
+        new string[] {"Myrick", "MRK", "MRK"},
+        new string[] {"NORRIS", "NRS", "NRS"},
+        new string[] {"Nayle", "NL", "NL"},
+        new string[] {"Newcomb", "NKMP", "NKMP"},
+        new string[] {"Newcomb(e)", "NKMP", "NKMP"},
+        new string[] {"Newkirk", "NKRK", "NKRK"},
+        new string[] {"Newton", "NTN", "NTN"},
+        new string[] {"Niles", "NLS", "NLS"},
+        new string[] {"Noble", "NPL", "NPL"},
+        new string[] {"Noel", "NL", "NL"},
+        new string[] {"Northend", "NR0N", "NRTN"},
+        new string[] {"Norton", "NRTN", "NRTN"},
+        new string[] {"Nutter", "NTR", "NTR"},
+        new string[] {"Odding", "ATNK", "ATNK"},
+        new string[] {"Odenbaugh", "ATNP", "ATNP"},
+        new string[] {"Ogborn", "AKPR", "AKPR"},
+        new string[] {"Oppenheimer", "APNM", "APNM"},
+        new string[] {"Otis", "ATS", "ATS"},
+        new string[] {"Oviatt", "AFT", "AFT"},
+        new string[] {"PRUST?", "PRST", "PRST"},
+        new string[] {"Paddock", "PTK", "PTK"},
+        new string[] {"Page", "PJ", "PK"},
+        new string[] {"Paine", "PN", "PN"},
+        new string[] {"Paist", "PST", "PST"},
+        new string[] {"Palmer", "PLMR", "PLMR"},
+        new string[] {"Park", "PRK", "PRK"},
+        new string[] {"Parker", "PRKR", "PRKR"},
+        new string[] {"Parkhurst", "PRKR", "PRKR"},
+        new string[] {"Parrat", "PRT", "PRT"},
+        new string[] {"Parsons", "PRSN", "PRSN"},
+        new string[] {"Partridge", "PRTR", "PRTR"},
+        new string[] {"Pashley", "PXL", "PXL"},
+        new string[] {"Pasley", "PSL", "PSL"},
+        new string[] {"Patrick", "PTRK", "PTRK"},
+        new string[] {"Pattee", "PT", "PT"},
+        new string[] {"Patten", "PTN", "PTN"},
+        new string[] {"Pawley", "PL", "PL"},
+        new string[] {"Payne", "PN", "PN"},
+        new string[] {"Peabody", "PPT", "PPT"},
+        new string[] {"Peake", "PK", "PK"},
+        new string[] {"Pearson", "PRSN", "PRSN"},
+        new string[] {"Peat", "PT", "PT"},
+        new string[] {"Pedersen", "PTRS", "PTRS"},
+        new string[] {"Percy", "PRS", "PRS"},
+        new string[] {"Perkins", "PRKN", "PRKN"},
+        new string[] {"Perrine", "PRN", "PRN"},
+        new string[] {"Perry", "PR", "PR"},
+        new string[] {"Peson", "PSN", "PSN"},
+        new string[] {"Peterson", "PTRS", "PTRS"},
+        new string[] {"Peyton", "PTN", "PTN"},
+        new string[] {"Phinney", "FN", "FN"},
+        new string[] {"Pickard", "PKRT", "PKRT"},
+        new string[] {"Pierce", "PRS", "PRS"},
+        new string[] {"Pierrepont", "PRPN", "PRPN"},
+        new string[] {"Pike", "PK", "PK"},
+        new string[] {"Pinkham", "PNKM", "PNKM"},
+        new string[] {"Pitman", "PTMN", "PTMN"},
+        new string[] {"Pitt", "PT", "PT"},
+        new string[] {"Pitts", "PTS", "PTS"},
+        new string[] {"Plantagenet", "PLNT", "PLNT"},
+        new string[] {"Platt", "PLT", "PLT"},
+        new string[] {"Platts", "PLTS", "PLTS"},
+        new string[] {"Pleis", "PLS", "PLS"},
+        new string[] {"Pleiss", "PLS", "PLS"},
+        new string[] {"Plisko", "PLSK", "PLSK"},
+        new string[] {"Pliskovitch", "PLSK", "PLSK"},
+        new string[] {"Plum", "PLM", "PLM"},
+        new string[] {"Plume", "PLM", "PLM"},
+        new string[] {"Poitou", "PT", "PT"},
+        new string[] {"Pomeroy", "PMR", "PMR"},
+        new string[] {"Poretiers", "PRTR", "PRTR"},
+        new string[] {"Pote", "PT", "PT"},
+        new string[] {"Potter", "PTR", "PTR"},
+        new string[] {"Potts", "PTS", "PTS"},
+        new string[] {"Powell", "PL", "PL"},
+        new string[] {"Pratt", "PRT", "PRT"},
+        new string[] {"Presbury", "PRSP", "PRSP"},
+        new string[] {"Priest", "PRST", "PRST"},
+        new string[] {"Prindle", "PRNT", "PRNT"},
+        new string[] {"Prior", "PRR", "PRR"},
+        new string[] {"Profumo", "PRFM", "PRFM"},
+        new string[] {"Purdy", "PRT", "PRT"},
+        new string[] {"Purefoy", "PRF", "PRF"},
+        new string[] {"Pury", "PR", "PR"},
+        new string[] {"Quinter", "KNTR", "KNTR"},
+        new string[] {"Rachel", "RXL", "RKL"},
+        new string[] {"Rand", "RNT", "RNT"},
+        new string[] {"Rankin", "RNKN", "RNKN"},
+        new string[] {"Ravenscroft", "RFNS", "RFNS"},
+        new string[] {"Raynsford", "RNSF", "RNSF"},
+        new string[] {"Reakirt", "RKRT", "RKRT"},
+        new string[] {"Reaves", "RFS", "RFS"},
+        new string[] {"Reeves", "RFS", "RFS"},
+        new string[] {"Reichert", "RXRT", "RKRT"},
+        new string[] {"Remmele", "RML", "RML"},
+        new string[] {"Reynolds", "RNLT", "RNLT"},
+        new string[] {"Rhodes", "RTS", "RTS"},
+        new string[] {"Richards", "RXRT", "RKRT"},
+        new string[] {"Richardson", "RXRT", "RKRT"},
+        new string[] {"Ring", "RNK", "RNK"},
+        new string[] {"Roberts", "RPRT", "RPRT"},
+        new string[] {"Robertson", "RPRT", "RPRT"},
+        new string[] {"Robson", "RPSN", "RPSN"},
+        new string[] {"Rodie", "RT", "RT"},
+        new string[] {"Rody", "RT", "RT"},
+        new string[] {"Rogers", "RKRS", "RJRS"},
+        new string[] {"Ross", "RS", "RS"},
+        new string[] {"Rosslevin", "RSLF", "RSLF"},
+        new string[] {"Rowland", "RLNT", "RLNT"},
+        new string[] {"Ruehl", "RL", "RL"},
+        new string[] {"Russell", "RSL", "RSL"},
+        new string[] {"Ruth", "R0", "RT"},
+        new string[] {"Ryan", "RN", "RN"},
+        new string[] {"Rysse", "RS", "RS"},
+        new string[] {"Sadler", "STLR", "STLR"},
+        new string[] {"Salmon", "SLMN", "SLMN"},
+        new string[] {"Salter", "SLTR", "SLTR"},
+        new string[] {"Salvatore", "SLFT", "SLFT"},
+        new string[] {"Sanders", "SNTR", "SNTR"},
+        new string[] {"Sands", "SNTS", "SNTS"},
+        new string[] {"Sanford", "SNFR", "SNFR"},
+        new string[] {"Sanger", "SNKR", "SNJR"},
+        new string[] {"Sargent", "SRJN", "SRKN"},
+        new string[] {"Saunders", "SNTR", "SNTR"},
+        new string[] {"Schilling", "XLNK", "XLNK"},
+        new string[] {"Schlegel", "XLKL", "SLKL"},
+        new string[] {"Scott", "SKT", "SKT"},
+        new string[] {"Sears", "SRS", "SRS"},
+        new string[] {"Segersall", "SJRS", "SKRS"},
+        new string[] {"Senecal", "SNKL", "SNKL"},
+        new string[] {"Sergeaux", "SRJ", "SRK"},
+        new string[] {"Severance", "SFRN", "SFRN"},
+        new string[] {"Sharp", "XRP", "XRP"},
+        new string[] {"Sharpe", "XRP", "XRP"},
+        new string[] {"Sharply", "XRPL", "XRPL"},
+        new string[] {"Shatswell", "XTSL", "XTSL"},
+        new string[] {"Shattack", "XTK", "XTK"},
+        new string[] {"Shattock", "XTK", "XTK"},
+        new string[] {"Shattuck", "XTK", "XTK"},
+        new string[] {"Shaw", "X", "XF"},
+        new string[] {"Sheldon", "XLTN", "XLTN"},
+        new string[] {"Sherman", "XRMN", "XRMN"},
+        new string[] {"Shinn", "XN", "XN"},
+        new string[] {"Shirford", "XRFR", "XRFR"},
+        new string[] {"Shirley", "XRL", "XRL"},
+        new string[] {"Shively", "XFL", "XFL"},
+        new string[] {"Shoemaker", "XMKR", "XMKR"},
+        new string[] {"Short", "XRT", "XRT"},
+        new string[] {"Shotwell", "XTL", "XTL"},
+        new string[] {"Shute", "XT", "XT"},
+        new string[] {"Sibley", "SPL", "SPL"},
+        new string[] {"Silver", "SLFR", "SLFR"},
+        new string[] {"Simes", "SMS", "SMS"},
+        new string[] {"Sinken", "SNKN", "SNKN"},
+        new string[] {"Sinn", "SN", "SN"},
+        new string[] {"Skelton", "SKLT", "SKLT"},
+        new string[] {"Skiffe", "SKF", "SKF"},
+        new string[] {"Skotkonung", "SKTK", "SKTK"},
+        new string[] {"Slade", "SLT", "XLT"},
+        new string[] {"Slye", "SL", "XL"},
+        new string[] {"Smedley", "SMTL", "XMTL"},
+        new string[] {"Smith", "SM0", "XMT"},
+        new string[] {"Smythe", "SM0", "XMT"},
+        new string[] {"Snow", "SN", "XNF"},
+        new string[] {"Soole", "SL", "SL"},
+        new string[] {"Soule", "SL", "SL"},
+        new string[] {"Southworth", "S0R0", "STRT"},
+        new string[] {"Sowles", "SLS", "SLS"},
+        new string[] {"Spalding", "SPLT", "SPLT"},
+        new string[] {"Spark", "SPRK", "SPRK"},
+        new string[] {"Spencer", "SPNS", "SPNS"},
+        new string[] {"Sperry", "SPR", "SPR"},
+        new string[] {"Spofford", "SPFR", "SPFR"},
+        new string[] {"Spooner", "SPNR", "SPNR"},
+        new string[] {"Sprague", "SPRK", "SPRK"},
+        new string[] {"Springer", "SPRN", "SPRN"},
+        new string[] {"St. Clair", "STKL", "STKL"},
+        new string[] {"St. Claire", "STKL", "STKL"},
+        new string[] {"St. Leger", "STLJ", "STLK"},
+        new string[] {"St. Omer", "STMR", "STMR"},
+        new string[] {"Stafferton", "STFR", "STFR"},
+        new string[] {"Stafford", "STFR", "STFR"},
+        new string[] {"Stalham", "STLM", "STLM"},
+        new string[] {"Stanford", "STNF", "STNF"},
+        new string[] {"Stanton", "STNT", "STNT"},
+        new string[] {"Star", "STR", "STR"},
+        new string[] {"Starbuck", "STRP", "STRP"},
+        new string[] {"Starkey", "STRK", "STRK"},
+        new string[] {"Starkweather", "STRK", "STRK"},
+        new string[] {"Stearns", "STRN", "STRN"},
+        new string[] {"Stebbins", "STPN", "STPN"},
+        new string[] {"Steele", "STL", "STL"},
+        new string[] {"Stephenson", "STFN", "STFN"},
+        new string[] {"Stevens", "STFN", "STFN"},
+        new string[] {"Stoddard", "STTR", "STTR"},
+        new string[] {"Stodder", "STTR", "STTR"},
+        new string[] {"Stone", "STN", "STN"},
+        new string[] {"Storey", "STR", "STR"},
+        new string[] {"Storrada", "STRT", "STRT"},
+        new string[] {"Story", "STR", "STR"},
+        new string[] {"Stoughton", "STFT", "STFT"},
+        new string[] {"Stout", "STT", "STT"},
+        new string[] {"Stow", "ST", "STF"},
+        new string[] {"Strong", "STRN", "STRN"},
+        new string[] {"Strutt", "STRT", "STRT"},
+        new string[] {"Stryker", "STRK", "STRK"},
+        new string[] {"Stuckeley", "STKL", "STKL"},
+        new string[] {"Sturges", "STRJ", "STRK"},
+        new string[] {"Sturgess", "STRJ", "STRK"},
+        new string[] {"Sturgis", "STRJ", "STRK"},
+        new string[] {"Suevain", "SFN", "SFN"},
+        new string[] {"Sulyard", "SLRT", "SLRT"},
+        new string[] {"Sutton", "STN", "STN"},
+        new string[] {"Swain", "SN", "XN"},
+        new string[] {"Swayne", "SN", "XN"},
+        new string[] {"Swayze", "SS", "XTS"},
+        new string[] {"Swift", "SFT", "XFT"},
+        new string[] {"Taber", "TPR", "TPR"},
+        new string[] {"Talcott", "TLKT", "TLKT"},
+        new string[] {"Tarne", "TRN", "TRN"},
+        new string[] {"Tatum", "TTM", "TTM"},
+        new string[] {"Taverner", "TFRN", "TFRN"},
+        new string[] {"Taylor", "TLR", "TLR"},
+        new string[] {"Tenney", "TN", "TN"},
+        new string[] {"Thayer", "0R", "TR"},
+        new string[] {"Thember", "0MPR", "TMPR"},
+        new string[] {"Thomas", "TMS", "TMS"},
+        new string[] {"Thompson", "TMPS", "TMPS"},
+        new string[] {"Thorne", "0RN", "TRN"},
+        new string[] {"Thornycraft", "0RNK", "TRNK"},
+        new string[] {"Threlkeld", "0RLK", "TRLK"},
+        new string[] {"Throckmorton", "0RKM", "TRKM"},
+        new string[] {"Thwaits", "0TS", "TTS"},
+        new string[] {"Tibbetts", "TPTS", "TPTS"},
+        new string[] {"Tidd", "TT", "TT"},
+        new string[] {"Tierney", "TRN", "TRN"},
+        new string[] {"Tilley", "TL", "TL"},
+        new string[] {"Tillieres", "TLRS", "TLRS"},
+        new string[] {"Tilly", "TL", "TL"},
+        new string[] {"Tisdale", "TSTL", "TSTL"},
+        new string[] {"Titus", "TTS", "TTS"},
+        new string[] {"Tobey", "TP", "TP"},
+        new string[] {"Tooker", "TKR", "TKR"},
+        new string[] {"Towle", "TL", "TL"},
+        new string[] {"Towne", "TN", "TN"},
+        new string[] {"Townsend", "TNSN", "TNSN"},
+        new string[] {"Treadway", "TRT", "TRT"},
+        new string[] {"Trelawney", "TRLN", "TRLN"},
+        new string[] {"Trinder", "TRNT", "TRNT"},
+        new string[] {"Tripp", "TRP", "TRP"},
+        new string[] {"Trippe", "TRP", "TRP"},
+        new string[] {"Trott", "TRT", "TRT"},
+        new string[] {"True", "TR", "TR"},
+        new string[] {"Trussebut", "TRSP", "TRSP"},
+        new string[] {"Tucker", "TKR", "TKR"},
+        new string[] {"Turgeon", "TRJN", "TRKN"},
+        new string[] {"Turner", "TRNR", "TRNR"},
+        new string[] {"Tuttle", "TTL", "TTL"},
+        new string[] {"Tyler", "TLR", "TLR"},
+        new string[] {"Tylle", "TL", "TL"},
+        new string[] {"Tyrrel", "TRL", "TRL"},
+        new string[] {"Ua Tuathail", "AT0L", "ATTL"},
+        new string[] {"Ulrich", "ALRX", "ALRK"},
+        new string[] {"Underhill", "ANTR", "ANTR"},
+        new string[] {"Underwood", "ANTR", "ANTR"},
+        new string[] {"Unknown", "ANKN", "ANKN"},
+        new string[] {"Valentine", "FLNT", "FLNT"},
+        new string[] {"Van Egmond", "FNKM", "FNKM"},
+        new string[] {"Van der Beek", "FNTR", "FNTR"},
+        new string[] {"Vaughan", "FKN", "FKN"},
+        new string[] {"Vermenlen", "FRMN", "FRMN"},
+        new string[] {"Vincent", "FNSN", "FNSN"},
+        new string[] {"Volentine", "FLNT", "FLNT"},
+        new string[] {"Wagner", "AKNR", "FKNR"},
+        new string[] {"Waite", "AT", "FT"},
+        new string[] {"Walker", "ALKR", "FLKR"},
+        new string[] {"Walter", "ALTR", "FLTR"},
+        new string[] {"Wandell", "ANTL", "FNTL"},
+        new string[] {"Wandesford", "ANTS", "FNTS"},
+        new string[] {"Warbleton", "ARPL", "FRPL"},
+        new string[] {"Ward", "ART", "FRT"},
+        new string[] {"Warde", "ART", "FRT"},
+        new string[] {"Ware", "AR", "FR"},
+        new string[] {"Wareham", "ARHM", "FRHM"},
+        new string[] {"Warner", "ARNR", "FRNR"},
+        new string[] {"Warren", "ARN", "FRN"},
+        new string[] {"Washburne", "AXPR", "FXPR"},
+        new string[] {"Waterbury", "ATRP", "FTRP"},
+        new string[] {"Watson", "ATSN", "FTSN"},
+        new string[] {"WatsonEllithorpe", "ATSN", "FTSN"},
+        new string[] {"Watts", "ATS", "FTS"},
+        new string[] {"Wayne", "AN", "FN"},
+        new string[] {"Webb", "AP", "FP"},
+        new string[] {"Weber", "APR", "FPR"},
+        new string[] {"Webster", "APST", "FPST"},
+        new string[] {"Weed", "AT", "FT"},
+        new string[] {"Weeks", "AKS", "FKS"},
+        new string[] {"Wells", "ALS", "FLS"},
+        new string[] {"Wenzell", "ANSL", "FNTS"},
+        new string[] {"West", "AST", "FST"},
+        new string[] {"Westbury", "ASTP", "FSTP"},
+        new string[] {"Whatlocke", "ATLK", "ATLK"},
+        new string[] {"Wheeler", "ALR", "ALR"},
+        new string[] {"Whiston", "ASTN", "ASTN"},
+        new string[] {"White", "AT", "AT"},
+        new string[] {"Whitman", "ATMN", "ATMN"},
+        new string[] {"Whiton", "ATN", "ATN"},
+        new string[] {"Whitson", "ATSN", "ATSN"},
+        new string[] {"Wickes", "AKS", "FKS"},
+        new string[] {"Wilbur", "ALPR", "FLPR"},
+        new string[] {"Wilcotes", "ALKT", "FLKT"},
+        new string[] {"Wilkinson", "ALKN", "FLKN"},
+        new string[] {"Willets", "ALTS", "FLTS"},
+        new string[] {"Willett", "ALT", "FLT"},
+        new string[] {"Willey", "AL", "FL"},
+        new string[] {"Williams", "ALMS", "FLMS"},
+        new string[] {"Williston", "ALST", "FLST"},
+        new string[] {"Wilson", "ALSN", "FLSN"},
+        new string[] {"Wimes", "AMS", "FMS"},
+        new string[] {"Winch", "ANX", "FNK"},
+        new string[] {"Winegar", "ANKR", "FNKR"},
+        new string[] {"Wing", "ANK", "FNK"},
+        new string[] {"Winsley", "ANSL", "FNSL"},
+        new string[] {"Winslow", "ANSL", "FNSL"},
+        new string[] {"Winthrop", "AN0R", "FNTR"},
+        new string[] {"Wise", "AS", "FS"},
+        new string[] {"Wood", "AT", "FT"},
+        new string[] {"Woodbridge", "ATPR", "FTPR"},
+        new string[] {"Woodward", "ATRT", "FTRT"},
+        new string[] {"Wooley", "AL", "FL"},
+        new string[] {"Woolley", "AL", "FL"},
+        new string[] {"Worth", "AR0", "FRT"},
+        new string[] {"Worthen", "AR0N", "FRTN"},
+        new string[] {"Worthley", "AR0L", "FRTL"},
+        new string[] {"Wright", "RT", "RT"},
+        new string[] {"Wyer", "AR", "FR"},
+        new string[] {"Wyere", "AR", "FR"},
+        new string[] {"Wynkoop", "ANKP", "FNKP"},
+        new string[] {"Yarnall", "ARNL", "ARNL"},
+        new string[] {"Yeoman", "AMN", "AMN"},
+        new string[] {"Yorke", "ARK", "ARK"},
+        new string[] {"Young", "ANK", "ANK"},
+        new string[] {"ab Wennonwen", "APNN", "APNN"},
+        new string[] {"ap Llewellyn", "APLL", "APLL"},
+        new string[] {"ap Lorwerth", "APLR", "APLR"},
+        new string[] {"d'Angouleme", "TNKL", "TNKL"},
+        new string[] {"de Audeham", "TTHM", "TTHM"},
+        new string[] {"de Bavant", "TPFN", "TPFN"},
+        new string[] {"de Beauchamp", "TPXM", "TPKM"},
+        new string[] {"de Beaumont", "TPMN", "TPMN"},
+        new string[] {"de Bolbec", "TPLP", "TPLP"},
+        new string[] {"de Braiose", "TPRS", "TPRS"},
+        new string[] {"de Braose", "TPRS", "TPRS"},
+        new string[] {"de Briwere", "TPRR", "TPRR"},
+        new string[] {"de Cantelou", "TKNT", "TKNT"},
+        new string[] {"de Cherelton", "TXRL", "TKRL"},
+        new string[] {"de Cherleton", "TXRL", "TKRL"},
+        new string[] {"de Clare", "TKLR", "TKLR"},
+        new string[] {"de Claremont", "TKLR", "TKLR"},
+        new string[] {"de Clifford", "TKLF", "TKLF"},
+        new string[] {"de Colville", "TKLF", "TKLF"},
+        new string[] {"de Courtenay", "TKRT", "TKRT"},
+        new string[] {"de Fauconberg", "TFKN", "TFKN"},
+        new string[] {"de Forest", "TFRS", "TFRS"},
+        new string[] {"de Gai", "TK", "TK"},
+        new string[] {"de Grey", "TKR", "TKR"},
+        new string[] {"de Guernons", "TKRN", "TKRN"},
+        new string[] {"de Haia", "T", "T"},
+        new string[] {"de Harcourt", "TRKR", "TRKR"},
+        new string[] {"de Hastings", "TSTN", "TSTN"},
+        new string[] {"de Hoke", "TK", "TK"},
+        new string[] {"de Hooch", "TK", "TK"},
+        new string[] {"de Hugelville", "TJLF", "TKLF"},
+        new string[] {"de Huntingdon", "TNTN", "TNTN"},
+        new string[] {"de Insula", "TNSL", "TNSL"},
+        new string[] {"de Keynes", "TKNS", "TKNS"},
+        new string[] {"de Lacy", "TLS", "TLS"},
+        new string[] {"de Lexington", "TLKS", "TLKS"},
+        new string[] {"de Lusignan", "TLSN", "TLSK"},
+        new string[] {"de Manvers", "TMNF", "TMNF"},
+        new string[] {"de Montagu", "TMNT", "TMNT"},
+        new string[] {"de Montault", "TMNT", "TMNT"},
+        new string[] {"de Montfort", "TMNT", "TMNT"},
+        new string[] {"de Mortimer", "TMRT", "TMRT"},
+        new string[] {"de Morville", "TMRF", "TMRF"},
+        new string[] {"de Morvois", "TMRF", "TMRF"},
+        new string[] {"de Neufmarche", "TNFM", "TNFM"},
+        new string[] {"de Odingsells", "TTNK", "TTNK"},
+        new string[] {"de Odyngsells", "TTNK", "TTNK"},
+        new string[] {"de Percy", "TPRS", "TPRS"},
+        new string[] {"de Pierrepont", "TPRP", "TPRP"},
+        new string[] {"de Plessetis", "TPLS", "TPLS"},
+        new string[] {"de Porhoet", "TPRT", "TPRT"},
+        new string[] {"de Prouz", "TPRS", "TPRS"},
+        new string[] {"de Quincy", "TKNS", "TKNS"},
+        new string[] {"de Ripellis", "TRPL", "TRPL"},
+        new string[] {"de Ros", "TRS", "TRS"},
+        new string[] {"de Salisbury", "TSLS", "TSLS"},
+        new string[] {"de Sanford", "TSNF", "TSNF"},
+        new string[] {"de Somery", "TSMR", "TSMR"},
+        new string[] {"de St. Hilary", "TSTL", "TSTL"},
+        new string[] {"de St. Liz", "TSTL", "TSTL"},
+        new string[] {"de Sutton", "TSTN", "TSTN"},
+        new string[] {"de Toeni", "TTN", "TTN"},
+        new string[] {"de Tony", "TTN", "TTN"},
+        new string[] {"de Umfreville", "TMFR", "TMFR"},
+        new string[] {"de Valognes", "TFLN", "TFLK"},
+        new string[] {"de Vaux", "TF", "TF"},
+        new string[] {"de Vere", "TFR", "TFR"},
+        new string[] {"de Vermandois", "TFRM", "TFRM"},
+        new string[] {"de Vernon", "TFRN", "TFRN"},
+        new string[] {"de Vexin", "TFKS", "TFKS"},
+        new string[] {"de Vitre", "TFTR", "TFTR"},
+        new string[] {"de Wandesford", "TNTS", "TNTS"},
+        new string[] {"de Warenne", "TRN", "TRN"},
+        new string[] {"de Westbury", "TSTP", "TSTP"},
+        new string[] {"di Saluzzo", "TSLS", "TSLT"},
+        new string[] {"fitz Alan", "FTSL", "FTSL"},
+        new string[] {"fitz Geoffrey", "FTSJ", "FTSK"},
+        new string[] {"fitz Herbert", "FTSR", "FTSR"},
+        new string[] {"fitz John", "FTSJ", "FTSJ"},
+        new string[] {"fitz Patrick", "FTSP", "FTSP"},
+        new string[] {"fitz Payn", "FTSP", "FTSP"},
+        new string[] {"fitz Piers", "FTSP", "FTSP"},
+        new string[] {"fitz Randolph", "FTSR", "FTSR"},
+        new string[] {"fitz Richard", "FTSR", "FTSR"},
+        new string[] {"fitz Robert", "FTSR", "FTSR"},
+        new string[] {"fitz Roy", "FTSR", "FTSR"},
+        new string[] {"fitz Scrob", "FTSS", "FTSS"},
+        new string[] {"fitz Walter", "FTSL", "FTSL"},
+        new string[] {"fitz Warin", "FTSR", "FTSR"},
+        new string[] {"fitz Williams", "FTSL", "FTSL"},
+        new string[] {"la Zouche", "LSX", "LSK"},
+        new string[] {"le Botiller", "LPTL", "LPTL"},
+        new string[] {"le Despenser", "LTSP", "LTSP"},
+        new string[] {"le deSpencer", "LTSP", "LTSP"},
+        new string[] {"of Allendale", "AFLN", "AFLN"},
+        new string[] {"of Angouleme", "AFNK", "AFNK"},
+        new string[] {"of Anjou", "AFNJ", "AFNJ"},
+        new string[] {"of Aquitaine", "AFKT", "AFKT"},
+        new string[] {"of Aumale", "AFML", "AFML"},
+        new string[] {"of Bavaria", "AFPF", "AFPF"},
+        new string[] {"of Boulogne", "AFPL", "AFPL"},
+        new string[] {"of Brittany", "AFPR", "AFPR"},
+        new string[] {"of Brittary", "AFPR", "AFPR"},
+        new string[] {"of Castile", "AFKS", "AFKS"},
+        new string[] {"of Chester", "AFXS", "AFKS"},
+        new string[] {"of Clermont", "AFKL", "AFKL"},
+        new string[] {"of Cologne", "AFKL", "AFKL"},
+        new string[] {"of Dinan", "AFTN", "AFTN"},
+        new string[] {"of Dunbar", "AFTN", "AFTN"},
+        new string[] {"of England", "AFNK", "AFNK"},
+        new string[] {"of Essex", "AFSK", "AFSK"},
+        new string[] {"of Falaise", "AFFL", "AFFL"},
+        new string[] {"of Flanders", "AFFL", "AFFL"},
+        new string[] {"of Galloway", "AFKL", "AFKL"},
+        new string[] {"of Germany", "AFKR", "AFJR"},
+        new string[] {"of Gloucester", "AFKL", "AFKL"},
+        new string[] {"of Heristal", "AFRS", "AFRS"},
+        new string[] {"of Hungary", "AFNK", "AFNK"},
+        new string[] {"of Huntington", "AFNT", "AFNT"},
+        new string[] {"of Kiev", "AFKF", "AFKF"},
+        new string[] {"of Kuno", "AFKN", "AFKN"},
+        new string[] {"of Landen", "AFLN", "AFLN"},
+        new string[] {"of Laon", "AFLN", "AFLN"},
+        new string[] {"of Leinster", "AFLN", "AFLN"},
+        new string[] {"of Lens", "AFLN", "AFLN"},
+        new string[] {"of Lorraine", "AFLR", "AFLR"},
+        new string[] {"of Louvain", "AFLF", "AFLF"},
+        new string[] {"of Mercia", "AFMR", "AFMR"},
+        new string[] {"of Metz", "AFMT", "AFMT"},
+        new string[] {"of Meulan", "AFML", "AFML"},
+        new string[] {"of Nass", "AFNS", "AFNS"},
+        new string[] {"of Normandy", "AFNR", "AFNR"},
+        new string[] {"of Ohningen", "AFNN", "AFNN"},
+        new string[] {"of Orleans", "AFRL", "AFRL"},
+        new string[] {"of Poitou", "AFPT", "AFPT"},
+        new string[] {"of Polotzk", "AFPL", "AFPL"},
+        new string[] {"of Provence", "AFPR", "AFPR"},
+        new string[] {"of Ringelheim", "AFRN", "AFRN"},
+        new string[] {"of Salisbury", "AFSL", "AFSL"},
+        new string[] {"of Saxony", "AFSK", "AFSK"},
+        new string[] {"of Scotland", "AFSK", "AFSK"},
+        new string[] {"of Senlis", "AFSN", "AFSN"},
+        new string[] {"of Stafford", "AFST", "AFST"},
+        new string[] {"of Swabia", "AFSP", "AFSP"},
+        new string[] {"of Tongres", "AFTN", "AFTN"},
+        new string[] {"of the Tributes", "AF0T", "AFTT"},
+        new string[] {"unknown", "ANKN", "ANKN"},
+        new string[] {"van der Gouda", "FNTR", "FNTR"},
+        new string[] {"von Adenbaugh", "FNTN", "FNTN"},
+        new string[] {"ARCHITure", "ARKT", "ARKT"},
+        new string[] {"Arnoff", "ARNF", "ARNF"},
+        new string[] {"Arnow", "ARN", "ARNF"},
+        new string[] {"DANGER", "TNJR", "TNKR"},
+        new string[] {"Jankelowicz", "JNKL", "ANKL"},
+        new string[] {"MANGER", "MNJR", "MNKR"},
+        new string[] {"McClellan", "MKLL", "MKLL"},
+        new string[] {"McHugh", "MK", "MK"},
+        new string[] {"McLaughlin", "MKLF", "MKLF"},
+        new string[] {"ORCHEStra", "ARKS", "ARKS"},
+        new string[] {"ORCHID", "ARKT", "ARKT"},
+        new string[] {"Pierce", "PRS", "PRS"},
+        new string[] {"RANGER", "RNJR", "RNKR"},
+        new string[] {"Schlesinger", "XLSN", "SLSN"},
+        new string[] {"Uomo", "AM", "AM"},
+        new string[] {"Vasserman", "FSRM", "FSRM"},
+        new string[] {"Wasserman", "ASRM", "FSRM"},
+        new string[] {"Womo", "AM", "FM"},
+        new string[] {"Yankelovich", "ANKL", "ANKL"},
+        new string[] {"accede", "AKST", "AKST"},
+        new string[] {"accident", "AKST", "AKST"},
+        new string[] {"adelsheim", "ATLS", "ATLS"},
+        new string[] {"aged", "AJT", "AKT"},
+        new string[] {"ageless", "AJLS", "AKLS"},
+        new string[] {"agency", "AJNS", "AKNS"},
+        new string[] {"aghast", "AKST", "AKST"},
+        new string[] {"agio", "AJ", "AK"},
+        new string[] {"agrimony", "AKRM", "AKRM"},
+        new string[] {"album", "ALPM", "ALPM"},
+        new string[] {"alcmene", "ALKM", "ALKM"},
+        new string[] {"alehouse", "ALHS", "ALHS"},
+        new string[] {"antique", "ANTK", "ANTK"},
+        new string[] {"artois", "ART", "ARTS"},
+        new string[] {"automation", "ATMX", "ATMX"},
+        new string[] {"bacchus", "PKS", "PKS"},
+        new string[] {"bacci", "PX", "PX"},
+        new string[] {"bajador", "PJTR", "PHTR"},
+        new string[] {"bellocchio", "PLX", "PLX"},
+        new string[] {"bertucci", "PRTX", "PRTX"},
+        new string[] {"biaggi", "PJ", "PK"},
+        new string[] {"bough", "P", "P"},
+        new string[] {"breaux", "PR", "PR"},
+        new string[] {"broughton", "PRTN", "PRTN"},
+        new string[] {"cabrillo", "KPRL", "KPR"},
+        new string[] {"caesar", "SSR", "SSR"},
+        new string[] {"cagney", "KKN", "KKN"},
+        new string[] {"campbell", "KMPL", "KMPL"},
+        new string[] {"carlisle", "KRLL", "KRLL"},
+        new string[] {"carlysle", "KRLL", "KRLL"},
+        new string[] {"chemistry", "KMST", "KMST"},
+        new string[] {"chianti", "KNT", "KNT"},
+        new string[] {"chorus", "KRS", "KRS"},
+        new string[] {"cough", "KF", "KF"},
+        new string[] {"czerny", "SRN", "XRN"},
+        new string[] {"deffenbacher", "TFNP", "TFNP"},
+        new string[] {"dumb", "TM", "TM"},
+        new string[] {"edgar", "ATKR", "ATKR"},
+        new string[] {"edge", "AJ", "AJ"},
+        new string[] {"filipowicz", "FLPT", "FLPF"},
+        new string[] {"focaccia", "FKX", "FKX"},
+        new string[] {"gallegos", "KLKS", "KKS"},
+        new string[] {"gambrelli", "KMPR", "KMPR"},
+        new string[] {"geithain", "K0N", "JTN"},
+        new string[] {"ghiradelli", "JRTL", "JRTL"},
+        new string[] {"ghislane", "JLN", "JLN"},
+        new string[] {"gough", "KF", "KF"},
+        new string[] {"hartheim", "HR0M", "HRTM"},
+        new string[] {"heimsheim", "HMSM", "HMSM"},
+        new string[] {"hochmeier", "HKMR", "HKMR"},
+        new string[] {"hugh", "H", "H"},
+        new string[] {"hunger", "HNKR", "HNJR"},
+        new string[] {"hungry", "HNKR", "HNKR"},
+        new string[] {"island", "ALNT", "ALNT"},
+        new string[] {"isle", "AL", "AL"},
+        new string[] {"jose", "HS", "HS"},
+        new string[] {"laugh", "LF", "LF"},
+        new string[] {"mac caffrey", "MKFR", "MKFR"},
+        new string[] {"mac gregor", "MKRK", "MKRK"},
+        new string[] {"pegnitz", "PNTS", "PKNT"},
+        new string[] {"piskowitz", "PSKT", "PSKF"},
+        new string[] {"queen", "KN", "KN"},
+        new string[] {"raspberry", "RSPR", "RSPR"},
+        new string[] {"resnais", "RSN", "RSNS"},
+        new string[] {"rogier", "RJ", "RJR"},
+        new string[] {"rough", "RF", "RF"},
+        new string[] {"san jacinto", "SNHS", "SNHS"},
+        new string[] {"schenker", "XNKR", "SKNK"},
+        new string[] {"schermerhorn", "XRMR", "SKRM"},
+        new string[] {"schmidt", "XMT", "SMT"},
+        new string[] {"schneider", "XNTR", "SNTR"},
+        new string[] {"school", "SKL", "SKL"},
+        new string[] {"schooner", "SKNR", "SKNR"},
+        new string[] {"schrozberg", "XRSP", "SRSP"},
+        new string[] {"schulman", "XLMN", "XLMN"},
+        new string[] {"schwabach", "XPK", "XFPK"},
+        new string[] {"schwarzach", "XRSK", "XFRT"},
+        new string[] {"smith", "SM0", "XMT"},
+        new string[] {"snider", "SNTR", "XNTR"},
+        new string[] {"succeed", "SKST", "SKST"},
+        new string[] {"sugarcane", "XKRK", "SKRK"},
+        new string[] {"svobodka", "SFPT", "SFPT"},
+        new string[] {"tagliaro", "TKLR", "TLR"},
+        new string[] {"thames", "TMS", "TMS"},
+        new string[] {"theilheim", "0LM", "TLM"},
+        new string[] {"thomas", "TMS", "TMS"},
+        new string[] {"thumb", "0M", "TM"},
+        new string[] {"tichner", "TXNR", "TKNR"},
+        new string[] {"tough", "TF", "TF"},
+        new string[] {"umbrella", "AMPR", "AMPR"},
+        new string[] {"vilshofen", "FLXF", "FLXF"},
+        new string[] {"von schuller", "FNXL", "FNXL"},
+        new string[] {"wachtler", "AKTL", "FKTL"},
+        new string[] {"wechsler", "AKSL", "FKSL"},
+        new string[] {"weikersheim", "AKRS", "FKRS"},
+        new string[] {"zhao", "J", "J"}};
+
+        private void CheckDoubleMetaphone(int typeIndex, bool alternate)
+        {
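+            // typeIndex selects the expected-value column in TEST_DATA ({input, primary, alternate});
+            // the 'alternate' flag is passed straight to the encoder so one loop covers both encodings.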
+            for (int i = 0; i < TEST_DATA.Length; i++)
+            {
+                string value = TEST_DATA[i][0];
+                Assert.AreEqual(TEST_DATA[i][typeIndex], this.StringEncoder.GetDoubleMetaphone(value, alternate), "Test [" + i + "]=" + value);
+            }
+        }
+
+        protected override DoubleMetaphone CreateStringEncoder()
+        {
+            return new DoubleMetaphone();
+        }
+
+        /// <summary>
+        /// Tests the alternate encoding.
+        /// </summary>
+        [Test]
+        public void TestDoubleMetaphoneAlternate()
+        {
+            this.CheckDoubleMetaphone(ALTERNATE_INDEX, true);
+        }
+
+        /// <summary>
+        /// Tests the primary encoding.
+        /// </summary>
+        [Test]
+        public void TestDoubleMetaphonePrimary()
+        {
+            this.CheckDoubleMetaphone(PRIMARY_INDEX, false);
+        }
+    }
+}
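
A minimal usage sketch of the encoder these fixtures exercise (assuming the
GetDoubleMetaphone(value, alternate) signature used by CheckDoubleMetaphone above and the
Lucene.Net.Analysis.Phonetic.Language namespace; the expected values are taken from the
"Smith" row of TEST_DATA):

    using Lucene.Net.Analysis.Phonetic.Language;

    var encoder = new DoubleMetaphone();
    string primary = encoder.GetDoubleMetaphone("Smith", false);    // "SM0" (primary encoding)
    string alternate = encoder.GetDoubleMetaphone("Smith", true);   // "XMT" (alternate encoding)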


[15/15] lucenenet git commit: Lucene.Net.Analysis.SmartCn: Added version compatibility level to each file.

Posted by ni...@apache.org.
Lucene.Net.Analysis.SmartCn: Added version compatibility level to each file.
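
For readers skimming the diffs below, the change is uniform: each file gains a single header
comment recording the ported Lucene version, e.g. (as in the AnalyzerProfile.cs hunk):

    // lucene version compatibility level: 4.8.1
    using System;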


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/c9973565
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/c9973565
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/c9973565

Branch: refs/heads/master
Commit: c997356507f4a79b891581236f10b885967fe6d6
Parents: 1ee3a9c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Jun 28 03:23:15 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Jun 28 03:23:15 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs                | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/CharType.cs                       | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs        | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs                | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs          | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs             | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs                  | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs                  | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs                  | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs            | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs              | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs            | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs            | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs     | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs              | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs           | 3 ++-
 .../SmartChineseSentenceTokenizerFactory.cs                       | 3 ++-
 .../SmartChineseWordTokenFilterFactory.cs                         | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/Utility.cs                        | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs                  | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs                | 3 ++-
 src/Lucene.Net.Analysis.SmartCn/WordType.cs                       | 3 ++-
 22 files changed, 44 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
index 88c6c27..0b80796 100644
--- a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
@@ -1,4 +1,5 @@
-using System;
+// lucene version compatibility level: 4.8.1
+using System;
 using System.IO;
 using System.Security;
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/CharType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/CharType.cs b/src/Lucene.Net.Analysis.SmartCn/CharType.cs
index 8360802..03132fb 100644
--- a/src/Lucene.Net.Analysis.SmartCn/CharType.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/CharType.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Analysis.Cn.Smart
+// lucene version compatibility level: 4.8.1
+namespace Lucene.Net.Analysis.Cn.Smart
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs
index efac7d0..370056a 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/AbstractDictionary.cs
@@ -1,4 +1,5 @@
-using System;
+// lucene version compatibility level: 4.8.1
+using System;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs
index adeef2a..c32c8d5 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/BiSegGraph.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 using System;
 using System.Collections.Generic;
 using System.Text;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs
index cc87ceb..72e5f1f 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/BigramDictionary.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 using Lucene.Net.Support.IO;
 using System;
 using System.IO;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs
index 5d6ee55..e2ef365 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/HHMMSegmenter.cs
@@ -1,4 +1,5 @@
-using System.Collections.Generic;
+// lucene version compatibility level: 4.8.1
+using System.Collections.Generic;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs
index 11387ad..b8de5fb 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/PathNode.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 using System;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs
index e0138c1..f3643eb 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegGraph.cs
@@ -1,4 +1,5 @@
-using System.Collections.Generic;
+// lucene version compatibility level: 4.8.1
+using System.Collections.Generic;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs
index 48ba8ce..f557cbe 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegToken.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs
index 008460e..5b61cff 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenFilter.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Analysis.Cn.Smart.HHMM
+// lucene version compatibility level: 4.8.1
+namespace Lucene.Net.Analysis.Cn.Smart.HHMM
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs
index b7b697a..b5ceecd 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/SegTokenPair.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 
 namespace Lucene.Net.Analysis.Cn.Smart.HHMM
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs b/src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs
index c857380..c0cd331 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HHMM/WordDictionary.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
 using Lucene.Net.Support.IO;
 using System;
 using System.IO;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs b/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs
index 10d6de7..27ca17c 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizer.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Cn.Smart.HHMM;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Cn.Smart.HHMM;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs b/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs
index bb2e8a9..13a9215 100644
--- a/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/HMMChineseTokenizerFactory.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Util;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs b/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs
index 28e949d..08b5a31 100644
--- a/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.TokenAttributes;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.TokenAttributes;
 using System;
 using System.IO;
 using System.Text;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs b/src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs
index 97c36ee..dde51c4 100644
--- a/src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/SmartChineseAnalyzer.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Core;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.En;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/SmartChineseSentenceTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/SmartChineseSentenceTokenizerFactory.cs b/src/Lucene.Net.Analysis.SmartCn/SmartChineseSentenceTokenizerFactory.cs
index 498e9fd..d2d5dfe 100644
--- a/src/Lucene.Net.Analysis.SmartCn/SmartChineseSentenceTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/SmartChineseSentenceTokenizerFactory.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Util;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/SmartChineseWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/SmartChineseWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.SmartCn/SmartChineseWordTokenFilterFactory.cs
index 79b0ec5..50f4b6d 100644
--- a/src/Lucene.Net.Analysis.SmartCn/SmartChineseWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/SmartChineseWordTokenFilterFactory.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Util;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Util;
 using System;
 using System.Collections.Generic;
 using System.Linq;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/Utility.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Utility.cs b/src/Lucene.Net.Analysis.SmartCn/Utility.cs
index 8160ecc..d34252f 100644
--- a/src/Lucene.Net.Analysis.SmartCn/Utility.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/Utility.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Analysis.Cn.Smart
+// lucene version compatibility level: 4.8.1
+namespace Lucene.Net.Analysis.Cn.Smart
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs b/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
index fed2b0b..9917509 100644
--- a/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Cn.Smart.HHMM;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Cn.Smart.HHMM;
 using Lucene.Net.Support;
 using System.Collections.Generic;
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs b/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
index d7a419d..c8a706c 100644
--- a/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Cn.Smart.HHMM;
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Cn.Smart.HHMM;
 using Lucene.Net.Analysis.TokenAttributes;
 using System;
 using System.Collections.Generic;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c9973565/src/Lucene.Net.Analysis.SmartCn/WordType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/WordType.cs b/src/Lucene.Net.Analysis.SmartCn/WordType.cs
index 0eb4948..4c2ce1e 100644
--- a/src/Lucene.Net.Analysis.SmartCn/WordType.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/WordType.cs
@@ -1,4 +1,5 @@
-namespace Lucene.Net.Analysis.Cn.Smart
+// lucene version compatibility level: 4.8.1
+namespace Lucene.Net.Analysis.Cn.Smart
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more


[10/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
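A minimal usage sketch (not part of this commit's diff): since only the commons-codec language features were ported, they should be callable directly once the new project is referenced. The namespaces below follow the folder layout in this diff, but the class names, method names, and enum casing are assumptions mirroring the Java commons-codec/Lucene API rather than verified members of this commit.

    // C# sketch, assuming the .NET port mirrors the commons-codec "language" API.
    using System;
    using Lucene.Net.Analysis.Phonetic.Language;        // DoubleMetaphone (assumed namespace)
    using Lucene.Net.Analysis.Phonetic.Language.Bm;     // PhoneticEngine, NameType, RuleType (assumed)

    public static class PhoneticDemo
    {
        public static void Main()
        {
            // Double Metaphone: similar-sounding surnames should collapse to the same code.
            var dm = new DoubleMetaphone();
            Console.WriteLine(dm.GetDoubleMetaphone("Smith"));   // expected to equal the next line
            Console.WriteLine(dm.GetDoubleMetaphone("Smyth"));

            // Beider-Morse: driven by the lang.txt / gen_rules_*.txt resources added below.
            var engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
            Console.WriteLine(engine.Encode("Schmidt"));
        }
    }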
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
new file mode 100644
index 0000000..2a69a96
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ssj" "" "" "S"
+"sj" "" "" "S"
+"ch" "" "" "x"
+"c" "" "[eiy]" "ts"   
+"ck" "" "" "k"     // German
+"pf" "" "" "(pf|p|f)" // German
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+"th" "^" "" "t" // German
+"th" "" "[äöüaeiou]" "(t|th)" // German
+"th" "" "" "t" // German
+"ss" "" "" "s"
+"h" "[aeiouy]" "" ""
+
+// VOWELS
+"aue" "" "" "aue" 
+"ou" "" "" "au" 
+"ie" "" "" "(Q|i)" 
+"uu" "" "" "(Q|u)"   
+"ee" "" "" "e"   
+"eu" "" "" "(Y|Yj)" // Dutch Y  
+"aa" "" "" "a"   
+"oo" "" "" "o"   
+"oe" "" "" "u"   
+"ij" "" "" "ej"
+"ui" "" "" "(Y|uj)"
+"ei" "" "" "(ej|aj)" // Dutch ej
+
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+"i" "[aou]" "" "j"
+"y" "[aeou]" "" "j"
+
+// LATIN ALPHABET     
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "(g|x)"
+"h" "" "" "h"
+"i" "" "" "(i|Q)"   
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"   
+"v" "" "" "v"
+"w" "" "" "(w|v)"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

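For reference, each non-comment line in these gen_rules_*.txt files is a four-column Beider-Morse rule: the literal pattern to match, a left-context regex, a right-context regex ("^" anchors to the start of the word, "$" to the end, and "" matches anywhere), and the phoneme text to emit, where "(a|b)" lists alternatives. The toy sketch below only illustrates how such a rule line could fire at a position in a word; it is not the ported PhoneticEngine, and the class and method names are made up for illustration.

    // Toy C# illustration of the "pattern" "left" "right" "phoneme" rule format.
    // Not the ported engine -- just shows how a rule such as "sj" "" "" "S"
    // from gen_rules_dutch.txt above could be tested at a position in a word.
    using System;
    using System.Text.RegularExpressions;

    public sealed class BmRule
    {
        public string Pattern { get; }      // literal text to match at the current position
        public Regex LeftContext { get; }   // must match the text before the pattern
        public Regex RightContext { get; }  // must match the text after the pattern
        public string Phoneme { get; }      // emitted phoneme(s), e.g. "S" or "(pf|p|f)"

        public BmRule(string pattern, string left, string right, string phoneme)
        {
            Pattern = pattern;
            LeftContext = new Regex(left + "$");    // prefix must end with the left context
            RightContext = new Regex("^" + right);  // suffix must start with the right context
            Phoneme = phoneme;
        }

        // True if the rule fires at position i of word.
        public bool Matches(string word, int i) =>
            i + Pattern.Length <= word.Length
            && word.Substring(i, Pattern.Length) == Pattern
            && LeftContext.IsMatch(word.Substring(0, i))
            && RightContext.IsMatch(word.Substring(i + Pattern.Length));
    }

    public static class BmRuleDemo
    {
        public static void Main()
        {
            var rule = new BmRule("sj", "", "", "S");       // a Dutch consonant rule above
            Console.WriteLine(rule.Matches("sjouke", 0));   // True -> emit "S"
        }
    }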
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
new file mode 100644
index 0000000..db9ccec
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"�" "" "" "" // O�Neill
+"'" "" "" "" // O�Neill
+"mc" "^" "" "mak" // McDonald
+"tz" "" "" "ts" // Fitzgerald
+"tch" "" "" "tS"
+"ch" "" "" "(tS|x)"
+"ck" "" "" "k"
+"cc" "" "[iey]" "ks" // success, accent
+"c" "" "c" ""
+"c" "" "[iey]" "s" // circle 
+
+"gh" "^" "" "g" // ghost
+"gh" "" "" "(g|f|w)" // burgh | tough | bough
+"gn" "" "" "(gn|n)"
+"g" "" "[iey]" "(g|dZ)" // get, gem, giant, gigabyte
+// "th" "" "" "(6|8|t)"
+"th" "" "" "t"
+"kh" "" "" "x"
+"ph" "" "" "f"
+"sch" "" "" "(S|sk)"
+"sh" "" "" "S"
+"who" "^" "" "hu"
+"wh" "^" "" "w"
+
+"h" "" "$" "" // hard to find an example that isn't in a name
+"h" "" "[^aeiou]" "" // hard to find an example that isn't in a name
+"h" "^" "" "H"
+
+"kn" "^" "" "n" // knight
+"mb" "" "$" "m"
+"ng" "" "$" "(N|ng)"
+"pn" "^" "" "(pn|n)"
+"ps" "^" "" "(ps|s)"
+"qu" "" "" "kw"
+"tia" "" "" "(So|Sa)"
+"tio" "" "" "So"
+"wr" "^" "" "r"
+"x" "^" "" "z"
+
+// VOWELS
+"y" "^" "" "j"
+"y" "^" "[aeiouy]" "j"
+"yi" "^" "" "i"
+"aue" "" "" "aue" 
+"oue" "" "" "(aue|oue)" 
+"ai" "" "" "(aj|ej|e)" // rain | said
+"ay" "" "" "(aj|ej)" 
+"a" "" "[^aeiou]e" "ej" // plane 
+"ei" "" "" "(ej|aj|i)" // weigh | receive
+"ey" "" "" "(ej|aj|i)" // hey | barley
+"ear" "" "" "ia" // tear
+"ea" "" "" "(i|e)" // reason | treasure
+"ee" "" "" "i" // between
+"e" "" "[^aeiou]e" "i" // meter
+"e" "" "$" "(|E)" // blame, badge
+"ie" "" "" "i" // believe
+"i" "" "[^aeiou]e" "aj" // five
+"oa" "" "" "ou" // toad
+"oi" "" "" "oj" // join
+"oo" "" "" "u" // food
+"ou" "" "" "(u|ou)" // through | tough | could
+"oy" "" "" "oj" // boy
+"o" "" "[^aeiou]e" "ou" // rode
+"u" "" "[^aeiou]e" "(ju|u)" // cute | flute
+"u" "" "r" "(e|u)" // turn -- Morse disagrees, feels it should go to E
+
+// LATIN ALPHABET
+"a" "" "" "(e|o|a)" // hat | call | part
+"b" "" "" "b"
+"c" "" "" "k" // candy
+"d" "" "" "d"
+"e" "" "" "E" // bed
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"    
+"i" "" "" "I" 
+"j" "" "" "dZ"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|a)" // hot 
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|a)" // put
+"v" "" "" "v"
+"w" "" "" "(w|v)" // the variant "v" is for spellings coming from German/Polish
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
new file mode 100644
index 0000000..e67a0ec
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"lt" "u" "$" "(lt|)" // Renault
+"c" "n" "$" "(k|)" // Tronc
+//"f" "" "" "(f|)" // Clef
+"d" "" "$" "(t|)" // Durand
+"g" "n" "$" "(k|)" // Gang
+"p" "" "$" "(p|)" // Trop, Champ
+"r" "e" "$" "(r|)" // Barbier
+"t" "" "$" "(t|)" // Murat, Constant
+"z" "" "$" "(s|)" 
+
+"ds" "" "$" "(ds|)" 
+"ps" "" "$" "(ps|)" // Champs
+"rs" "e" "$" "(rs|)" 
+"ts" "" "$" "(ts|)" 
+"s" "" "$" "(s|)" // Denis
+
+"x" "u" "$" "(ks|)" // Arnoux
+
+"s" "[aeéèêiou]" "[^aeéèêiou]" "(s|)" // Deschamps, Malesherbes, Groslot
+"t" "[aeéèêiou]" "[^aeéèêiou]" "(t|)" // Petitjean
+
+"kh" "" "" "x" // foreign
+"ph" "" "" "f"
+
+"ç" "" "" "s"
+"x" "" "" "ks"
+"ch" "" "" "S"
+"c" "" "[eiyéèê]" "s"
+
+"gn" "" "" "(n|gn)"
+"g" "" "[eiy]" "Z" 
+"gue" "" "$" "k"     
+"gu" "" "[eiy]" "g" 
+"aill" "" "e" "aj" // non Jewish
+"ll" "" "e" "(l|j)" // non Jewish
+"que" "" "$" "k"
+"qu" "" "" "k"
+"s" "[aeiouyéèê]" "[aeiouyéèê]" "z"
+"h" "[bdgt]" "" "" // translit from Arabic
+
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n)"  // nasal
+
+"ou" "" "[aeio]" "v" 
+"u" "" "[aeio]" "v" 
+
+// VOWELS
+"aue" "" "" "aue" 
+"eau" "" "" "o" 
+"au" "" "" "(o|au)" // non Jewish
+"ai" "" "" "(e|aj)" // [e] is non Jewish
+"ay" "" "" "(e|aj)" // [e] is non Jewish
+"é" "" "" "e"
+"ê" "" "" "e"
+"è" "" "" "e"
+"à" "" "" "a"
+"â" "" "" "a"
+"où" "" "" "u"
+"ou" "" "" "u"
+"oi" "" "" "(oj|va)" // [va] (actually "ua") is non Jewish
+"ei" "" "" "(aj|ej|e)" // [e] is non Jewish
+"ey" "" "" "(aj|ej|e)" // [e] non Jewish
+"eu" "" "" "(ej|Y)" // non Jewish
+"y" "[ou]" "" "j"
+"e" "" "$" "(e|)"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e" 
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i" 
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"
+"v" "" "" "v"
+"w" "" "" "v"
+"y" "" "" "i"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
new file mode 100644
index 0000000..1e79c35
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONSONANTS
+"ewitsch" "" "$" "evitS"
+"owitsch" "" "$" "ovitS"
+"evitsch" "" "$" "evitS"
+"ovitsch" "" "$" "ovitS"
+"witsch" "" "$" "vitS"
+"vitsch" "" "$" "vitS"
+"ssch" "" "" "S"
+"chsch" "" "" "xS"
+"sch" "" "" "S"
+
+"ziu" "" "" "tsu"
+"zia" "" "" "tsa"
+"zio" "" "" "tso"
+
+"chs" "" "" "ks"
+"ch" "" "" "x"
+"ck" "" "" "k"
+"c" "" "[eiy]" "ts"
+
+"sp" "^" "" "Sp"
+"st" "^" "" "St"
+"ssp" "" "" "(Sp|sp)"
+"sp" "" "" "(Sp|sp)"
+"sst" "" "" "(St|st)"
+"st" "" "" "(St|st)"
+"pf" "" "" "(pf|p|f)"
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+
+"ewitz" "" "$" "(evits|evitS)"
+"ewiz" "" "$" "(evits|evitS)"
+"evitz" "" "$" "(evits|evitS)"
+"eviz" "" "$" "(evits|evitS)"
+"owitz" "" "$" "(ovits|ovitS)"
+"owiz" "" "$" "(ovits|ovitS)"
+"ovitz" "" "$" "(ovits|ovitS)"
+"oviz" "" "$" "(ovits|ovitS)"
+"witz" "" "$" "(vits|vitS)"
+"wiz" "" "$" "(vits|vitS)"
+"vitz" "" "$" "(vits|vitS)"
+"viz" "" "$" "(vits|vitS)"
+"tz" "" "" "ts"
+
+"thal" "" "$" "tal"
+"th" "^" "" "t"
+"th" "" "[äöüaeiou]" "(t|th)"
+"th" "" "" "t"
+"rh" "^" "" "r"
+"h" "[aeiouyäöü]" "" ""
+"h" "^" "" "H"
+
+"ss" "" "" "s"
+"s" "" "[äöüaeiouy]" "(z|s)"
+"s" "[aeiouyäöüj]" "[aeiouyäöü]" "z"
+"ß" "" "" "s"
+
+
+// VOWELS
+"ij" "" "$" "i"
+"aue" "" "" "aue"
+"ue" "" "" "Q"
+"ae" "" "" "Y"
+"oe" "" "" "Y"
+"ü" "" "" "Q"
+"ä" "" "" "Y"
+"ö" "" "" "Y"
+"ei" "" "" "(aj|ej)"
+"ey" "" "" "(aj|ej)"
+"eu" "" "" "(Yj|ej|aj|oj)"
+"i" "[aou]" "" "j"
+"y" "[aou]" "" "j"
+"ie" "" "" "I"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// FOREIGN LETTERs
+"ñ" "" "" "n"
+"ã" "" "" "a"
+"ő" "" "" "o"
+"ű" "" "" "u"
+"ç" "" "" "s"
+
+// LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "(f|v)"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "ts"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
new file mode 100644
index 0000000..f396a65
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"αυ" "" "$" "af"  // "av" before vowels and voiced consonants, "af" elsewhere
+"αυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "af" 
+"αυ" "" "" "av" 
+"ευ" "" "$" "ef" // "ev" before vowels and voiced consonants, "ef" elsewhere
+"ευ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "ef" 
+"ευ" "" "" "ev" 
+"ηυ" "" "$" "if" // "iv" before vowels and voiced consonants, "if" elsewhere
+"ηυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "if" 
+"ηυ" "" "" "iv" 
+"ου" "" "" "u"  // [u:]
+
+"αι" "" "" "aj"  // modern [e]
+"ει" "" "" "ej" // modern [i]
+"οι" "" "" "oj" // modern [i]
+"ωι" "" "" "oj" 
+"ηι" "" "" "ej" 
+"υι" "" "" "i" // modern Greek "i"
+
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γγ" "" "(ε|ι|η)" "j"
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γγ" "" "" "g" 
+"γκ" "^" "" "g"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γκ" "" "(ε|ι|η)" "j"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γκ" "" "" "g" 
+"γι" "" "(α|ο|ω|υ)" "j"
+"γι" "" "" "(gi|i)"
+"γε" "" "(α|ο|ω|υ)" "j"
+"γε" "" "" "(ge|je)"
+
+"κζ" "" "" "gz"
+"τζ" "" "" "dz"
+"σ" "" "(β|γ|δ|μ|ν|ρ)" "z"
+
+"μβ" "" "" "(mb|b)"
+"μπ" "^" "" "b"
+"μπ" "(ε|ι|η|α|ο|ω|υ)" "" "mb"
+"μπ" "" "" "b" // after any consonant
+"ντ" "^" "" "d"
+"ντ" "(ε|ι|η|α|ο|ω|υ)" "" "(nd|nt)" // Greek is "nd" 
+"ντ" "" "" "(nt|d)" // Greek is "d" after any consonant
+
+"ά" "" "" "a"
+"έ" "" "" "e"
+"ή" "" "" "(i|e)" 
+"ί" "" "" "i"   
+"ό" "" "" "o"
+"ύ" "" "" "(Q|i|u)"
+"ώ" "" "" "o"
+"ΰ" "" "" "(Q|i|u)"
+"ϋ" "" "" "(Q|i|u)"
+"ϊ" "" "" "j"
+
+"α" "" "" "a"
+"β" "" "" "(v|b)" // modern "v", old "b"
+"γ" "" "" "g" 
+"δ" "" "" "d"    // modern like "th" in English "them", old "d"
+"ε" "" "" "e"
+"ζ" "" "" "z"
+"η" "" "" "(i|e)" // modern "i", old "e:"
+"ι" "" "" "i"
+"κ" "" "" "k"
+"λ" "" "" "l"
+"μ" "" "" "m"
+"ν" "" "" "n"
+"ξ" "" "" "ks"
+"ο" "" "" "o"
+"π" "" "" "p"
+"ρ" "" "" "r"
+"σ" "" "" "s"
+"ς" "" "" "s"
+"τ" "" "" "t" 
+"υ" "" "" "(Q|i|u)" // modern "i", old like German "ü"
+"φ" "" "" "f" 
+"θ" "" "" "t" // old greek like "th" in English "theme"
+"χ" "" "" "x"
+"ψ" "" "" "ps"
+"ω" "" "" "o"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
new file mode 100644
index 0000000..43ec3f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"au" "" "$" "af"
+"au" "" "[kpstfh]" "af"
+"au" "" "" "av"
+"eu" "" "$" "ef"
+"eu" "" "[kpstfh]" "ef"
+"eu" "" "" "ev"
+"ou" "" "" "u"
+
+"gge" "[aeiouy]" "" "(nje|je)" // aggelopoulos
+"ggi" "[aeiouy]" "[aou]" "(nj|j)" 
+"ggi" "[aeiouy]" "" "(ni|i)" 
+"gge" "" "" "je"
+"ggi" "" "" "i"
+"gg" "[aeiouy]" "" "(ng|g)"
+"gg" "" "" "g" 
+"gk" "^" "" "g"
+"gke" "[aeiouy]" "" "(nje|je)"
+"gki" "[aeiouy]" "" "(ni|i)"
+"gke" "" "" "je"
+"gki" "" "" "i"
+"gk" "[aeiouy]" "" "(ng|g)"
+"gk" "" "" "g" 
+"nghi" "" "[aouy]" "Nj"
+"nghi" "" "" "(Ngi|Ni)" 
+"nghe" "" "[aouy]" "Nj"
+"nghe" "" "" "(Nje|Nge)" 
+"ghi" "" "[aouy]" "j"
+"ghi" "" "" "(gi|i)" 
+"ghe" "" "[aouy]" "j"
+"ghe" "" "" "(je|ge)" 
+"ngh" "" "" "Ng"
+"gh" "" "" "g"
+"ngi" "" "[aouy]" "Nj" 
+"ngi" "" "" "(Ngi|Ni)" 
+"nge" "" "[aouy]" "Nj" 
+"nge" "" "" "(Nje|Nge)" 
+"gi" "" "[aouy]" "j" 
+"gi" "" "" "(gi|i)" // what about Pantazis = Pantagis ???
+"ge" "" "[aouy]" "j" 
+"ge" "" "" "(je|ge)" 
+"ng" "" "" "Ng" // fragakis = fraggakis = frangakis; angel = agel = aggel 
+
+"i" "" "[aeou]" "j"
+"i" "[aeou]" "" "j"  
+"y" "" "[aeou]" "j"
+"y" "[aeou]" "" "j"  
+"yi" "" "[aeou]" "j"
+"yi" "" "" "i"
+
+"ch" "" "" "x"
+"kh" "" "" "x"
+"dh" "" "" "d"  // actually as "th" in English "that"
+"dj" "" "" "dZ" // Turkish words
+"ph" "" "" "f"
+"th" "" "" "t"
+"kz" "" "" "gz"
+"tz" "" "" "dz" 
+"s" "" "[bgdmnr]" "z"
+
+"mb" "" "" "(mb|b)" // Liberis = Limperis = Limberis
+"mp" "^" "" "b"
+"mp" "[aeiouy]" "" "mp"
+"mp" "" "" "b"
+"nt" "^" "" "d"
+"nt" "[aeiouy]" "" "(nd|nt)" // Greek "nd"
+"nt" "" "" "(nt|d)" // Greek "d" after any consonant
+
+"á" "" "" "a"  
+"é" "" "" "e"  
+"í" "" "" "i"  
+"ó" "" "" "o"  
+"óu" "" "" "u"  
+"ú" "" "" "u" 
+"ý" "" "" "(i|Q|u)" // [ü]
+
+"a" "" "" "a"
+"b" "" "" "(b|v)" // beta: modern "v", old "b"
+"c" "" "" "k"
+"d" "" "" "d"    // modern like "th" in English "them", old "d"
+"e" "" "" "e"
+"f" "" "" "f" 
+"g" "" "" "g" 
+"h" "" "" "x"
+"i" "" "" "i"
+"j" "" "" "(j|Z)" // Panajotti = Panaiotti; Louijos = Louizos; Pantajis = Pantazis = Pantagis
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"ο" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t" 
+"u" "" "" "u" 
+"v" "" "" "v" 
+"w" "" "" "v" // foreign
+"x" "" "" "ks"
+"y" "" "" "(i|Q|u)" // [ü] 
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
new file mode 100644
index 0000000..7e039d5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// General = Ashkenazic
+
+"אי" "" "" "i"
+"עי" "" "" "i"
+"עו" "" "" "VV"
+"או" "" "" "VV"
+
+"ג׳" "" "" "Z"
+"ד׳" "" "" "dZ"
+
+"א" "" "" "L"
+"ב" "" "" "b"
+"ג" "" "" "g"
+"ד" "" "" "d"
+
+"ה" "^" "" "1"
+"ה" "" "$" "1"
+"ה" "" "" ""
+
+"וו" "" "" "V"
+"וי" "" "" "WW"
+"ו" "" "" "W"
+"ז" "" "" "z"
+"ח" "" "" "X"
+"ט" "" "" "T"
+"יי" "" "" "i"
+"י" "" "" "i"
+"ך" "" "" "X"
+"כ" "^" "" "K"
+"כ" "" "" "k"
+"ל" "" "" "l"
+"ם" "" "" "m"
+"מ" "" "" "m"
+"ן" "" "" "n"
+"נ" "" "" "n"
+"ס" "" "" "s"
+"ע" "" "" "L"
+"ף" "" "" "f"
+"פ" "" "" "f"
+"ץ" "" "" "C"
+"צ" "" "" "C"
+"ק" "" "" "K"
+"ר" "" "" "r"
+"ש" "" "" "s"
+"ת" "" "" "TB" // only Ashkenazic

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
new file mode 100644
index 0000000..615d26a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"sz" "" "" "s"
+"zs" "" "" "Z"
+"cs" "" "" "tS"
+
+"ay" "" "" "(oj|aj)"
+"ai" "" "" "(oj|aj)"
+"aj" "" "" "(oj|aj)"
+
+"ei" "" "" "(aj|ej)" // German element
+"ey" "" "" "(aj|ej)" // German element
+
+"y" "[áo]" "" "j"
+"i" "[áo]" "" "j"
+"ee" "" "" "(ej|e)" 
+"ely" "" "" "(ej|eli)"
+"ly" "" "" "(j|li)"
+"gy" "" "[aeouáéóúüöőű]" "dj"
+"gy" "" "" "(d|gi)"
+"ny" "" "[aeouáéóúüöőű]" "nj"
+"ny" "" "" "(n|ni)"
+"ty" "" "[aeouáéóúüöőű]" "tj"
+"ty" "" "" "(t|ti)"
+"qu" "" "" "(ku|kv)"
+"h" "" "$" ""
+
+// SPECIAL VOWELS
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ö" "" "" "Y"
+"ő" "" "" "Y" 
+"ü" "" "" "Q"
+"ű" "" "" "Q"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(S|s)" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v" 
+"w" "" "" "v" 
+"x" "" "" "ks"
+"y" "" "" "i" 
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
new file mode 100644
index 0000000..8775edd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(l|gli)"
+"gn" "" "[aeou]" "(n|nj|gn)"
+"gni" "" "" "(ni|gni)"
+
+"gi" "" "[aeou]" "dZ"
+"gg" "" "[ei]" "dZ"
+"g" "" "[ei]" "dZ"
+"h" "[bdgt]" "" "g" // gh is It; others from Arabic translit
+"h" "" "$" "" // foreign
+
+"ci" "" "[aeou]" "tS"
+"ch" "" "[ei]" "k"
+"sc" "" "[ei]" "S" 
+"cc" "" "[ei]" "tS"
+"c" "" "[ei]" "tS"
+"s" "[aeiou]" "[aeiou]" "z"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aeou]" "" "j" // foreign
+"y" "" "[aeou]" "j" // foreign
+
+"qu" "" "" "k"    
+"uo" "" "" "(vo|o)"
+"u" "" "[aei]" "v" 
+
+"�" "" "" "e" 
+"�" "" "" "e" 
+"�" "" "" "o"  
+"�" "" "" "o" 
+
+// LATIN ALPHABET    
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(Z|dZ|j)" // foreign
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    // foreign
+"x" "" "" "ks"    // foreign
+"y" "" "" "i"    // foreign
+"z" "" "" "(ts|dz)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
new file mode 100644
index 0000000..dd72f6a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"ska" "" "$" "ski"   
+"cka" "" "$" "tski"   
+"lowa" "" "$" "(lova|lof|l|el)"   
+"kowa" "" "$" "(kova|kof|k|ek)"   
+"owa" "" "$" "(ova|of|)"  
+"lowna" "" "$" "(lovna|levna|l|el)" 
+"kowna" "" "$" "(kovna|k|ek)"  
+"owna" "" "$" "(ovna|)"   
+"lówna" "" "$" "(l|el)"   
+"kówna" "" "$" "(k|ek)"   
+"ówna" "" "$" ""   
+"a" "" "$" "(a|i)"   
+
+// CONSONANTS
+"czy" "" "" "tSi"
+"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)"
+"ciewicz" "" "" "(tsevitS|tSevitS)"
+"siewicz" "" "" "(sevitS|SevitS)"
+"ziewicz" "" "" "(zevitS|ZevitS)"
+"riewicz" "" "" "rjevitS" 
+"diewicz" "" "" "djevitS" 
+"tiewicz" "" "" "tjevitS" 
+"iewicz" "" "" "evitS"
+"ewicz" "" "" "evitS"
+"owicz" "" "" "ovitS"
+"icz" "" "" "itS"
+"cz" "" "" "tS"
+"ch" "" "" "x"
+
+"cia" "" "[bcdgkpstwzż]" "(tSB|tsB)"
+"cia" "" "" "(tSa|tsa)" 
+"cią" "" "[bp]" "(tSom|tsom)"
+"cią" "" "" "(tSon|tson)"
+"cię" "" "[bp]" "(tSem|tsem)"
+"cię" "" "" "(tSen|tsen)"
+"cie" "" "[bcdgkpstwzż]" "(tSF|tsF)" 
+"cie" "" "" "(tSe|tse)" 
+"cio" "" "" "(tSo|tso)" 
+"ciu" "" "" "(tSu|tsu)" 
+"ci" "" "" "(tSi|tsI)"
+"ć" "" "" "(tS|ts)"
+
+"ssz" "" "" "S"
+"sz" "" "" "S"
+"sia" "" "[bcdgkpstwzż]" "(SB|sB|sja)" 
+"sia" "" "" "(Sa|sja)" 
+"sią" "" "[bp]" "(Som|som)"
+"sią" "" "" "(Son|son)"
+"się" "" "[bp]" "(Sem|sem)"
+"się" "" "" "(Sen|sen)"
+"sie" "" "[bcdgkpstwzż]" "(SF|sF|se)" 
+"sie" "" "" "(Se|se)" 
+"sio" "" "" "(So|so)" 
+"siu" "" "" "(Su|sju)" 
+"si" "" "" "(Si|sI)"
+"ś" "" "" "(S|s)"
+
+"zia" "" "[bcdgkpstwzż]" "(ZB|zB|zja)" 
+"zia" "" "" "(Za|zja)" 
+"zią" "" "[bp]" "(Zom|zom)"
+"zią" "" "" "(Zon|zon)"
+"zię" "" "[bp]" "(Zem|zem)"
+"zię" "" "" "(Zen|zen)"
+"zie" "" "[bcdgkpstwzż]" "(ZF|zF)" 
+"zie" "" "" "(Ze|ze)" 
+"zio" "" "" "(Zo|zo)" 
+"ziu" "" "" "(Zu|zju)" 
+"zi" "" "" "(Zi|zI)"
+
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF)"
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF|ze|zF)"
+"że" "" "" "Ze"
+"źe" "" "" "(Ze|ze)"
+"ży" "" "" "Zi"
+"źi" "" "" "(Zi|zi)"
+"ż" "" "" "Z"
+"ź" "" "" "(Z|z)"
+
+"rze" "t" "" "(Se|re)"
+"rze" "" "" "(Ze|re|rZe)"
+"rzy" "t" "" "(Si|ri)"
+"rzy" "" "" "(Zi|ri|rZi)"
+"rz" "t" "" "(S|r)"
+"rz" "" "" "(Z|r|rZ)"
+
+"lio" "" "" "(lo|le)"
+"ł" "" "" "l"
+"ń" "" "" "n"
+"qu" "" "" "k"
+"s" "" "s" "" 
+
+// VOWELS   
+"ó" "" "" "(u|o)"
+"ą" "" "[bp]" "om"
+"ę" "" "[bp]" "em"
+"ą" "" "" "on"
+"ę" "" "" "en"
+
+"ije" "" "" "je"
+"yje" "" "" "je"
+"iie" "" "" "je"
+"yie" "" "" "je"
+"iye" "" "" "je"
+"yye" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+
+"rie" "" "" "rje" 
+"die" "" "" "dje" 
+"tie" "" "" "tje" 
+"ie" "" "[bcdgkpstwzż]" "F" 
+"ie" "" "" "e"
+
+"aue" "" "" "aue"
+"au" "" "" "au"
+
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"ej" "" "" "aj"
+
+"ai" "" "" "aj"
+"ay" "" "" "aj"
+"aj" "" "" "aj"
+
+"i" "[aeou]" "" "j" 
+"y" "[aeou]" "" "j" 
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+
+"a" "" "[bcdgkpstwzż]" "B" 
+"e" "" "[bcdgkpstwzż]" "(E|F)" 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "P" 
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|x)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "I"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
new file mode 100644
index 0000000..74de1d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+"ch" "" "" "S"
+"ss" "" "" "s"
+"sc" "" "[ei]" "s"
+"sç" "" "[aou]" "s"
+"ç" "" "" "s"
+"c" "" "[ei]" "s"
+//  "c" "" "[aou]" "(k|C)"
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "z"
+"s" "" "[dglmnrv]" "(Z|S)" // Z is Brazil
+
+"z" "" "$" "(Z|s|S)" // s and S in Brazil
+"z" "" "[bdgv]" "(Z|z)" // Z in Brazil
+"z" "" "[ptckf]" "(s|S|z)" // s and S in Brazil
+
+"gu" "" "[eiu]" "g"    
+"gu" "" "[ao]" "gv"    
+"g" "" "[ei]" "Z"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "kv"    
+
+"uo" "" "" "(vo|o|u)"
+"u" "" "[aei]" "v" 
+
+"lh" "" "" "l"
+"nh" "" "" "nj"
+"h" "[bdgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+
+"ex" "" "[aáuiíoóeéêy]" "(ez|eS|eks)" // ez in Brazil
+"ex" "" "[cs]" "e" 
+
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"m" "" "[bcdfglnprstv]" "(m|n)" // maybe to add a rule for m/n before a consonant that disappears [preceding vowel becomes nasalized]
+"m" "" "$" "(m|n)" // maybe to add a rule for final m/n that disappears [preceding vowel becomes nasalized]
+
+"ão" "" "" "(au|an|on)"
+"ãe" "" "" "(aj|an)"
+"ãi" "" "" "(aj|an)"
+"õe" "" "" "(oj|on)"
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+
+"â" "" "" "a"
+"à" "" "" "a"
+"á" "" "" "a"
+"ã" "" "" "(a|an|on)"
+"é" "" "" "e"
+"ê" "" "" "e"
+"í" "" "" "i"
+"ô" "" "" "o"
+"ó" "" "" "o"
+"õ" "" "" "(o|on)"
+"ú" "" "" "u"
+"ü" "" "" "u"
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "(e|i)"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|u)"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "S"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "(S|ks)"   
+"y" "" "" "i"   
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
new file mode 100644
index 0000000..a6d0aac
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ce" "" "" "tSe"
+"ci" "" "" "(tSi|tS)"
+"ch" "" "[ei]" "k"
+"ch" "" "" "x" // foreign
+
+"gi" "" "" "(dZi|dZ)"
+"g" "" "[ei]" "dZ"
+"gh" "" "" "g"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"ţ" "" "" "ts"
+"ş" "" "" "S"
+"qu" "" "" "k"    
+
+"î" "" "" "i"
+"ea" "" "" "ja"
+"ă" "" "" "(e|a)"
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(x|h)"
+"i" "" "" "I"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"    
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
new file mode 100644
index 0000000..310be84
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//GENERAL// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in|ina)" 
+"ina" "" "$" "(in|ina)" 
+"liova" "" "$" "(lof|lef)" 
+"lova" "" "$" "(lof|lef|lova)" 
+"ova" "" "$" "(of|ova)" 
+"eva" "" "$" "(ef|ova)" 
+"aia" "" "$" "(aja|i)" 
+"aja" "" "$" "(aja|i)" 
+"aya" "" "$" "(aja|i)" 
+
+//SPECIAL CONSONANTS
+"tsya" "" "" "tsa" 
+"tsyu" "" "" "tsu" 
+"tsia" "" "" "tsa" 
+"tsie" "" "" "tse" 
+"tsio" "" "" "tso"   
+"tsye" "" "" "tse" 
+"tsyo" "" "" "tso" 
+"tsiu" "" "" "tsu" 
+"sie" "" "" "se" 
+"sio" "" "" "so"   
+"zie" "" "" "ze" 
+"zio" "" "" "zo"   
+"sye" "" "" "se" 
+"syo" "" "" "so"   
+"zye" "" "" "ze" 
+"zyo" "" "" "zo"   
+
+"ger" "" "$" "ger" 
+"gen" "" "$" "gen" 
+"gin" "" "$" "gin" 
+"gg" "" "" "g" 
+"g" "[jaeoiuy]" "[aeoiu]" "g" 
+"g" "" "[aeoiu]" "(g|h)" 
+
+"kh" "" "" "x"
+"ch" "" "" "(tS|x)" 
+"sch" "" "" "(StS|S)"
+"ssh" "" "" "S"
+"sh" "" "" "S"
+"zh" "" "" "Z" 
+"tz" "" "$" "ts" 
+"tz" "" "" "(ts|tz)" 
+"c" "" "[iey]" "s" 
+"qu" "" "" "(kv|k)" 
+"s" "" "s" ""
+
+//SPECIAL VOWELS
+"lya" "" "" "la" 
+"lyu" "" "" "lu"  
+"lia" "" "" "la" // not in DJSRE
+"liu" "" "" "lu"  // not in DJSRE
+"lja" "" "" "la" // not in DJSRE
+"lju" "" "" "lu"  // not in DJSRE
+"le" "" "" "(lo|lE)" //not in DJSRE
+"lyo" "" "" "(lo|le)" //not in DJSRE
+"lio" "" "" "(lo|le)" 
+
+"ije" "" "" "je"
+"ie" "" "" "je"
+"iye" "" "" "je"
+"iie" "" "" "je"
+"yje" "" "" "je"
+"ye" "" "" "je"
+"yye" "" "" "je"
+"yie" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+
+"io" "" "" "(jo|e)" 
+"i" "" "[au]" "j" 
+"i" "[aeou]" "" "j" 
+"yo" "" "" "(jo|e)" 
+"y" "" "[au]" "j"
+"y" "[aeiou]" "" "j" 
+
+"ii" "" "$" "i" 
+"iy" "" "$" "i" 
+"yy" "" "$" "i" 
+"yi" "" "$" "i" 
+"yj" "" "$" "i"
+"ij" "" "$" "i"
+
+"e" "^" "" "(je|E)" 
+"ee" "" "" "(aje|i)" 
+"e" "[aou]" "" "je" 
+"oo" "" "" "(oo|u)" 
+"'" "" "" "" 
+"\"" "" "" ""
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET 
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h" 
+"i" "" "" "I"
+"j" "" "" "j" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" 
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" 
+"x" "" "" "ks" 
+"y" "" "" "I"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
new file mode 100644
index 0000000..3ba2695
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// Includes both Spanish (Castillian) & Catalan
+
+// CONSONANTS
+"ñ" "" "" "(n|nj)"
+"ny" "" "" "nj" // Catalan
+"ç" "" "" "s" // Catalan
+
+"ig" "[aeiou]" "" "(tS|ig)" // tS is Catalan
+"ix" "[aeiou]" "" "S" // Catalan
+"tx" "" "" "tS" // Catalan
+"tj" "" "$" "tS" // Catalan
+"tj" "" "" "dZ" // Catalan
+"tg" "" "" "(tg|dZ)" // dZ is Catalan
+"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina
+"bh" "" "" "b" // translit. from Arabic
+"h" "[dgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+//"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic
+"m" "" "[bpvf]" "(m|n)"
+"c" "" "[ei]" "s" 
+//  "c" "" "[aou]" "(k|C)"
+"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü"
+"g" "" "[ei]" "(x|g|dZ)"  // "g" only for foreign words; dZ is Catalan
+"qu" "" "" "k"
+
+"uo" "" "" "(vo|o)"    
+"u" "" "[aei]" "v"
+
+// SPECIAL VOWELS
+"ü" "" "" "v"
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"à" "" "" "a"  // Catalan
+"è" "" "" "e" // Catalan
+"ò" "" "" "o"  // Catalan
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "B"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(x|Z)" // Z is Catalan
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "V"
+"w" "" "" "v" // foreign words
+"x" "" "" "(ks|gz|S)" // ks is Spanish, all are Catalan
+"y" "" "" "(i|j)"
+"z" "" "" "(z|s)" // as "c" befoire "e" or "i", in Spain it is like unvoiced English "th"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
new file mode 100644
index 0000000..c639a13
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ç" "" "" "tS"
+"ğ" "" "" "" // to show that previous vowel is long
+"ş" "" "" "S"
+"ü" "" "" "Q"
+"ö" "" "" "Y"
+"ı" "" "" "(e|i|)" // as "e" in English "label"
+
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "dZ"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign words
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" // foreign words
+"x" "" "" "ks" // foreign words
+"y" "" "" "j"
+"z" "" "" "z" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
new file mode 100644
index 0000000..99742b1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
@@ -0,0 +1,293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// 1. following are rules to accept the language
+// 1.1 Special letter combinations
+^o’ english true
+^o' english true
+^mc english true
+^fitz english true
+ceau french+romanian true
+eau$ french true // mp: I've added this
+eaux$ french true // mp: I've added this
+ault$ french true
+oult$ french true
+eux$ french true
+eix$ french true
+glou$ greeklatin true
+uu dutch true
+tx spanish true
+witz german true
+tz$ german+russian+english true
+^tz russian+english true
+poulos$ greeklatin true
+pulos$ greeklatin true
+iou greeklatin true
+sj$ dutch true
+^sj dutch true
+güe spanish true
+güi spanish true
+ghe romanian+greeklatin true
+ghi romanian+greeklatin true
+escu$ romanian true
+esco$ romanian true
+vici$ romanian true
+schi$ romanian true
+ii$ russian true
+iy$ russian true
+yy$ russian true
+yi$ russian true
+^rz polish true
+rz$ polish+german true
+[bcdfgklmnpstwz]rz polish true
+rz[bcdfghklmnpstw] polish true
+etti$ italian true
+eti$ italian true
+ati$ italian true
+ato$ italian true
+[aoei]no$ italian true
+[aoei]ni$ italian true
+esi$ italian true
+oli$ italian true
+field$ english true
+cki$ polish true
+ska$ polish true
+cka$ polish true
+ae german+russian+english true
+oe german+french+russian+english+dutch true
+th$ german+english true
+^th german+english+greeklatin true
+mann german true
+cz polish true
+cy polish+greeklatin true
+niew polish true
+stein german true
+heim$ german true
+heimer$ german true
+thal german true
+zweig german true
+[aeou]h german true
+äh german true
+öh german true
+üh german true
+[ln]h[ao]$ portuguese true
+[ln]h[aou] portuguese+french+german+dutch+czech+spanish+turkish true
+chsch german true
+tsch german true
+sch$ german+russian true
+^sch german+russian true
+ck$ german+english true
+c$ polish+romanian+hungarian+czech+turkish true
+sz polish+hungarian true
+cs$ hungarian true
+^cs hungarian true
+dzs hungarian true
+zs$ hungarian true
+^zs hungarian true
+^wl polish true
+^wr polish+english+german+dutch true
+
+gy$ hungarian true
+gy[aeou] hungarian true
+gy hungarian+russian+french+greeklatin true
+guy french true
+gu[ei] spanish+french+portuguese true
+gu[ao] spanish+portuguese true
+gi[aou] italian+greeklatin true
+
+ly hungarian+russian+polish+greeklatin true
+ny hungarian+russian+polish+spanish+greeklatin true
+ty hungarian+russian+polish+greeklatin true
+
+// 1.2 special characters
+ć polish true
+ç french+spanish+portuguese+turkish true
+č czech true
+ď czech true
+ğ turkish true
+ł polish true
+ń polish true
+ñ spanish true
+ň czech true
+ř czech true
+ś polish true
+ş romanian+turkish true
+š czech true
+ţ romanian true
+ť czech true
+ź polish true
+ż polish true
+
+ß german true
+
+ä german true
+á hungarian+spanish+portuguese+czech+greeklatin true
+â romanian+french+portuguese true
+ă romanian true
+ą polish true
+à portuguese true
+ã portuguese true
+ę polish true
+é french+hungarian+czech+greeklatin true
+è french+spanish+italian true
+ê french true
+ě czech true
+ê french+portuguese true
+í hungarian+spanish+portuguese+czech+greeklatin true
+î romanian+french true
+ı turkish true
+ó polish+hungarian+spanish+italian+portuguese+czech+greeklatin true
+ö german+hungarian+turkish true
+ô french+portuguese true
+õ portuguese+hungarian true
+ò italian+spanish true
+ű hungarian true
+ú hungarian+spanish+portuguese+czech+greeklatin true
+ü german+hungarian+spanish+portuguese+turkish true
+ù french true
+ů czech true
+ý czech+greeklatin true
+
+// Every Cyrillic word has at least one Cyrillic vowel (аёеоиуыэюя)
+а cyrillic true
+ё cyrillic true
+о cyrillic true
+е cyrillic true
+и cyrillic true
+у cyrillic true
+ы cyrillic true
+э cyrillic true
+ю cyrillic true
+я cyrillic true
+
+// Every Greek word has at least one Greek vowel
+α greek true
+ε greek true
+η greek true
+ι greek true
+ο greek true
+υ greek true
+ω greek true
+
+// Arabic (only initial)
+ا arabic true // alif (isol + init)   
+ب arabic true // ba' 
+ت arabic true // ta' 
+ث arabic true // tha'
+ج arabic true // jim
+ح arabic true // h.a' 
+خ' arabic true // kha' 
+د arabic true // dal (isol + init)
+ذ arabic true // dhal (isol + init)
+ر arabic true // ra' (isol + init)
+ز arabic true // za' (isol + init)
+س arabic true // sin 
+ش arabic true // shin 
+ص arabic true // s.ad 
+ض arabic true // d.ad 
+ط arabic true // t.a' 
+ظ arabic true // z.a' 
+ع arabic true // 'ayn
+غ arabic true // ghayn 
+ف arabic true // fa' 
+ق arabic true // qaf 
+ك arabic true // kaf  
+ل arabic true // lam 
+م arabic true // mim 
+ن arabic true // nun 
+ه arabic true // ha' 
+و arabic true // waw (isol + init)
+ي arabic true // ya' 
+    
+آ arabic true // alif madda  
+إ arabic true // alif + diacritic  
+أ arabic true // alif + hamza
+ؤ arabic true //  waw + hamza
+ئ arabic true //  ya' + hamza
+
+
+// Hebrew
+א hebrew true
+ב hebrew true
+ג hebrew true
+ד hebrew true
+ה hebrew true
+ו hebrew true
+ז hebrew true
+ח hebrew true
+ט hebrew true
+י hebrew true
+כ hebrew true
+ל hebrew true
+מ hebrew true
+נ hebrew true
+ס hebrew true
+ע hebrew true
+פ hebrew true
+צ hebrew true
+ק hebrew true
+ר hebrew true
+ש hebrew true
+ת hebrew true
+
+// 2. following are rules to reject the language
+
+// Every Latin character word has at least one Latin vowel
+a cyrillic+hebrew+greek+arabic false
+o cyrillic+hebrew+greek+arabic false
+e cyrillic+hebrew+greek+arabic false
+i cyrillic+hebrew+greek+arabic false
+y cyrillic+hebrew+greek+arabic+romanian+dutch false
+u cyrillic+hebrew+greek+arabic false
+
+j italian false
+j[^aoeiuy] french+spanish+portuguese+greeklatin false
+g czech false
+k romanian+spanish+portuguese+french+italian false
+q hungarian+polish+russian+romanian+czech+dutch+turkish+greeklatin false
+v polish false
+w french+romanian+spanish+hungarian+russian+czech+turkish+greeklatin false
+x czech+hungarian+dutch+turkish false // polish excluded from the list
+
+dj spanish+turkish false
+v[^aoeiu] german false // in german, "v" can be found before a vowel only
+y[^aoeiu] german false  // in german, "y" usually appears only in the last position; sometimes before a vowel
+c[^aohk] german false
+dzi german+english+french+turkish false
+ou german false
+a[eiou] turkish false // no diphthongs in Turkish
+ö[eaio] turkish false
+ü[eaio] turkish false
+e[aiou] turkish false
+i[aeou] turkish false
+o[aieu] turkish false
+u[aieo] turkish false
+aj german+english+french+dutch false
+ej german+english+french+dutch false
+oj german+english+french+dutch false
+uj german+english+french+dutch false
+eu russian+polish false
+ky polish false
+kie french+spanish+greeklatin false
+gie portuguese+romanian+spanish+greeklatin false
+ch[aou] italian false
+ch turkish false
+son$ german false
+sc[ei] french false
+sch hungarian+polish+french+spanish false
+^h russian false
+etti$ greeklatin false
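
Each entry above pairs a pattern with one or more languages (joined by '+') and an accept/reject flag: the section 1 rules say a match restricts the word to the listed languages, while the section 2 rules say a match rules those languages out. A minimal sketch of how such rules could drive a language guess, assuming accept rules intersect the candidate set and reject rules subtract from it (illustrative only, not the ported engine):

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    // Illustrative only: a tiny language guesser over a few of the rules above.
    public static class LangGuessSketch
    {
        private static readonly (string Pattern, string[] Languages, bool Accept)[] Rules =
        {
            ("witz", new[] { "german" }, true),
            ("eau$", new[] { "french" }, true),
            ("^h",   new[] { "russian" }, false),
        };

        public static ISet<string> GuessLanguages(string word, IEnumerable<string> allLanguages)
        {
            var candidates = new HashSet<string>(allLanguages);
            foreach (var (pattern, languages, accept) in Rules)
            {
                if (!Regex.IsMatch(word, pattern))
                {
                    continue;
                }
                if (accept)
                {
                    candidates.IntersectWith(languages); // word must be one of these languages
                }
                else
                {
                    candidates.ExceptWith(languages);    // word cannot be one of these languages
                }
            }
            return candidates;
        }

        public static void Main()
        {
            var all = new[] { "english", "french", "german", "russian" };
            Console.WriteLine(string.Join(", ", GuessLanguages("horowitz", all))); // german
        }
    }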

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
new file mode 100644
index 0000000..390419e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SEPHARDIC
+
+"E" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
new file mode 100644
index 0000000..e744d32
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"bens" "^" "" "(binz|s)" 
+"benS" "^" "" "(binz|s)" 
+"ben" "^" "" "(bin|)" 
+
+"abens" "^" "" "(abinz|binz|s)" 
+"abenS" "^" "" "(abinz|binz|s)" 
+"aben" "^" "" "(abin|bin|)"
+
+"els" "^" "" "(ilz|alz|s)" 
+"elS" "^" "" "(ilz|alz|s)" 
+"el" "^" "" "(il|al|)" 
+"als" "^" "" "(alz|s)" 
+"alS" "^" "" "(alz|s)" 
+"al" "^" "" "(al|)" 
+
+//"dels" "^" "" "(dilz|s)" 
+//"delS" "^" "" "(dilz|s)" 
+"del" "^" "" "(dil|)" 
+"dela" "^" "" "(dila|)" 
+//"delo" "^" "" "(dila|)" 
+"da" "^" "" "(da|)" 
+"de" "^" "" "(di|)" 
+//"des" "^" "" "(dis|dAs|)" 
+//"di" "^" "" "(di|)" 
+//"dos" "^" "" "(das|dus|)" 
+
+"oa" "" "" "(va|a|D)"
+"oe" "" "" "(vi|D)"
+"ae" "" "" "D"
+
+/// "s" "" "$" "(s|)" // Attia(s)
+/// "C" "" "" "s"  // "c" could actually be "�"
+
+"n" "" "[bp]" "m"
+
+"h" "" "" "(|h|f)" // sound "h" (absent) can be expressed via /x/, Cojab in Spanish = Kohab ; Hakim = Fakim
+"x" "" "" "h"
+
+// DIPHTHONGS ARE APPROXIMATELY equivalent
+"aja" "^" "" "(Da|ia)"                         
+"aje" "^" "" "(Di|Da|i|ia)"                         
+"aji" "^" "" "(Di|i)"                         
+"ajo" "^" "" "(Du|Da|iu|ia)"                         
+"aju" "^" "" "(Du|iu)"                         
+
+"aj" "" "" "D"                         
+"ej" "" "" "D"                         
+"oj" "" "" "D"                         
+"uj" "" "" "D"                         
+"au" "" "" "D"                         
+"eu" "" "" "D"                         
+"ou" "" "" "D"                         
+
+"a" "^" "" "(a|)"  // Arabic
+
+"ja" "^" "" "ia"                         
+"je" "^" "" "i"                         
+"jo" "^" "" "(iu|ia)"                         
+"ju" "^" "" "iu"                         
+
+"ja" "" "" "a"                         
+"je" "" "" "i"                         
+"ji" "" "" "i"                         
+"jo" "" "" "u"                         
+"ju" "" "" "u"                         
+
+"j" "" "" "i"                         
+
+// CONSONANTS {z & Z & dZ; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z" 
+
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
+
+"i" "" "$" "(i|)" // often in Arabic
+"e" "" "" "i"
+
+"o" "" "$" "(a|u)"
+"o" "" "" "u"
+
+// special character to deal correctly in Hebrew match
+"B" "" "" "b" 
+"V" "" "" "v" 
+
+// Arabic
+"p" "^" "" "b"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
new file mode 100644
index 0000000..58fe459
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
new file mode 100644
index 0000000..4bca846
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
new file mode 100644
index 0000000..4bca846
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
new file mode 100644
index 0000000..d4bf51e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"E" "" "" "e"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
new file mode 100644
index 0000000..1f4e864
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
+
+"h" "" "$" ""
+
+// VOICED - UNVOICED CONSONANTS
+"b" "" "[fktSs]" "p"
+"b" "" "p" ""
+"b" "" "$" "p"
+"p" "" "[vgdZz]" "b"
+"p" "" "b" ""
+
+"v" "" "[pktSs]" "f"
+"v" "" "f" ""
+"v" "" "$" "f"
+"f" "" "[vbgdZz]" "v"
+"f" "" "v" ""
+
+"g" "" "[pftSs]" "k"
+"g" "" "k" ""
+"g" "" "$" "k"
+"k" "" "[vbdZz]" "g"
+"k" "" "g" ""
+
+"d" "" "[pfkSs]" "t"
+"d" "" "t" ""
+"d" "" "$" "t"
+"t" "" "[vbgZz]" "d"
+"t" "" "d" ""
+
+"s" "" "dZ" ""
+"s" "" "tS" ""
+
+"z" "" "[pfkSt]" "s"
+"z" "" "[sSzZ]" ""
+"s" "" "[sSzZ]" ""
+"Z" "" "[sSzZ]" ""
+"S" "" "[sSzZ]" ""
+
+// SIMPLIFICATION OF CONSONANT CLUSTERS
+"nm" "" "" "m"
+
+// DOUBLE --> SINGLE
+"ji" "^" "" "i"
+
+"a" "" "a" ""
+"b" "" "b" ""
+"d" "" "d" ""
+"e" "" "e" ""
+"f" "" "f" ""
+"g" "" "g" ""
+"i" "" "i" ""
+"k" "" "k" ""
+"l" "" "l" ""
+"m" "" "m" ""
+"n" "" "n" ""
+"o" "" "o" ""
+"p" "" "p" ""
+"r" "" "r" ""
+"t" "" "t" ""
+"u" "" "u" ""
+"v" "" "v" ""
+"z" "" "z" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
new file mode 100644
index 0000000..b97c589
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"h" "" "" ""
+//"C" "" "" "k"  // c that can actually be �
+
+// VOICED - UNVOICED CONSONANTS
+"s" "[^t]" "[bgZd]" "z"
+"Z" "" "[pfkst]" "S"
+"Z" "" "$" "S"
+"S" "" "[bgzd]" "Z"
+"z" "" "$" "s"
+
+//special character to deal correctly in Hebrew match
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
new file mode 100644
index 0000000..ea75dc4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
new file mode 100644
index 0000000..00357f9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"E" "" "" ""  // final French "e": only in Sephardic
+
+"ts" "" "" "C" // for not confusion Gutes [=guts] and Guts [=guc]
+"tS" "" "" "C" // same reason
+"S" "" "" "s"
+"p" "" "" "f"   
+"b" "^" "" "b"    
+"b" "" "" "(b|v)"    
+
+"ja" "" "" "i"
+"je" "" "" "i"
+"aj" "" "" "i"
+"j" "" "" "i"
+
+"a" "^" "" "1"
+"e" "^" "" "1"
+"a" "" "$" "1"
+"e" "" "$" "1"
+
+"a" "" "" ""
+"e" "" "" ""
+
+"oj" "^" "" "(u|vi)"
+"uj" "^" "" "(u|vi)"
+
+"oj" "" "" "u"
+"uj" "" "" "u"
+
+"ou" "^" "" "(u|v|1)"
+"o" "^" "" "(u|v|1)"
+"u" "^" "" "(u|v|1)"
+
+"o" "" "$" "(u|1)"
+"u" "" "$" "(u|1)"
+
+"ou" "" "" "u"
+"o" "" "" "u"
+
+"VV" "" "" "u" // alef/ayin + vov from ruleshebrew
+"L" "^" "" "1" // alef/ayin from  ruleshebrew
+"L" "" "$" "1" // alef/ayin from  ruleshebrew
+"L" "" "" " " // alef/ayin from  ruleshebrew
+"WW" "^" "" "(vi|u)" // vav-yod from  ruleshebrew
+"WW" "" "" "u" // vav-yod from  ruleshebrew
+"W" "^" "" "(u|v)" // vav from  ruleshebrew
+"W" "" "" "u" // vav from  ruleshebrew
+
+// "g" "" "" "(g|Z)"
+// "z" "" "" "(z|Z)"
+// "d" "" "" "(d|dZ)"
+
+"T" "" "" "t"   // tet from  ruleshebrew
+
+// "k" "" "" "(k|x)"
+// "x" "" "" "(k|x)"
+"K" "" "" "k" // kof and initial kaf from ruleshebrew
+"X" "" "" "x" // khet and final kaf from ruleshebrew
+
+// special for Spanish initial B/V
+"B" "" "" "v"
+"V" "" "" "b"
+
+"H" "^" "" "(x|1)"
+"H" "" "$" "(x|1)"
+"H" "" "" "(x|)"
+"h" "^" "" "1"
+"h" "" "" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
new file mode 100644
index 0000000..9a1935a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+any
+french
+hebrew
+italian
+portuguese
+spanish

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
new file mode 100644
index 0000000..fc08b5a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SEPHARDIC: INCORPORATES Portuguese + Italian + Spanish(+Catalan) + French
+
+// CONSONANTS
+"ph" "" "" "f" // foreign
+"sh" "" "" "S" // foreign
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(gli|l[italian])" 
+"gni" "" "" "(gni|ni[italian+french])"
+"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" 
+"gh" "" "" "g" // It + translit. from Arabic
+"dh" "" "" "d" // translit. from Arabic
+"bh" "" "" "b" // translit. from Arabic
+"th" "" "" "t" // translit. from Arabic
+"lh" "" "" "l" // Port
+"nh" "" "" "nj" // Port
+
+"ig" "[aeiou]" "" "(ig|tS[spanish])"
+"ix" "[aeiou]" "" "S" // Sp
+"tx" "" "" "tS" // Sp
+"tj" "" "$" "tS" // Sp
+"tj" "" "" "dZ" // Sp
+"tg" "" "" "(tg|dZ[spanish])"
+
+"gi" "" "[aeou]" "dZ" // italian
+"g" "" "y" "Z" // french
+"gg" "" "[ei]" "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])" 
+"g" "" "[ei]" "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])" 
+
+"guy" "" "" "gi"     
+"gue" "" "$" "(k[french]|ge)"
+"gu" "" "[ei]" "(g|gv)"     // not It
+"gu" "" "[ao]" "gv"  // not It  
+
+"ñ" "" "" "(n|nj)" 
+"ny" "" "" "nj" 
+
+"sc" "" "[ei]" "(s|S[italian])" 
+"sç" "" "[aeiou]" "s" // not It
+"ss" "" "" "s"
+"ç" "" "" "s"   // not It
+
+"ch" "" "[ei]" "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])" 
+"ch" "" "" "(S|tS[spanish]|dZ[spanish])" 
+
+"ci" "" "[aeou]" "(tS[italian]|si)" 
+"cc" "" "[eiyéèê]" "(tS[italian]|ks[portuguese+french+spanish])" 
+"c" "" "[eiyéèê]" "(tS[italian]|s[portuguese+french+spanish])" 
+//"c" "" "[aou]" "(k|C[portuguese+spanish])" // "C" means that the actual letter could be "ç" (cedille omitted)
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "(s[spanish]|z[portuguese+french+italian])" 
+"s" "" "[dglmnrv]" "(z|Z[portuguese])" 
+
+"z" "" "$" "(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+"z" "" "" "(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp
+
+"que" "" "$" "(k[french]|ke)"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "(kv|k)" // k is It   
+
+"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" 
+"ex" "" "[cs]" "(e[portuguese]|ek)" 
+
+"m" "" "[cdglnrst]" "(m|n[portuguese])" 
+"m" "" "[bfpv]" "(m|n[portuguese+spanish])" 
+"m" "" "$" "(m|n[portuguese])" 
+
+"b" "^" "" "(b|V[spanish])" 
+"v" "^" "" "(v|B[spanish])" 
+
+// VOWELS   
+"eau" "" "" "o" // Fr
+
+"ouh" "" "[aioe]" "(v[french]|uh)"
+"uh" "" "[aioe]" "(v|uh)"
+"ou" "" "[aioe]" "v" // french
+"uo" "" "" "(vo|o)"
+"u" "" "[aie]" "v"
+
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"e" "" "$" "(e|E[french])"
+
+"ão" "" "" "(au|an)" // Port
+"ãe" "" "" "(aj|an)" // Port
+"ãi" "" "" "(aj|an)" // Port
+"õe" "" "" "(oj|on)" // Port
+"où" "" "" "u" // Fr
+"ou" "" "" "(ou|u[french])" 
+
+"â" "" "" "a" // Port & Fr
+"à" "" "" "a" // Port 
+"á" "" "" "a" // Port & Sp
+"ã" "" "" "(a|an)" // Port
+"é" "" "" "e" 
+"ê" "" "" "e" // Port & Fr
+"è" "" "" "e" // Sp & Fr & It
+"í" "" "" "i" // Port & Sp
+"î" "" "" "i" // Fr
+"ô" "" "" "o" // Port & Fr
+"ó" "" "" "o" // Port & Sp & It
+"õ" "" "" "(o|on)" // Port
+"ò" "" "" "o"  // Sp & It
+"ú" "" "" "u" // Port & Sp
+"ü" "" "" "u" // Port & Sp
+
+// LATIN ALPHABET     
+"a" "" "" "a"
+"b" "" "" "(b|v[spanish])" 
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h" 
+"i" "" "" "i"
+"j" "" "" "(x[spanish]|Z)" // not It
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "(s|S[portuguese])" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "(v|b[spanish])" 
+"w" "" "" "v"    // foreign
+"x" "" "" "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y" "" "" "i"   
+"z" "" "" "z"


[08/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
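
The diff below is the Double Metaphone port itself. A quick usage sketch, not part of the commit, using only the public members visible in the code that follows (the parameterless constructor, MaxCodeLen, GetDoubleMetaphone, and IsDoubleMetaphoneEqual):

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    // Rough usage sketch; the resulting codes are not asserted here.
    public static class DoubleMetaphoneDemo
    {
        public static void Main()
        {
            var encoder = new DoubleMetaphone { MaxCodeLen = 4 };

            string primary = encoder.GetDoubleMetaphone("Smith");          // primary encoding
            string alternate = encoder.GetDoubleMetaphone("Smith", true);  // alternate encoding

            // Names whose encodings match are treated as phonetically equivalent.
            bool sameSound = encoder.IsDoubleMetaphoneEqual("Smith", "Smyth");

            Console.WriteLine(primary + " / " + alternate + " / equal: " + sameSound);
        }
    }
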
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/DoubleMetaphone.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/DoubleMetaphone.cs b/src/Lucene.Net.Analysis.Phonetic/Language/DoubleMetaphone.cs
new file mode 100644
index 0000000..d54968d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/DoubleMetaphone.cs
@@ -0,0 +1,1280 @@
+// commons-codec version compatibility level: 1.9
+using System;
+using System.Globalization;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a double metaphone value. This implementation is based on the algorithm by <c>Lawrence
+    /// Philips</c>.
+    /// <para/>
+    /// This class is conditionally thread-safe. The instance field <see cref="maxCodeLen"/> (exposed through the
+    /// <see cref="MaxCodeLen"/> property) is mutable but not volatile, and accesses are not synchronized. If an instance
+    /// of the class is shared between threads, the caller must use suitable synchronization to ensure safe publication
+    /// of the value between threads, and must not set <see cref="MaxCodeLen"/> after initial setup.
+    /// <para/>
+    /// See <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
+    /// <para/>
+    /// See <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
+    /// </summary>
+    public class DoubleMetaphone : IStringEncoder
+    {
+        /// <summary>
+        /// "Vowels" to test for
+        /// </summary>
+        private static readonly string VOWELS = "AEIOUY";
+
+        /// <summary>
+        /// Prefixes when present which are not pronounced
+        /// </summary>
+        private static readonly string[] SILENT_START =
+            { "GN", "KN", "PN", "WR", "PS" };
+        private static readonly string[] L_R_N_M_B_H_F_V_W_SPACE =
+            { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
+        private static readonly string[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
+            { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
+        private static readonly string[] L_T_K_S_N_M_B_Z =
+            { "L", "T", "K", "S", "N", "M", "B", "Z" };
+
+        /// <summary>
+        /// Maximum length of an encoding, default is 4
+        /// </summary>
+        private int maxCodeLen = 4;
+
+        /// <summary>
+        /// Creates an instance of this <see cref="DoubleMetaphone"/> encoder
+        /// </summary>
+        public DoubleMetaphone()
+            : base()
+        {
+        }
+
+        /// <summary>
+        /// Encode a value with Double Metaphone.
+        /// </summary>
+        /// <param name="value">String to encode.</param>
+        /// <returns>An encoded string.</returns>
+        public virtual string GetDoubleMetaphone(string value)
+        {
+            return GetDoubleMetaphone(value, false);
+        }
+
+        /// <summary>
+        /// Encode a value with Double Metaphone, optionally using the alternate encoding.
+        /// </summary>
+        /// <param name="value">String to encode.</param>
+        /// <param name="alternate">Use alternate encode.</param>
+        /// <returns>An encoded string.</returns>
+        public virtual string GetDoubleMetaphone(string value, bool alternate)
+        {
+            value = CleanInput(value);
+            if (value == null)
+            {
+                return null;
+            }
+
+            bool slavoGermanic = IsSlavoGermanic(value);
+            int index = IsSilentStart(value) ? 1 : 0;
+
+            DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.MaxCodeLen);
+
+            while (!result.IsComplete && index <= value.Length - 1)
+            {
+                switch (value[index])
+                {
+                    case 'A':
+                    case 'E':
+                    case 'I':
+                    case 'O':
+                    case 'U':
+                    case 'Y':
+                        index = HandleAEIOUY(result, index);
+                        break;
+                    case 'B':
+                        result.Append('P');
+                        index = CharAt(value, index + 1) == 'B' ? index + 2 : index + 1;
+                        break;
+                    case '\u00C7':
+                        // A C with a Cedilla
+                        result.Append('S');
+                        index++;
+                        break;
+                    case 'C':
+                        index = HandleC(value, result, index);
+                        break;
+                    case 'D':
+                        index = HandleD(value, result, index);
+                        break;
+                    case 'F':
+                        result.Append('F');
+                        index = CharAt(value, index + 1) == 'F' ? index + 2 : index + 1;
+                        break;
+                    case 'G':
+                        index = HandleG(value, result, index, slavoGermanic);
+                        break;
+                    case 'H':
+                        index = HandleH(value, result, index);
+                        break;
+                    case 'J':
+                        index = HandleJ(value, result, index, slavoGermanic);
+                        break;
+                    case 'K':
+                        result.Append('K');
+                        index = CharAt(value, index + 1) == 'K' ? index + 2 : index + 1;
+                        break;
+                    case 'L':
+                        index = HandleL(value, result, index);
+                        break;
+                    case 'M':
+                        result.Append('M');
+                        index = ConditionM0(value, index) ? index + 2 : index + 1;
+                        break;
+                    case 'N':
+                        result.Append('N');
+                        index = CharAt(value, index + 1) == 'N' ? index + 2 : index + 1;
+                        break;
+                    case '\u00D1':
+                        // N with a tilde (spanish ene)
+                        result.Append('N');
+                        index++;
+                        break;
+                    case 'P':
+                        index = HandleP(value, result, index);
+                        break;
+                    case 'Q':
+                        result.Append('K');
+                        index = CharAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
+                        break;
+                    case 'R':
+                        index = HandleR(value, result, index, slavoGermanic);
+                        break;
+                    case 'S':
+                        index = HandleS(value, result, index, slavoGermanic);
+                        break;
+                    case 'T':
+                        index = HandleT(value, result, index);
+                        break;
+                    case 'V':
+                        result.Append('F');
+                        index = CharAt(value, index + 1) == 'V' ? index + 2 : index + 1;
+                        break;
+                    case 'W':
+                        index = HandleW(value, result, index);
+                        break;
+                    case 'X':
+                        index = HandleX(value, result, index);
+                        break;
+                    case 'Z':
+                        index = HandleZ(value, result, index, slavoGermanic);
+                        break;
+                    default:
+                        index++;
+                        break;
+                }
+            }
+
+            return alternate ? result.Alternate : result.Primary;
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encode the value using DoubleMetaphone.  It will only work if
+        //     * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
+        //     *
+        //     * @param obj Object to encode (should be of type String)
+        //     * @return An encoded Object (will be of type String)
+        //     * @throws EncoderException encode parameter is not of type String
+        //     */
+
+        //public virtual object Encode(object obj) 
+        //    {
+        //    if (!(obj is String)) {
+        //            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
+        //        }
+        //    return GetDoubleMetaphone((String) obj);
+        //    }
+
+        /// <summary>
+        /// Encode the value using DoubleMetaphone.
+        /// </summary>
+        /// <param name="value">String to encode.</param>
+        /// <returns>An encoded string.</returns>
+        public virtual string Encode(string value)
+        {
+            return GetDoubleMetaphone(value);
+        }
+
+        /// <summary>
+        /// Check if the Double Metaphone values of two <see cref="string"/> values
+        /// are equal.
+        /// </summary>
+        /// <param name="value1">The left-hand side of the encoded <see cref="string.Equals(object)"/>.</param>
+        /// <param name="value2">The right-hand side of the encoded <see cref="string.Equals(object)"/>.</param>
+        /// <returns><c>true</c> if the encoded <see cref="string"/>s are equal; <c>false</c> otherwise.</returns>
+        public virtual bool IsDoubleMetaphoneEqual(string value1, string value2)
+        {
+            return IsDoubleMetaphoneEqual(value1, value2, false);
+        }
+
+        /// <summary>
+        /// Check if the Double Metaphone values of two <see cref="string"/> values
+        /// are equal, optionally using the alternate value.
+        /// </summary>
+        /// <param name="value1">The left-hand side of the encoded <see cref="string.Equals(object)"/>.</param>
+        /// <param name="value2">The right-hand side of the encoded <see cref="string.Equals(object)"/>.</param>
+        /// <param name="alternate">Use the alternate value if <c>true</c>.</param>
+        /// <returns><c>true</c> if the encoded <see cref="string"/>s are equal; <c>false</c> otherwise.</returns>
+        public virtual bool IsDoubleMetaphoneEqual(string value1, string value2, bool alternate)
+        {
+            return GetDoubleMetaphone(value1, alternate).Equals(GetDoubleMetaphone(value2, alternate));
+        }
+
+        /// <summary>
+        /// Gets or Sets the maxCodeLen.
+        /// </summary>
+        public virtual int MaxCodeLen
+        {
+            get { return this.maxCodeLen; }
+            set { this.maxCodeLen = value; }
+        }
+
+        //-- BEGIN HANDLERS --//
+
+        /// <summary>
+        /// Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
+        /// </summary>
+        private int HandleAEIOUY(DoubleMetaphoneResult result, int index)
+        {
+            if (index == 0)
+            {
+                result.Append('A');
+            }
+            return index + 1;
+        }
+
+        /// <summary>
+        /// Handles 'C' cases.
+        /// </summary>
+        private int HandleC(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (ConditionC0(value, index))
+            {  // very confusing, moved out
+                result.Append('K');
+                index += 2;
+            }
+            else if (index == 0 && Contains(value, index, 6, "CAESAR"))
+            {
+                result.Append('S');
+                index += 2;
+            }
+            else if (Contains(value, index, 2, "CH"))
+            {
+                index = HandleCH(value, result, index);
+            }
+            else if (Contains(value, index, 2, "CZ") &&
+                     !Contains(value, index - 2, 4, "WICZ"))
+            {
+                //-- "Czerny" --//
+                result.Append('S', 'X');
+                index += 2;
+            }
+            else if (Contains(value, index + 1, 3, "CIA"))
+            {
+                //-- "focaccia" --//
+                result.Append('X');
+                index += 3;
+            }
+            else if (Contains(value, index, 2, "CC") &&
+                     !(index == 1 && CharAt(value, 0) == 'M'))
+            {
+                //-- double "cc" but not "McClelland" --//
+                return HandleCC(value, result, index);
+            }
+            else if (Contains(value, index, 2, "CK", "CG", "CQ"))
+            {
+                result.Append('K');
+                index += 2;
+            }
+            else if (Contains(value, index, 2, "CI", "CE", "CY"))
+            {
+                //-- Italian vs. English --//
+                if (Contains(value, index, 3, "CIO", "CIE", "CIA"))
+                {
+                    result.Append('S', 'X');
+                }
+                else
+                {
+                    result.Append('S');
+                }
+                index += 2;
+            }
+            else
+            {
+                result.Append('K');
+                if (Contains(value, index + 1, 2, " C", " Q", " G"))
+                {
+                    //-- Mac Caffrey, Mac Gregor --//
+                    index += 3;
+                }
+                else if (Contains(value, index + 1, 1, "C", "K", "Q") &&
+                         !Contains(value, index + 1, 2, "CE", "CI"))
+                {
+                    index += 2;
+                }
+                else
+                {
+                    index++;
+                }
+            }
+
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'CC' cases.
+        /// </summary>
+        private int HandleCC(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (Contains(value, index + 2, 1, "I", "E", "H") &&
+                !Contains(value, index + 2, 2, "HU"))
+            {
+                //-- "bellocchio" but not "bacchus" --//
+                if ((index == 1 && CharAt(value, index - 1) == 'A') ||
+                    Contains(value, index - 1, 5, "UCCEE", "UCCES"))
+                {
+                    //-- "accident", "accede", "succeed" --//
+                    result.Append("KS");
+                }
+                else
+                {
+                    //-- "bacci", "bertucci", other Italian --//
+                    result.Append('X');
+                }
+                index += 3;
+            }
+            else
+            {    // Pierce's rule
+                result.Append('K');
+                index += 2;
+            }
+
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'CH' cases.
+        /// </summary>
+        private int HandleCH(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (index > 0 && Contains(value, index, 4, "CHAE"))
+            {   // Michael
+                result.Append('K', 'X');
+                return index + 2;
+            }
+            else if (ConditionCH0(value, index))
+            {
+                //-- Greek roots ("chemistry", "chorus", etc.) --//
+                result.Append('K');
+                return index + 2;
+            }
+            else if (ConditionCH1(value, index))
+            {
+                //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
+                result.Append('K');
+                return index + 2;
+            }
+            else
+            {
+                if (index > 0)
+                {
+                    if (Contains(value, 0, 2, "MC"))
+                    {
+                        result.Append('K');
+                    }
+                    else
+                    {
+                        result.Append('X', 'K');
+                    }
+                }
+                else
+                {
+                    result.Append('X');
+                }
+                return index + 2;
+            }
+        }
+
+        /// <summary>
+        /// Handles 'D' cases.
+        /// </summary>
+        private int HandleD(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (Contains(value, index, 2, "DG"))
+            {
+                //-- "Edge" --//
+                if (Contains(value, index + 2, 1, "I", "E", "Y"))
+                {
+                    result.Append('J');
+                    index += 3;
+                    //-- "Edgar" --//
+                }
+                else
+                {
+                    result.Append("TK");
+                    index += 2;
+                }
+            }
+            else if (Contains(value, index, 2, "DT", "DD"))
+            {
+                result.Append('T');
+                index += 2;
+            }
+            else
+            {
+                result.Append('T');
+                index++;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'G' cases.
+        /// </summary>
+        private int HandleG(string value, DoubleMetaphoneResult result, int index,
+                            bool slavoGermanic)
+        {
+            if (CharAt(value, index + 1) == 'H')
+            {
+                index = HandleGH(value, result, index);
+            }
+            else if (CharAt(value, index + 1) == 'N')
+            {
+                if (index == 1 && IsVowel(CharAt(value, 0)) && !slavoGermanic)
+                {
+                    result.Append("KN", "N");
+                }
+                else if (!Contains(value, index + 2, 2, "EY") &&
+                         CharAt(value, index + 1) != 'Y' && !slavoGermanic)
+                {
+                    result.Append("N", "KN");
+                }
+                else
+                {
+                    result.Append("KN");
+                }
+                index = index + 2;
+            }
+            else if (Contains(value, index + 1, 2, "LI") && !slavoGermanic)
+            {
+                result.Append("KL", "L");
+                index += 2;
+            }
+            else if (index == 0 &&
+                     (CharAt(value, index + 1) == 'Y' ||
+                      Contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER)))
+            {
+                //-- -ges-, -gep-, -gel-, -gie- at beginning --//
+                result.Append('K', 'J');
+                index += 2;
+            }
+            else if ((Contains(value, index + 1, 2, "ER") ||
+                      CharAt(value, index + 1) == 'Y') &&
+                     !Contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
+                     !Contains(value, index - 1, 1, "E", "I") &&
+                     !Contains(value, index - 1, 3, "RGY", "OGY"))
+            {
+                //-- -ger-, -gy- --//
+                result.Append('K', 'J');
+                index += 2;
+            }
+            else if (Contains(value, index + 1, 1, "E", "I", "Y") ||
+                     Contains(value, index - 1, 4, "AGGI", "OGGI"))
+            {
+                //-- Italian "biaggi" --//
+                if (Contains(value, 0, 4, "VAN ", "VON ") ||
+                    Contains(value, 0, 3, "SCH") ||
+                    Contains(value, index + 1, 2, "ET"))
+                {
+                    //-- obvious germanic --//
+                    result.Append('K');
+                }
+                else if (Contains(value, index + 1, 3, "IER"))
+                {
+                    result.Append('J');
+                }
+                else
+                {
+                    result.Append('J', 'K');
+                }
+                index += 2;
+            }
+            else if (CharAt(value, index + 1) == 'G')
+            {
+                index += 2;
+                result.Append('K');
+            }
+            else
+            {
+                index++;
+                result.Append('K');
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'GH' cases.
+        /// </summary>
+        private int HandleGH(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (index > 0 && !IsVowel(CharAt(value, index - 1)))
+            {
+                result.Append('K');
+                index += 2;
+            }
+            else if (index == 0)
+            {
+                if (CharAt(value, index + 2) == 'I')
+                {
+                    result.Append('J');
+                }
+                else
+                {
+                    result.Append('K');
+                }
+                index += 2;
+            }
+            else if ((index > 1 && Contains(value, index - 2, 1, "B", "H", "D")) ||
+                     (index > 2 && Contains(value, index - 3, 1, "B", "H", "D")) ||
+                     (index > 3 && Contains(value, index - 4, 1, "B", "H")))
+            {
+                //-- Parker's rule (with some further refinements) - "hugh"
+                index += 2;
+            }
+            else
+            {
+                if (index > 2 && CharAt(value, index - 1) == 'U' &&
+                    Contains(value, index - 3, 1, "C", "G", "L", "R", "T"))
+                {
+                    //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
+                    result.Append('F');
+                }
+                else if (index > 0 && CharAt(value, index - 1) != 'I')
+                {
+                    result.Append('K');
+                }
+                index += 2;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'H' cases.
+        /// </summary>
+        private int HandleH(string value, DoubleMetaphoneResult result, int index)
+        {
+            //-- only keep if first & before vowel or between 2 vowels --//
+            if ((index == 0 || IsVowel(CharAt(value, index - 1))) &&
+                IsVowel(CharAt(value, index + 1)))
+            {
+                result.Append('H');
+                index += 2;
+                //-- also takes care of "HH" --//
+            }
+            else
+            {
+                index++;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'J' cases.
+        /// </summary>
+        private int HandleJ(string value, DoubleMetaphoneResult result, int index,
+                            bool slavoGermanic)
+        {
+            if (Contains(value, index, 4, "JOSE") || Contains(value, 0, 4, "SAN "))
+            {
+                //-- obvious Spanish, "Jose", "San Jacinto" --//
+                if ((index == 0 && (CharAt(value, index + 4) == ' ') ||
+                     value.Length == 4) || Contains(value, 0, 4, "SAN "))
+                {
+                    result.Append('H');
+                }
+                else
+                {
+                    result.Append('J', 'H');
+                }
+                index++;
+            }
+            else
+            {
+                if (index == 0 && !Contains(value, index, 4, "JOSE"))
+                {
+                    result.Append('J', 'A');
+                }
+                else if (IsVowel(CharAt(value, index - 1)) && !slavoGermanic &&
+                         (CharAt(value, index + 1) == 'A' || CharAt(value, index + 1) == 'O'))
+                {
+                    result.Append('J', 'H');
+                }
+                else if (index == value.Length - 1)
+                {
+                    result.Append('J', ' ');
+                }
+                else if (!Contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
+                         !Contains(value, index - 1, 1, "S", "K", "L"))
+                {
+                    result.Append('J');
+                }
+
+                if (CharAt(value, index + 1) == 'J')
+                {
+                    index += 2;
+                }
+                else
+                {
+                    index++;
+                }
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'L' cases.
+        /// </summary>
+        private int HandleL(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (CharAt(value, index + 1) == 'L')
+            {
+                if (ConditionL0(value, index))
+                {
+                    result.AppendPrimary('L');
+                }
+                else
+                {
+                    result.Append('L');
+                }
+                index += 2;
+            }
+            else
+            {
+                index++;
+                result.Append('L');
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'P' cases.
+        /// </summary>
+        private int HandleP(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (CharAt(value, index + 1) == 'H')
+            {
+                result.Append('F');
+                index += 2;
+            }
+            else
+            {
+                result.Append('P');
+                index = Contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'R' cases.
+        /// </summary>
+        private int HandleR(string value, DoubleMetaphoneResult result, int index,
+                            bool slavoGermanic)
+        {
+            if (index == value.Length - 1 && !slavoGermanic &&
+                Contains(value, index - 2, 2, "IE") &&
+                !Contains(value, index - 4, 2, "ME", "MA"))
+            {
+                result.AppendAlternate('R');
+            }
+            else
+            {
+                result.Append('R');
+            }
+            return CharAt(value, index + 1) == 'R' ? index + 2 : index + 1;
+        }
+
+        /// <summary>
+        /// Handles 'S' cases.
+        /// </summary>
+        private int HandleS(string value, DoubleMetaphoneResult result, int index,
+                            bool slavoGermanic)
+        {
+            if (Contains(value, index - 1, 3, "ISL", "YSL"))
+            {
+                //-- special cases "island", "isle", "carlisle", "carlysle" --//
+                index++;
+            }
+            else if (index == 0 && Contains(value, index, 5, "SUGAR"))
+            {
+                //-- special case "sugar-" --//
+                result.Append('X', 'S');
+                index++;
+            }
+            else if (Contains(value, index, 2, "SH"))
+            {
+                if (Contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ"))
+                {
+                    //-- germanic --//
+                    result.Append('S');
+                }
+                else
+                {
+                    result.Append('X');
+                }
+                index += 2;
+            }
+            else if (Contains(value, index, 3, "SIO", "SIA") || Contains(value, index, 4, "SIAN"))
+            {
+                //-- Italian and Armenian --//
+                if (slavoGermanic)
+                {
+                    result.Append('S');
+                }
+                else
+                {
+                    result.Append('S', 'X');
+                }
+                index += 3;
+            }
+            else if ((index == 0 && Contains(value, index + 1, 1, "M", "N", "L", "W")) ||
+                     Contains(value, index + 1, 1, "Z"))
+            {
+                //-- german & anglicisations, e.g. "smith" match "schmidt" //
+                // "snider" match "schneider" --//
+                //-- also, -sz- in slavic language although in hungarian it //
+                //   is pronounced "s" --//
+                result.Append('S', 'X');
+                index = Contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
+            }
+            else if (Contains(value, index, 2, "SC"))
+            {
+                index = HandleSC(value, result, index);
+            }
+            else
+            {
+                if (index == value.Length - 1 && Contains(value, index - 2, 2, "AI", "OI"))
+                {
+                    //-- french e.g. "resnais", "artois" --//
+                    result.AppendAlternate('S');
+                }
+                else
+                {
+                    result.Append('S');
+                }
+                index = Contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'SC' cases.
+        /// </summary>
+        private int HandleSC(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (CharAt(value, index + 2) == 'H')
+            {
+                //-- Schlesinger's rule --//
+                if (Contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM"))
+                {
+                    //-- Dutch origin, e.g. "school", "schooner" --//
+                    if (Contains(value, index + 3, 2, "ER", "EN"))
+                    {
+                        //-- "schermerhorn", "schenker" --//
+                        result.Append("X", "SK");
+                    }
+                    else
+                    {
+                        result.Append("SK");
+                    }
+                }
+                else
+                {
+                    if (index == 0 && !IsVowel(CharAt(value, 3)) && CharAt(value, 3) != 'W')
+                    {
+                        result.Append('X', 'S');
+                    }
+                    else
+                    {
+                        result.Append('X');
+                    }
+                }
+            }
+            else if (Contains(value, index + 2, 1, "I", "E", "Y"))
+            {
+                result.Append('S');
+            }
+            else
+            {
+                result.Append("SK");
+            }
+            return index + 3;
+        }
+
+        /// <summary>
+        /// Handles 'T' cases.
+        /// </summary>
+        private int HandleT(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (Contains(value, index, 4, "TION"))
+            {
+                result.Append('X');
+                index += 3;
+            }
+            else if (Contains(value, index, 3, "TIA", "TCH"))
+            {
+                result.Append('X');
+                index += 3;
+            }
+            else if (Contains(value, index, 2, "TH") || Contains(value, index, 3, "TTH"))
+            {
+                if (Contains(value, index + 2, 2, "OM", "AM") ||
+                    //-- special case "thomas", "thames" or germanic --//
+                    Contains(value, 0, 4, "VAN ", "VON ") ||
+                    Contains(value, 0, 3, "SCH"))
+                {
+                    result.Append('T');
+                }
+                else
+                {
+                    result.Append('0', 'T');
+                }
+                index += 2;
+            }
+            else
+            {
+                result.Append('T');
+                index = Contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'W' cases.
+        /// </summary>
+        private int HandleW(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (Contains(value, index, 2, "WR"))
+            {
+                //-- can also be in middle of word --//
+                result.Append('R');
+                index += 2;
+            }
+            else
+            {
+                if (index == 0 && (IsVowel(CharAt(value, index + 1)) ||
+                                   Contains(value, index, 2, "WH")))
+                {
+                    if (IsVowel(CharAt(value, index + 1)))
+                    {
+                        //-- Wasserman should match Vasserman --//
+                        result.Append('A', 'F');
+                    }
+                    else
+                    {
+                        //-- need Uomo to match Womo --//
+                        result.Append('A');
+                    }
+                    index++;
+                }
+                else if ((index == value.Length - 1 && IsVowel(CharAt(value, index - 1))) ||
+                         Contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
+                         Contains(value, 0, 3, "SCH"))
+                {
+                    //-- Arnow should match Arnoff --//
+                    result.AppendAlternate('F');
+                    index++;
+                }
+                else if (Contains(value, index, 4, "WICZ", "WITZ"))
+                {
+                    //-- Polish e.g. "filipowicz" --//
+                    result.Append("TS", "FX");
+                    index += 4;
+                }
+                else
+                {
+                    index++;
+                }
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'X' cases.
+        /// </summary>
+        private int HandleX(string value, DoubleMetaphoneResult result, int index)
+        {
+            if (index == 0)
+            {
+                result.Append('S');
+                index++;
+            }
+            else
+            {
+                if (!((index == value.Length - 1) &&
+                      (Contains(value, index - 3, 3, "IAU", "EAU") ||
+                       Contains(value, index - 2, 2, "AU", "OU"))))
+                {
+                    //-- French e.g. breaux --//
+                    result.Append("KS");
+                }
+                index = Contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Handles 'Z' cases.
+        /// </summary>
+        private int HandleZ(string value, DoubleMetaphoneResult result, int index,
+                            bool slavoGermanic)
+        {
+            if (CharAt(value, index + 1) == 'H')
+            {
+                //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
+                result.Append('J');
+                index += 2;
+            }
+            else
+            {
+                if (Contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
+                    (slavoGermanic && (index > 0 && CharAt(value, index - 1) != 'T')))
+                {
+                    result.Append("S", "TS");
+                }
+                else
+                {
+                    result.Append('S');
+                }
+                index = CharAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
+            }
+            return index;
+        }
+
+        //-- BEGIN CONDITIONS --//
+
+        /// <summary>
+        /// Complex condition 0 for 'C'.
+        /// </summary>
+        private bool ConditionC0(string value, int index)
+        {
+            if (Contains(value, index, 4, "CHIA"))
+            {
+                return true;
+            }
+            else if (index <= 1)
+            {
+                return false;
+            }
+            else if (IsVowel(CharAt(value, index - 2)))
+            {
+                return false;
+            }
+            else if (!Contains(value, index - 1, 3, "ACH"))
+            {
+                return false;
+            }
+            else
+            {
+                char c = CharAt(value, index + 2);
+                return (c != 'I' && c != 'E') ||
+                        Contains(value, index - 2, 6, "BACHER", "MACHER");
+            }
+        }
+
+        /// <summary>
+        /// Complex condition 0 for 'CH'.
+        /// </summary>
+        private bool ConditionCH0(string value, int index)
+        {
+            if (index != 0)
+            {
+                return false;
+            }
+            else if (!Contains(value, index + 1, 5, "HARAC", "HARIS") &&
+                     !Contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM"))
+            {
+                return false;
+            }
+            else if (Contains(value, 0, 5, "CHORE"))
+            {
+                return false;
+            }
+            else
+            {
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Complex condition 1 for 'CH'.
+        /// </summary>
+        private bool ConditionCH1(string value, int index)
+        {
+            return ((Contains(value, 0, 4, "VAN ", "VON ") || Contains(value, 0, 3, "SCH")) ||
+                    Contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
+                    Contains(value, index + 2, 1, "T", "S") ||
+                    ((Contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
+                     (Contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.Length - 1)));
+        }
+
+        /// <summary>
+        /// Complex condition 0 for 'L'.
+        /// </summary>
+        private bool ConditionL0(string value, int index)
+        {
+            if (index == value.Length - 3 &&
+                Contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE"))
+            {
+                return true;
+            }
+            else if ((Contains(value, value.Length - 2, 2, "AS", "OS") ||
+                      Contains(value, value.Length - 1, 1, "A", "O")) &&
+                     Contains(value, index - 1, 4, "ALLE"))
+            {
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+        /// <summary>
+        /// Complex condition 0 for 'M'.
+        /// </summary>
+        private bool ConditionM0(string value, int index)
+        {
+            if (CharAt(value, index + 1) == 'M')
+            {
+                return true;
+            }
+            return Contains(value, index - 1, 3, "UMB") &&
+                   ((index + 1) == value.Length - 1 || Contains(value, index + 2, 2, "ER"));
+        }
+
+        //-- BEGIN HELPER FUNCTIONS --//
+
+        /// <summary>
+        /// Determines whether or not a value is of slavo-germanic origin. A value is
+        /// of slavo-germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.
+        /// </summary>
+        private bool IsSlavoGermanic(string value)
+        {
+            return value.IndexOf('W') > -1 || value.IndexOf('K') > -1 ||
+                value.IndexOf("CZ") > -1 || value.IndexOf("WITZ") > -1;
+        }
+
+        /// <summary>
+        /// Determines whether or not a character is a vowel.
+        /// </summary>
+        private bool IsVowel(char ch)
+        {
+            return VOWELS.IndexOf(ch) != -1;
+        }
+
+        /// <summary>
+        /// Determines whether or not the value starts with a silent letter.  It will
+        /// return <c>true</c> if the value starts with any of 'GN', 'KN',
+        /// 'PN', 'WR' or 'PS'.
+        /// </summary>
+        private bool IsSilentStart(string value)
+        {
+            bool result = false;
+            foreach (string element in SILENT_START)
+            {
+                if (value.StartsWith(element, StringComparison.Ordinal))
+                {
+                    result = true;
+                    break;
+                }
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Cleans the input.
+        /// </summary>
+        private string CleanInput(string input)
+        {
+            if (input == null)
+            {
+                return null;
+            }
+            input = input.Trim();
+            if (input.Length == 0)
+            {
+                return null;
+            }
+            return new CultureInfo("en").TextInfo.ToUpper(input);
+        }
+
+        /// <summary>
+        /// Gets the character at index <paramref name="index"/> if available, otherwise
+        /// it returns <see cref="char.MinValue"/> so that there is some sort
+        /// of a default.
+        /// </summary>
+        protected virtual char CharAt(string value, int index)
+        {
+            if (index < 0 || index >= value.Length)
+            {
+                return char.MinValue;
+            }
+            return value[index];
+        }
+
+        /// <summary>
+        /// Determines whether <paramref name="value"/> contains any of the criteria starting at index <paramref name="start"/> and
+        /// matching up to length <paramref name="length"/>.
+        /// </summary>
+        protected static bool Contains(string value, int start, int length,
+                                          params string[] criteria)
+        {
+            bool result = false;
+            if (start >= 0 && start + length <= value.Length)
+            {
+                string target = value.Substring(start, length);
+
+                foreach (string element in criteria)
+                {
+                    if (target.Equals(element))
+                    {
+                        result = true;
+                        break;
+                    }
+                }
+            }
+            return result;
+        }
+
+        //-- BEGIN INNER CLASSES --//
+
+        /// <summary>
+        /// Inner class for storing results, since there is the optional alternate encoding.
+        /// </summary>
+        public class DoubleMetaphoneResult
+        {
+            private readonly StringBuilder primary;
+            private readonly StringBuilder alternate;
+            private readonly int maxLength;
+
+            public DoubleMetaphoneResult(int maxLength)
+            {
+                this.maxLength = maxLength;
+                this.primary = new StringBuilder(maxLength);
+                this.alternate = new StringBuilder(maxLength);
+            }
+
+            public virtual void Append(char value)
+            {
+                AppendPrimary(value);
+                AppendAlternate(value);
+            }
+
+            public virtual void Append(char primary, char alternate)
+            {
+                AppendPrimary(primary);
+                AppendAlternate(alternate);
+            }
+
+            public virtual void AppendPrimary(char value)
+            {
+                if (this.primary.Length < this.maxLength)
+                {
+                    this.primary.Append(value);
+                }
+            }
+
+            public virtual void AppendAlternate(char value)
+            {
+                if (this.alternate.Length < this.maxLength)
+                {
+                    this.alternate.Append(value);
+                }
+            }
+
+            public virtual void Append(string value)
+            {
+                AppendPrimary(value);
+                AppendAlternate(value);
+            }
+
+            public virtual void Append(string primary, string alternate)
+            {
+                AppendPrimary(primary);
+                AppendAlternate(alternate);
+            }
+
+            public virtual void AppendPrimary(string value)
+            {
+                int addChars = this.maxLength - this.primary.Length;
+                if (value.Length <= addChars)
+                {
+                    this.primary.Append(value);
+                }
+                else
+                {
+                    this.primary.Append(value.Substring(0, addChars - 0));
+                }
+            }
+
+            public virtual void AppendAlternate(string value)
+            {
+                int addChars = this.maxLength - this.alternate.Length;
+                if (value.Length <= addChars)
+                {
+                    this.alternate.Append(value);
+                }
+                else
+                {
+                    this.alternate.Append(value.Substring(0, addChars - 0));
+                }
+            }
+
+            public virtual string Primary
+            {
+                get { return this.primary.ToString(); }
+            }
+
+            public virtual string Alternate
+            {
+                get { return this.alternate.ToString(); }
+            }
+
+            public virtual bool IsComplete
+            {
+                get
+                {
+                    return this.primary.Length >= this.maxLength &&
+                     this.alternate.Length >= this.maxLength;
+                }
+            }
+        }
+    }
+}
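
For reference, a minimal usage sketch of the encoder added above. It assumes the port keeps the
commons-codec entry points (GetDoubleMetaphone(value), GetDoubleMetaphone(value, alternate) and
IsDoubleMetaphoneEqual(value1, value2)), which are not visible in this excerpt; the handlers and
the DoubleMetaphoneResult class shown here are what produce the primary/alternate codes.

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class DoubleMetaphoneDemo
    {
        public static void Main()
        {
            var encoder = new DoubleMetaphone();

            // Primary and alternate encodings; both are capped at MaxCodeLen
            // by the DoubleMetaphoneResult class shown above.
            Console.WriteLine(encoder.GetDoubleMetaphone("Schmidt"));        // primary code
            Console.WriteLine(encoder.GetDoubleMetaphone("Schmidt", true));  // alternate code

            // Convenience comparison of two values by their encodings.
            Console.WriteLine(encoder.IsDoubleMetaphoneEqual("Smith", "Schmidt"));
        }
    }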

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/MatchRatingApproachEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/MatchRatingApproachEncoder.cs b/src/Lucene.Net.Analysis.Phonetic/Language/MatchRatingApproachEncoder.cs
new file mode 100644
index 0000000..c30e571
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/MatchRatingApproachEncoder.cs
@@ -0,0 +1,425 @@
+// commons-codec version compatibility level: 1.9
+using System;
+using System.Globalization;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Match Rating Approach Phonetic Algorithm, developed by <c>Western Airlines</c> in 1977.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// See: <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a>
+    /// <para/>
+    /// since 1.8
+    /// </summary>
+    public class MatchRatingApproachEncoder : IStringEncoder
+    {
+        private static readonly string SPACE = " ";
+
+        private static readonly string EMPTY = "";
+
+        /// <summary>
+        /// Constants used mainly for the min rating value.
+        /// </summary>
+        private static readonly int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, EIGHT = 8,
+                                 ELEVEN = 11, TWELVE = 12;
+
+        /// <summary>
+        /// The plain letter equivalent of the accented letters.
+        /// </summary>
+        private static readonly string PLAIN_ASCII = "AaEeIiOoUu" + // grave
+            "AaEeIiOoUuYy" + // acute
+            "AaEeIiOoUuYy" + // circumflex
+            "AaOoNn" + // tilde
+            "AaEeIiOoUuYy" + // umlaut
+            "Aa" + // ring
+            "Cc" + // cedilla
+            "OoUu"; // double acute
+
+        /// <summary>
+        /// Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc...
+        /// </summary>
+        private static readonly string UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" +
+                "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" +
+                "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" +
+                "\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" +
+                "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" +
+                "\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171";
+
+        private static readonly string[] DOUBLE_CONSONANT =
+                new string[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS",
+                           "TT", "VV", "WW", "XX", "YY", "ZZ" };
+
+        /// <summary>
+        /// Cleans up a name: 1. Upper-cases everything; 2. Removes some common punctuation; 3. Removes accents; 4. Removes any
+        /// spaces.
+        /// </summary>
+        /// <param name="name">The name to be cleaned.</param>
+        /// <returns>The cleaned name.</returns>
+        internal string CleanName(string name)
+        {
+            string upperName = new CultureInfo("en").TextInfo.ToUpper(name);
+
+            string[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" };
+            foreach (string str in charsToTrim)
+            {
+                upperName = Regex.Replace(upperName, str, EMPTY);
+            }
+
+            upperName = RemoveAccents(upperName);
+            upperName = Regex.Replace(upperName, "\\s+", EMPTY);
+
+            return upperName;
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        // **
+        // * Encodes an Object using the Match Rating Approach algorithm. Method is here to satisfy the requirements of the
+        // * Encoder interface Throws an EncoderException if input object is not of type java.lang.string.
+        // *
+        // * @param pObject
+        // *            Object to encode
+        // * @return An object (or type java.lang.string) containing the Match Rating Approach code which corresponds to the
+        // *         string supplied.
+        // * @throws EncoderException
+        // *             if the parameter supplied is not of type java.lang.string
+        // */
+        //public Object encode(Object pObject) throws EncoderException
+        //{
+        //if (!(pObject instanceof string)) {
+        //        throw new EncoderException(
+        //                "Parameter supplied to Match Rating Approach encoder is not of type java.lang.string");
+        //    }
+        //return encode((string) pObject);
+        //}
+
+        /// <summary>
+        /// Encodes a string using the Match Rating Approach (MRA) algorithm.
+        /// </summary>
+        /// <param name="name">String to encode.</param>
+        /// <returns>The MRA code corresponding to the string supplied.</returns>
+        public string Encode(string name)
+        {
+            // Bulletproof for trivial input - NINO
+            if (name == null || EMPTY.Equals(name, StringComparison.OrdinalIgnoreCase) ||
+                SPACE.Equals(name, StringComparison.OrdinalIgnoreCase) || name.Length == 1)
+            {
+                return EMPTY;
+            }
+
+            // Preprocessing
+            name = CleanName(name);
+
+            // BEGIN: Actual encoding part of the algorithm...
+            // 1. Delete all vowels unless the vowel begins the word
+            name = RemoveVowels(name);
+
+            // 2. Remove second consonant from any double consonant
+            name = RemoveDoubleConsonants(name);
+
+            // 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters
+            name = GetFirst3Last3(name);
+
+            return name;
+        }
+
+        /// <summary>
+        /// Gets the first &amp; last 3 letters of a name (if &gt; 6 characters); otherwise just returns the name.
+        /// </summary>
+        /// <param name="name">The string to get the substrings from.</param>
+        /// <returns>Annexed first &amp; last 3 letters of input word.</returns>
+        internal string GetFirst3Last3(string name)
+        {
+            int nameLength = name.Length;
+
+            if (nameLength > SIX)
+            {
+                string firstThree = name.Substring(0, THREE - 0);
+                string lastThree = name.Substring(nameLength - THREE, nameLength - (nameLength - THREE));
+                return firstThree + lastThree;
+            }
+            else
+            {
+                return name;
+            }
+        }
+
+        /// <summary>
+        /// Obtains the min rating for the length sum of the 2 names. In essence, the larger the sum length, the smaller the
+        /// min rating. Values are taken directly from the documentation.
+        /// </summary>
+        /// <param name="sumLength">The length of 2 strings sent down.</param>
+        /// <returns>The min rating value.</returns>
+        internal int GetMinRating(int sumLength)
+        {
+            int minRating = 0;
+
+            if (sumLength <= FOUR)
+            {
+                minRating = FIVE;
+            }
+            else if (sumLength >= FIVE && sumLength <= SEVEN)
+            {
+                minRating = FOUR;
+            }
+            else if (sumLength >= EIGHT && sumLength <= ELEVEN)
+            {
+                minRating = THREE;
+            }
+            else if (sumLength == TWELVE)
+            {
+                minRating = TWO;
+            }
+            else
+            {
+                minRating = ONE; // docs said little here.
+            }
+
+            return minRating;
+        }
+
+        /// <summary>
+        /// Determines if two names are homophonous via the Match Rating Approach (MRA) algorithm. Note that the
+        /// strings are cleaned in the same way as <see cref="Encode(string)"/>.
+        /// </summary>
+        /// <param name="name1">First of the 2 strings (names) to compare.</param>
+        /// <param name="name2">Second of the 2 names to compare.</param>
+        /// <returns><c>true</c> if the encodings are identical <c>false</c> otherwise.</returns>
+        public virtual bool IsEncodeEquals(string name1, string name2)
+        {
+            // Bulletproof for trivial input - NINO
+            if (name1 == null || EMPTY.Equals(name1, StringComparison.OrdinalIgnoreCase) || SPACE.Equals(name1, StringComparison.OrdinalIgnoreCase))
+            {
+                return false;
+            }
+            else if (name2 == null || EMPTY.Equals(name2, StringComparison.OrdinalIgnoreCase) || SPACE.Equals(name2, StringComparison.OrdinalIgnoreCase))
+            {
+                return false;
+            }
+            else if (name1.Length == 1 || name2.Length == 1)
+            {
+                return false;
+            }
+            else if (name1.Equals(name2, StringComparison.OrdinalIgnoreCase))
+            {
+                return true;
+            }
+
+            // Preprocessing
+            name1 = CleanName(name1);
+            name2 = CleanName(name2);
+
+            // Actual MRA Algorithm
+
+            // 1. Remove vowels
+            name1 = RemoveVowels(name1);
+            name2 = RemoveVowels(name2);
+
+            // 2. Remove double consonants
+            name1 = RemoveDoubleConsonants(name1);
+            name2 = RemoveDoubleConsonants(name2);
+
+            // 3. Reduce down to 3 letters
+            name1 = GetFirst3Last3(name1);
+            name2 = GetFirst3Last3(name2);
+
+            // 4. Check for length difference - if 3 or greater then no similarity
+            // comparison is done
+            if (Math.Abs(name1.Length - name2.Length) >= THREE)
+            {
+                return false;
+            }
+
+            // 5. Obtain the minimum rating value by calculating the length sum of the
+            // encoded strings and sending it down.
+            int sumLength = Math.Abs(name1.Length + name2.Length);
+            int minRating = 0;
+            minRating = GetMinRating(sumLength);
+
+            // 6. Process the encoded strings from left to right and remove any
+            // identical characters found from both strings respectively.
+            int count = LeftToRightThenRightToLeftProcessing(name1, name2);
+
+            // 7. Each PNI item that has a similarity rating equal to or greater than
+            // the min is considered to be a good candidate match
+            return count >= minRating;
+
+        }
+
+        /// <summary>
+        /// Determines if a letter is a vowel.
+        /// </summary>
+        /// <param name="letter">The letter under investigation.</param>
+        /// <returns><c>true</c> if a vowel, else <c>false</c>.</returns>
+        internal bool IsVowel(string letter)
+        {
+            return letter.Equals("E", StringComparison.OrdinalIgnoreCase) || letter.Equals("A", StringComparison.OrdinalIgnoreCase) || letter.Equals("O", StringComparison.OrdinalIgnoreCase) ||
+                   letter.Equals("I", StringComparison.OrdinalIgnoreCase) || letter.Equals("U", StringComparison.OrdinalIgnoreCase);
+        }
+
+        /// <summary>
+        /// Processes the names from left to right (first) then right to left, removing identical letters in the same positions.
+        /// Then subtracts the length of the longer remaining string from 6 and returns this.
+        /// </summary>
+        /// <param name="name1"></param>
+        /// <param name="name2"></param>
+        /// <returns></returns>
+        internal int LeftToRightThenRightToLeftProcessing(string name1, string name2)
+        {
+            char[] name1Char = name1.ToCharArray();
+            char[] name2Char = name2.ToCharArray();
+
+            int name1Size = name1.Length - 1;
+            int name2Size = name2.Length - 1;
+
+            string name1LtRStart = EMPTY;
+            string name1LtREnd = EMPTY;
+
+            string name2RtLStart = EMPTY;
+            string name2RtLEnd = EMPTY;
+
+            for (int i = 0; i < name1Char.Length; i++)
+            {
+                if (i > name2Size)
+                {
+                    break;
+                }
+
+                name1LtRStart = name1.Substring(i, 1);
+                name1LtREnd = name1.Substring(name1Size - i, 1);
+
+                name2RtLStart = name2.Substring(i, 1);
+                name2RtLEnd = name2.Substring(name2Size - i, 1);
+
+                // Left to right...
+                if (name1LtRStart.Equals(name2RtLStart, StringComparison.Ordinal))
+                {
+                    name1Char[i] = ' ';
+                    name2Char[i] = ' ';
+                }
+
+                // Right to left...
+                if (name1LtREnd.Equals(name2RtLEnd, StringComparison.Ordinal))
+                {
+                    name1Char[name1Size - i] = ' ';
+                    name2Char[name2Size - i] = ' ';
+                }
+            }
+
+            // Char arrays -> string & remove extraneous space
+            string strA = Regex.Replace(new string(name1Char), "\\s+", EMPTY);
+            string strB = Regex.Replace(new string(name2Char), "\\s+", EMPTY);
+
+            // Final bit - subtract longest string from 6 and return this int value
+            if (strA.Length > strB.Length)
+            {
+                return Math.Abs(SIX - strA.Length);
+            }
+            else
+            {
+                return Math.Abs(SIX - strB.Length);
+            }
+        }
+
+        /// <summary>
+        /// Removes accented letters and replaces them with the non-accented ASCII equivalent. Case is preserved.
+        /// http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29
+        /// </summary>
+        /// <param name="accentedWord">The word that may have accents in it.</param>
+        /// <returns>De-accented word.</returns>
+        internal string RemoveAccents(string accentedWord)
+        {
+            if (accentedWord == null)
+            {
+                return null;
+            }
+
+            StringBuilder sb = new StringBuilder();
+            int n = accentedWord.Length;
+
+            for (int i = 0; i < n; i++)
+            {
+                char c = accentedWord[i];
+                int pos = UNICODE.IndexOf(c);
+                if (pos > -1)
+                {
+                    sb.Append(PLAIN_ASCII[pos]);
+                }
+                else
+                {
+                    sb.Append(c);
+                }
+            }
+
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Replaces any double consonant pair with the single letter equivalent.
+        /// </summary>
+        /// <param name="name">String to have double consonants removed.</param>
+        /// <returns>Single consonant word.</returns>
+        internal string RemoveDoubleConsonants(string name)
+        {
+            string replacedName = name.ToUpperInvariant();
+            foreach (string dc in DOUBLE_CONSONANT)
+            {
+                if (replacedName.Contains(dc))
+                {
+                    string singleLetter = dc.Substring(0, 1 - 0);
+                    replacedName = replacedName.Replace(dc, singleLetter);
+                }
+            }
+            return replacedName;
+        }
+
+        /// <summary>
+        /// Deletes all vowels unless the vowel begins the word.
+        /// </summary>
+        /// <param name="name">The name to have vowels removed.</param>
+        /// <returns>De-voweled word.</returns>
+        internal string RemoveVowels(string name)
+        {
+            // Extract first letter
+            string firstLetter = name.Substring(0, 1 - 0);
+
+            name = Regex.Replace(name, "A", EMPTY);
+            name = Regex.Replace(name, "E", EMPTY);
+            name = Regex.Replace(name, "I", EMPTY);
+            name = Regex.Replace(name, "O", EMPTY);
+            name = Regex.Replace(name, "U", EMPTY);
+
+            name = Regex.Replace(name, "\\s{2,}\\b", SPACE);
+
+            // return isVowel(firstLetter) ? (firstLetter + name) : name;
+            if (IsVowel(firstLetter))
+            {
+                return firstLetter + name;
+            }
+            else
+            {
+                return name;
+            }
+        }
+    }
+}
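
As a quick illustration of the members above, here is a small sketch exercising Encode(string) and
IsEncodeEquals(string, string); the expected values in the comments follow from the cleaning,
vowel-removal and first-3/last-3 steps shown in this file.

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class MatchRatingDemo
    {
        public static void Main()
        {
            var mra = new MatchRatingApproachEncoder();

            // Vowels (except a leading one) and doubled consonants are dropped,
            // then the code is reduced to the first three plus last three letters.
            Console.WriteLine(mra.Encode("Catherine"));               // CTHRN

            // Homophony check: BYRN vs BRN gives a similarity rating of 5,
            // which meets the minimum rating of 4 for a combined length of 7.
            Console.WriteLine(mra.IsEncodeEquals("Byrne", "Boern"));  // True
        }
    }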

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Metaphone.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Metaphone.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Metaphone.cs
new file mode 100644
index 0000000..dd3038f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Metaphone.cs
@@ -0,0 +1,494 @@
+// commons-codec version compatibility level: 1.9
+using System.Globalization;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes a string into a Metaphone value.
+    /// <para/>
+    /// Initial Java implementation by <c>William B. Brogden. December, 1997</c>.
+    /// Permission given by <c>wbrogden</c> for code to be used anywhere.
+    /// <para/>
+    /// <c>Hanging on the Metaphone</c> by <c>Lawrence Philips</c> in <c>Computer Language of Dec. 1990,
+    /// p 39.</c>
+    /// <para/>
+    /// Note that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations:
+    /// <para/>
+    /// <list type="bullet">
+    ///     <item><description><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a> (broken link 4/30/2013) </description></item>
+    ///     <item><description><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a> (link checked 4/30/2013) </description></item>
+    /// </list>
+    /// <para/>
+    /// They have had undocumented changes from the originally published algorithm.
+    /// For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>.
+    /// <para/>
+    /// This class is conditionally thread-safe.
+    /// The instance field <see cref="maxCodeLen"/> is mutable (via <see cref="MaxCodeLen"/>)
+    /// but is not volatile, and accesses are not synchronized.
+    /// If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
+    /// is used to ensure safe publication of the value between threads, and must not set <see cref="MaxCodeLen"/>
+    /// after initial setup.
+    /// </summary>
+    public class Metaphone : IStringEncoder
+    {
+        /// <summary>
+        /// Five values in the English language
+        /// </summary>
+        private static readonly string VOWELS = "AEIOU";
+
+        /// <summary>
+        /// Variable used in Metaphone algorithm
+        /// </summary>
+        private static readonly string FRONTV = "EIY";
+
+        /// <summary>
+        /// Variable used in Metaphone algorithm
+        /// </summary>
+        private static readonly string VARSON = "CSPTG";
+
+        /// <summary>
+        /// The max code length for metaphone is 4
+        /// </summary>
+        private int maxCodeLen = 4;
+
+        /// <summary>
+        /// Creates an instance of the <see cref="Metaphone"/> encoder
+        /// </summary>
+        public Metaphone()
+            : base()
+        {
+        }
+
+        /// <summary>
+        /// Find the metaphone value of a string. This is similar to the
+        /// soundex algorithm, but better at finding similar sounding words.
+        /// All input is converted to upper case.
+        /// Limitations: Input format is expected to be a single ASCII word
+        /// with only characters in the A - Z range, no punctuation or numbers.
+        /// </summary>
+        /// <param name="txt">String to find the metaphone code for.</param>
+        /// <returns>A metaphone code corresponding to the string supplied.</returns>
+        public virtual string GetMetaphone(string txt)
+        {
+            bool hard = false;
+            if (txt == null || txt.Length == 0)
+            {
+                return "";
+            }
+            // single character is itself
+            if (txt.Length == 1)
+            {
+                return new CultureInfo("en").TextInfo.ToUpper(txt);
+            }
+
+            char[] inwd = new CultureInfo("en").TextInfo.ToUpper(txt).ToCharArray();
+
+            StringBuilder local = new StringBuilder(40); // manipulate
+            StringBuilder code = new StringBuilder(10); //   output
+                                                        // handle initial 2 characters exceptions
+            switch (inwd[0])
+            {
+                case 'K':
+                case 'G':
+                case 'P': /* looking for KN, etc*/
+                    if (inwd[1] == 'N')
+                    {
+                        local.Append(inwd, 1, inwd.Length - 1);
+                    }
+                    else
+                    {
+                        local.Append(inwd);
+                    }
+                    break;
+                case 'A': /* looking for AE */
+                    if (inwd[1] == 'E')
+                    {
+                        local.Append(inwd, 1, inwd.Length - 1);
+                    }
+                    else
+                    {
+                        local.Append(inwd);
+                    }
+                    break;
+                case 'W': /* looking for WR or WH */
+                    if (inwd[1] == 'R')
+                    {   // WR -> R
+                        local.Append(inwd, 1, inwd.Length - 1);
+                        break;
+                    }
+                    if (inwd[1] == 'H')
+                    {
+                        local.Append(inwd, 1, inwd.Length - 1);
+                        local[0] = 'W'; // WH -> W
+                    }
+                    else
+                    {
+                        local.Append(inwd);
+                    }
+                    break;
+                case 'X': /* initial X becomes S */
+                    inwd[0] = 'S';
+                    local.Append(inwd);
+                    break;
+                default:
+                    local.Append(inwd);
+                    break;
+            } // now local has working string with initials fixed
+
+            int wdsz = local.Length;
+            int n = 0;
+
+            while (code.Length < this.MaxCodeLen &&
+                   n < wdsz)
+            { // max code size of 4 works well
+                char symb = local[n];
+                // remove duplicate letters except C
+                if (symb != 'C' && IsPreviousChar(local, n, symb))
+                {
+                    n++;
+                }
+                else
+                { // not dup
+                    switch (symb)
+                    {
+                        case 'A':
+                        case 'E':
+                        case 'I':
+                        case 'O':
+                        case 'U':
+                            if (n == 0)
+                            {
+                                code.Append(symb);
+                            }
+                            break; // only use vowel if leading char
+                        case 'B':
+                            if (IsPreviousChar(local, n, 'M') &&
+                                 IsLastChar(wdsz, n))
+                            { // B is silent if word ends in MB
+                                break;
+                            }
+                            code.Append(symb);
+                            break;
+                        case 'C': // lots of C special cases
+                                  /* discard if SCI, SCE or SCY */
+                            if (IsPreviousChar(local, n, 'S') &&
+                                 !IsLastChar(wdsz, n) &&
+                                 FRONTV.IndexOf(local[n + 1]) >= 0)
+                            {
+                                break;
+                            }
+                            if (RegionMatch(local, n, "CIA"))
+                            { // "CIA" -> X
+                                code.Append('X');
+                                break;
+                            }
+                            if (!IsLastChar(wdsz, n) &&
+                                FRONTV.IndexOf(local[n + 1]) >= 0)
+                            {
+                                code.Append('S');
+                                break; // CI,CE,CY -> S
+                            }
+                            if (IsPreviousChar(local, n, 'S') &&
+                                IsNextChar(local, n, 'H'))
+                            { // SCH->sk
+                                code.Append('K');
+                                break;
+                            }
+                            if (IsNextChar(local, n, 'H'))
+                            { // detect CH
+                                if (n == 0 &&
+                                    wdsz >= 3 &&
+                                    IsVowel(local, 2))
+                                { // CH consonant -> K consonant
+                                    code.Append('K');
+                                }
+                                else
+                                {
+                                    code.Append('X'); // CHvowel -> X
+                                }
+                            }
+                            else
+                            {
+                                code.Append('K');
+                            }
+                            break;
+                        case 'D':
+                            if (!IsLastChar(wdsz, n + 1) &&
+                                IsNextChar(local, n, 'G') &&
+                                FRONTV.IndexOf(local[n + 2]) >= 0)
+                            { // DGE DGI DGY -> J
+                                code.Append('J'); n += 2;
+                            }
+                            else
+                            {
+                                code.Append('T');
+                            }
+                            break;
+                        case 'G': // GH silent at end or before consonant
+                            if (IsLastChar(wdsz, n + 1) &&
+                                IsNextChar(local, n, 'H'))
+                            {
+                                break;
+                            }
+                            if (!IsLastChar(wdsz, n + 1) &&
+                                IsNextChar(local, n, 'H') &&
+                                !IsVowel(local, n + 2))
+                            {
+                                break;
+                            }
+                            if (n > 0 &&
+                                (RegionMatch(local, n, "GN") ||
+                                  RegionMatch(local, n, "GNED")))
+                            {
+                                break; // silent G
+                            }
+                            if (IsPreviousChar(local, n, 'G'))
+                            {
+                                // NOTE: Given that duplicated chars are removed, I don't see how this can ever be true
+                                hard = true;
+                            }
+                            else
+                            {
+                                hard = false;
+                            }
+                            if (!IsLastChar(wdsz, n) &&
+                                FRONTV.IndexOf(local[n + 1]) >= 0 &&
+                                !hard)
+                            {
+                                code.Append('J');
+                            }
+                            else
+                            {
+                                code.Append('K');
+                            }
+                            break;
+                        case 'H':
+                            if (IsLastChar(wdsz, n))
+                            {
+                                break; // terminal H
+                            }
+                            if (n > 0 &&
+                                VARSON.IndexOf(local[n - 1]) >= 0)
+                            {
+                                break;
+                            }
+                            if (IsVowel(local, n + 1))
+                            {
+                                code.Append('H'); // Hvowel
+                            }
+                            break;
+                        case 'F':
+                        case 'J':
+                        case 'L':
+                        case 'M':
+                        case 'N':
+                        case 'R':
+                            code.Append(symb);
+                            break;
+                        case 'K':
+                            if (n > 0)
+                            { // not initial
+                                if (!IsPreviousChar(local, n, 'C'))
+                                {
+                                    code.Append(symb);
+                                }
+                            }
+                            else
+                            {
+                                code.Append(symb); // initial K
+                            }
+                            break;
+                        case 'P':
+                            if (IsNextChar(local, n, 'H'))
+                            {
+                                // PH -> F
+                                code.Append('F');
+                            }
+                            else
+                            {
+                                code.Append(symb);
+                            }
+                            break;
+                        case 'Q':
+                            code.Append('K');
+                            break;
+                        case 'S':
+                            if (RegionMatch(local, n, "SH") ||
+                                RegionMatch(local, n, "SIO") ||
+                                RegionMatch(local, n, "SIA"))
+                            {
+                                code.Append('X');
+                            }
+                            else
+                            {
+                                code.Append('S');
+                            }
+                            break;
+                        case 'T':
+                            if (RegionMatch(local, n, "TIA") ||
+                                RegionMatch(local, n, "TIO"))
+                            {
+                                code.Append('X');
+                                break;
+                            }
+                            if (RegionMatch(local, n, "TCH"))
+                            {
+                                // Silent if in "TCH"
+                                break;
+                            }
+                            // substitute numeral 0 for TH (resembles theta after all)
+                            if (RegionMatch(local, n, "TH"))
+                            {
+                                code.Append('0');
+                            }
+                            else
+                            {
+                                code.Append('T');
+                            }
+                            break;
+                        case 'V':
+                            code.Append('F'); break;
+                        case 'W':
+                        case 'Y': // silent if not followed by vowel
+                            if (!IsLastChar(wdsz, n) &&
+                                IsVowel(local, n + 1))
+                            {
+                                code.Append(symb);
+                            }
+                            break;
+                        case 'X':
+                            code.Append('K');
+                            code.Append('S');
+                            break;
+                        case 'Z':
+                            code.Append('S');
+                            break;
+                        default:
+                            // do nothing
+                            break;
+                    } // end switch
+                    n++;
+                } // end else from symb != 'C'
+                if (code.Length > this.MaxCodeLen)
+                {
+                    code.Length = this.MaxCodeLen;
+                }
+            }
+            return code.ToString();
+        }
+
+        private bool IsVowel(StringBuilder sb, int index)
+        {
+            return VOWELS.IndexOf(sb[index]) >= 0;
+        }
+
+        private bool IsPreviousChar(StringBuilder sb, int index, char c)
+        {
+            bool matches = false;
+            if (index > 0 &&
+                index < sb.Length)
+            {
+                matches = sb[index - 1] == c;
+            }
+            return matches;
+        }
+
+        private bool IsNextChar(StringBuilder sb, int index, char c)
+        {
+            bool matches = false;
+            if (index >= 0 &&
+                index < sb.Length - 1)
+            {
+                matches = sb[index + 1] == c;
+            }
+            return matches;
+        }
+
+        private bool RegionMatch(StringBuilder sb, int index, string test)
+        {
+            bool matches = false;
+            if (index >= 0 &&
+                index + test.Length - 1 < sb.Length)
+            {
+                string substring = sb.ToString(index, test.Length);
+                matches = substring.Equals(test);
+            }
+            return matches;
+        }
+
+        private bool IsLastChar(int wdsz, int n)
+        {
+            return n + 1 == wdsz;
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the metaphone algorithm.  This method
+        //     * is provided in order to satisfy the requirements of the
+        //     * Encoder interface, and will throw an EncoderException if the
+        //     * supplied object is not of type java.lang.String.
+        //     *
+        //     * @param obj Object to encode
+        //     * @return An object (or type java.lang.String) containing the
+        //     *         metaphone code which corresponds to the String supplied.
+        //     * @throws EncoderException if the parameter supplied is not
+        //     *                          of type java.lang.String
+        //     */
+        //    @Override
+        //public object encode(object obj) 
+        //    {
+        //    if (!(obj is String)) {
+        //            throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String");
+        //        }
+        //    return GetMetaphone((String) obj);
+        //    }
+
+        /// <summary>
+        /// Encodes a string using the <see cref="Metaphone"/> algorithm.
+        /// </summary>
+        /// <param name="str">String to encode.</param>
+        /// <returns>The metaphone code corresponding to the string supplied.</returns>
+        public virtual string Encode(string str)
+        {
+            return GetMetaphone(str);
+        }
+
+        /// <summary>
+        /// Tests if the metaphones of two strings are identical.
+        /// </summary>
+        /// <param name="str1">First of two strings to compare.</param>
+        /// <param name="str2">Second of two strings to compare.</param>
+        /// <returns><c>true</c> if the metaphones of these strings are identical, <c>false</c> otherwise.</returns>
+        public virtual bool IsMetaphoneEqual(string str1, string str2)
+        {
+            return GetMetaphone(str1).Equals(GetMetaphone(str2));
+        }
+
+        /// <summary>
+        /// Gets or Sets the maximum length of the generated metaphone code (<see cref="maxCodeLen"/>).
+        /// </summary>
+        public virtual int MaxCodeLen
+        {
+            get { return this.maxCodeLen; }
+            set { this.maxCodeLen = value; }
+        }
+    }
+}
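
A minimal usage sketch of the Metaphone encoder added above, using only the members visible in this hunk (Encode, IsMetaphoneEqual, MaxCodeLen). The namespace and the expected outputs are assumptions based on the commons-codec original, not verified against the committed tests:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language; // assumed namespace for the ported Metaphone

    public static class MetaphoneDemo
    {
        public static void Main()
        {
            var metaphone = new Metaphone();
            metaphone.MaxCodeLen = 4; // codes longer than this are truncated, per the loop above

            // Encode delegates to GetMetaphone and returns the phonetic code.
            Console.WriteLine(metaphone.Encode("Paris"));                    // expected: PRS

            // IsMetaphoneEqual compares two words by their metaphone codes.
            Console.WriteLine(metaphone.IsMetaphoneEqual("Gary", "Cahra"));  // expected: True
        }
    }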


[12/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt
new file mode 100644
index 0000000..78cc0f8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_german.txt
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"I" "" "$" "i"
+"I" "[aeiAEIOUouQY]" "" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "" "" "(Q|i)" 
+    
+"AU" "" "" "(D|a|u)"
+"aU" "" "" "(D|a|u)"
+"Au" "" "" "(D|a|u)"
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"OU" "" "" "(D|o|u)"
+"oU" "" "" "(D|o|u)"
+"Ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"Ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"Oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+"Ui" "" "" "(D|u|i)"
+        
+"e" "" "" "i" 
+  
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoAOUiuQY]" "" "i"
+"E" "" "[aoAOQY]" "i"
+"E" "" "" "(Y|i)" 
+       
+"O" "" "$" "o"
+"O" "" "[fklmnprst]$" "o"
+"O" "" "ts$" "o"
+"O" "[aoAOUeiuQY]" "" "o"
+"O" "" "" "(o|Y)"
+    
+"a" "" "" "(a|o)" 
+  
+"A" "" "$" "(a|o)" 
+"A" "" "[fklmnprst]$" "(a|o)"
+"A" "" "ts$" "(a|o)"
+"A" "[aoeOUiuQY]" "" "(a|o)"
+"A" "" "" "(a|o|Y)" 
+
+"U" "" "$" "u"
+"U" "[DaoiuUQY]" "" "u"
+"U" "" "[^k]$" "u"
+"Uk" "[lr]" "$" "(uk|Qk)"
+"Uk" "" "$" "uk"
+"sUts" "" "$" "(suts|sQts)"
+"Uts" "" "$" "uts"
+"U" "" "" "(u|Q)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt
new file mode 100644
index 0000000..bb950fb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt
new file mode 100644
index 0000000..7f49817
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_polish.txt
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"aiB" "" "[bp]" "(D|Dm)"
+"oiB" "" "[bp]" "(D|Dm)" 
+"uiB" "" "[bp]" "(D|Dm)" 
+"eiB" "" "[bp]" "(D|Dm)"
+"EiB" "" "[bp]" "(D|Dm)"
+"iiB" "" "[bp]" "(D|Dm)"
+"IiB" "" "[bp]" "(D|Dm)"
+    
+"aiB" "" "[dgkstvz]" "(D|Dn)"
+"oiB" "" "[dgkstvz]" "(D|Dn)" 
+"uiB" "" "[dgkstvz]" "(D|Dn)" 
+"eiB" "" "[dgkstvz]" "(D|Dn)"
+"EiB" "" "[dgkstvz]" "(D|Dn)"
+"iiB" "" "[dgkstvz]" "(D|Dn)"
+"IiB" "" "[dgkstvz]" "(D|Dn)"
+      
+"B" "" "[bp]" "(o|om|im)" 
+"B" "" "[dgkstvz]" "(o|on|in)" 
+"B" "" "" "o"
+    
+"aiF" "" "[bp]" "(D|Dm)"
+"oiF" "" "[bp]" "(D|Dm)" 
+"uiF" "" "[bp]" "(D|Dm)" 
+"eiF" "" "[bp]" "(D|Dm)"
+"EiF" "" "[bp]" "(D|Dm)"
+"iiF" "" "[bp]" "(D|Dm)"
+"IiF" "" "[bp]" "(D|Dm)"
+        
+"aiF" "" "[dgkstvz]" "(D|Dn)"
+"oiF" "" "[dgkstvz]" "(D|Dn)" 
+"uiF" "" "[dgkstvz]" "(D|Dn)" 
+"eiF" "" "[dgkstvz]" "(D|Dn)"
+"EiF" "" "[dgkstvz]" "(D|Dn)"
+"iiF" "" "[dgkstvz]" "(D|Dn)"
+"IiF" "" "[dgkstvz]" "(D|Dn)"
+            
+"F" "" "[bp]" "(i|im|om)"
+"F" "" "[dgkstvz]" "(i|in|on)"
+"F" "" "" "i" 
+    
+"P" "" "" "(o|u)" 
+    
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiAEBFIou]" "" "i"
+"I" "" "" "(i|Q)" 
+
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+    
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt
new file mode 100644
index 0000000..295debf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_polish
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt
new file mode 100644
index 0000000..46d6a8c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_russian.txt
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk)"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts)"
+"Its" "" "$" "its"
+"I" "[aeiEIou]" "" "i"
+"I" "" "" "(i|Q)" 
+        
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+
+"om" "" "[bp]" "(om|im)" 
+"on" "" "[dgkstvz]" "(on|in)" 
+"em" "" "[bp]" "(im|om)" 
+"en" "" "[dgkstvz]" "(in|on)" 
+"Em" "" "[bp]" "(im|Ym|om)" 
+"En" "" "[dgkstvz]" "(in|Yn|on)" 
+                    
+"a" "" "" "(a|o)" 
+"e" "" "" "i" 
+    
+"E" "" "[fklmnprsStv]$" "i"
+"E" "" "ts$" "i"
+"E" "[DaoiuQ]" "" "i"
+"E" "" "[aoQ]" "i"
+"E" "" "" "(Y|i)" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt
new file mode 100644
index 0000000..bb950fb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_approx_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt
new file mode 100644
index 0000000..e6abc2d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_any.txt
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// These rules are applied after the word has been transliterated into the phonetic alphabet
+// These rules are substitution rules within the phonetic character space rather than mapping rules
+
+// format of each entry rule in the table
+//   (pattern, left context, right context, phonetic)
+// where
+//   pattern is a sequence of characters that might appear after a word has been transliterated into phonetic alphabet
+//   left context is the context that precedes the pattern
+//   right context is the context that follows the pattern
+//   phonetic is the result that this rule generates
+//
+// note that both left context and right context can be regular expressions
+// ex: left context of ^ would mean start of word
+//     right context of $ means end of word
+//
+// match occurs if all of the following are true:
+//   portion of word matches the pattern
+//   that portion satisfies the context
+
+// A, E, I, O, P, U should create variants, but a, e, i, o, u should not create any new variant
+// Q = ü ; Y = ä = ö
+
+
+"A" "" "" "a"
+"B" "" "" "a"
+
+"E" "" "" "e"
+"F" "" "" "e"
+
+"I" "" "" "i"
+"O" "" "" "o"
+"P" "" "" "o"
+"U" "" "" "u"
+
+"J" "" "" "l"

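The ash_* files above make up the Ashkenazic rule set consumed by the Beider-Morse phonetic engine ported in this commit. A minimal sketch of how the (pattern, left context, right context, phonetic) rule format described in ash_exact_any.txt might be exercised, assuming the port keeps the commons-codec type and member names (PhoneticEngine, NameType, RuleType) under Lucene.Net.Analysis.Phonetic.Language.Bm; the exact signatures and output are assumptions:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language.Bm; // assumed namespace for the ported engine

    public static class BeiderMorseDemo
    {
        public static void Main()
        {
            // NameType.ASHKENAZI selects the ash_* rule files shown in this commit;
            // RuleType.APPROX selects the *_approx_* tables; true concatenates the results.
            var engine = new PhoneticEngine(NameType.ASHKENAZI, RuleType.APPROX, true);

            // The input is transliterated into the phonetic alphabet and then rewritten by the
            // approx/exact substitution rules shown above.
            Console.WriteLine(engine.Encode("goldberg")); // expected: a '|'-separated set of phonetic forms
        }
    }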
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt
new file mode 100644
index 0000000..0a8d121
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_approx_common.txt
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic
+    
+"h" "" "$" ""
+// VOICED - UNVOICED CONSONANTS
+"b" "" "[fktSs]" "p"
+"b" "" "p" ""
+"b" "" "$" "p"
+"p" "" "[gdZz]" "b"
+"p" "" "b" ""
+    
+"v" "" "[pktSs]" "f"
+"v" "" "f" ""
+"v" "" "$" "f"
+"f" "" "[bgdZz]" "v"
+"f" "" "v" ""
+    
+"g" "" "[pftSs]" "k"
+"g" "" "k" ""
+"g" "" "$" "k"
+"k" "" "[bdZz]" "g"
+"k" "" "g" ""
+    
+"d" "" "[pfkSs]" "t"
+"d" "" "t" ""
+"d" "" "$" "t"
+"t" "" "[bgZz]" "d"
+"t" "" "d" ""
+
+"s" "" "dZ" ""
+"s" "" "tS" ""
+    
+"z" "" "[pfkSt]" "s"
+"z" "" "[sSzZ]" ""
+"s" "" "[sSzZ]" ""
+"Z" "" "[sSzZ]" ""
+"S" "" "[sSzZ]" ""
+       
+// SIMPLIFICATION OF CONSONANT CLUSTERS
+
+"jnm" "" "" "jm"
+
+// DOUBLE --> SINGLE
+
+"ji" "^" "" "i"
+"jI" "^" "" "I"
+    
+"a" "" "[aAB]" "" 
+"a" "[AB]" "" "" 
+"A" "" "A" ""
+"B" "" "B" ""
+    
+"b" "" "b" ""
+"d" "" "d" ""
+"f" "" "f" ""
+"g" "" "g" ""
+"k" "" "k" ""
+"l" "" "l" ""
+"m" "" "m" ""
+"n" "" "n" ""
+"p" "" "p" ""
+"r" "" "r" ""
+"t" "" "t" ""
+"v" "" "v" ""
+"z" "" "z" ""
+    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt
new file mode 100644
index 0000000..7e6ff95
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_common.txt
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_approx_common
+
+"H" "" "" "h"
+ 
+// VOICED - UNVOICED CONSONANTS
+
+"s" "[^t]" "[bgZd]" "z"
+"Z" "" "[pfkst]" "S"
+"Z" "" "$" "S"
+"S" "" "[bgzd]" "Z"
+"z" "" "$" "s"
+    
+"ji" "[aAoOeEiIuU]" "" "j"
+"jI" "[aAoOeEiIuU]" "" "j"
+"je" "[aAoOeEiIuU]" "" "j"
+"jE" "[aAoOeEiIuU]" "" "j"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_cyrillic.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_english.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt
new file mode 100644
index 0000000..a60f8cc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_german.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_any
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_hungarian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt
new file mode 100644
index 0000000..ba32ce7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_polish.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"B" "" "" "a"
+"F" "" "" "e"
+"P" "" "" "o"
+
+"E" "" "" "e"
+"I" "" "" "i"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_romanian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt
new file mode 100644
index 0000000..fc9f14d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_russian.txt
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"E" "" "" "e"
+"I "" "" "i"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt
new file mode 100644
index 0000000..d309ead
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_exact_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_russian
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt
new file mode 100644
index 0000000..b1c6501
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_hebrew_common.txt
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include ash_exact_approx_common
+
+"ts" "" "" "C" // for not confusion Gutes [=guts] and Guts [=guc]
+"tS" "" "" "C" // same reason
+"S" "" "" "s"
+"p" "" "" "f"   
+"b" "^" "" "b"    
+"b" "" "" "(b|v)"    
+        
+"ja" "" "" "i"
+"jA" "" "" "i"
+"jB" "" "" "i"
+"je" "" "" "i"
+"jE" "" "" "i"
+"jF" "" "" "i"
+"aj" "" "" "i"
+"Aj" "" "" "i"
+"Bj" "" "" "i"
+"Fj" "" "" "i"
+"I" "" "" "i"
+"Q" "" "" "i"
+"j" "" "" "i"
+    
+"a" "^" "" "1"
+"A" "^" "" "1"
+"B" "^" "" "1"
+"e" "^" "" "1"
+"E" "^" "" "1"
+"F" "^" "" "1"
+"Y" "^" "" "1"
+    
+"a" "" "$" "1"
+"A" "" "$" "1"
+"B" "" "$" "1"
+"e" "" "$" "1"
+"E" "" "$" "1"
+"F" "" "$" "1"
+"Y" "" "$" "1"
+        
+"a" "" "" ""
+"A" "" "" ""
+"B" "" "" ""
+"e" "" "" ""
+"E" "" "" ""
+"F" "" "" ""
+"Y" "" "" ""
+   
+"oj" "^" "" "(u|vi)"
+"Oj" "^" "" "(u|vi)"
+"uj" "^" "" "(u|vi)"
+"Uj" "^" "" "(u|vi)" 
+    
+"oj" "" "" "u"
+"Oj" "" "" "u"
+"uj" "" "" "u"
+"Uj" "" "" "u" 
+    
+"ou" "^" "" "(u|v|1)"
+"o" "^" "" "(u|v|1)"
+"O" "^" "" "(u|v|1)"
+"U" "^" "" "(u|v|1)"
+"u" "^" "" "(u|v|1)"
+    
+"o" "" "$" "(u|1)"
+"O" "" "$" "(u|1)"
+"u" "" "$" "(u|1)"
+"U" "" "$" "(u|1)"
+    
+"ou" "" "" "u"
+"o" "" "" "u"
+"O" "" "" "u"
+"U" "" "" "u"
+        
+"VV" "" "" "u" // alef/ayin + vov from ruleshebrew
+"V" "" "" "v" // tsvey-vov from ruleshebrew;; only Ashkenazic
+"L" "^" "" "1" // alef/ayin from ruleshebrew
+"L" "" "$" "1" // alef/ayin from ruleshebrew
+"L" "" "" " " // alef/ayin from ruleshebrew
+"WW" "^" "" "(vi|u)" // vav-yod from ruleshebrew
+"WW" "" "" "u" // vav-yod from ruleshebrew
+"W" "^" "" "(u|v)" // vav from ruleshebrew
+"W" "" "" "u" // vav from ruleshebrew
+    
+    //"g" "" "" "(g|Z)"
+    //"z" "" "" "(z|Z)"
+    //"d" "" "" "(d|dZ)"
+       
+"TB" "" "$" "(t|s)" // tav from ruleshebrew; only Ashkenazic
+"TB" "" "" "t" // tav from ruleshebrew; only Ashkenazic
+"T" "" "" "t"   // tet from ruleshebrew
+    
+   //"k" "" "" "(k|x)"
+   //"x" "" "" "(k|x)"
+"K" "" "" "k" // kof and initial kaf from ruleshebrew
+"X" "" "" "x" // khet and final kaf from ruleshebrew
+    
+"H" "^" "" "(x|1)"
+"H" "" "$" "(x|1)"
+"H" "" "" "(x|)"
+"h" "^" "" "1"
+"h" "" "" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt
new file mode 100644
index 0000000..8c84c51
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_languages.txt
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+any
+cyrillic
+english
+french
+german
+hebrew
+hungarian
+polish
+romanian
+russian
+spanish

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt
new file mode 100644
index 0000000..9960ada
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_any.txt
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//ASHKENAZIC
+
+// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in[russian]|ina)"
+"ina" "" "$" "(in[russian]|ina)"
+"liova" "" "$" "(lof[russian]|lef[russian]|lova)"
+"lova" "" "$" "(lof[russian]|lef[russian]|lova)"
+"ova" "" "$" "(of[russian]|ova)"
+"eva" "" "$" "(ef[russian]|eva)"
+"aia" "" "$" "(aja|i[russian])"
+"aja" "" "$" "(aja|i[russian])"
+"aya" "" "$" "(aja|i[russian])"
+    
+"lowa" "" "$" "(lova|lof[polish]|l[polish]|el[polish])"
+"kowa" "" "$" "(kova|kof[polish]|k[polish]|ek[polish])"
+"owa" "" "$" "(ova|of[polish]|)"
+"lowna" "" "$" "(lovna|levna|l[polish]|el[polish])"
+"kowna" "" "$" "(kovna|k[polish]|ek[polish])"
+"owna" "" "$" "(ovna|[polish])"
+"lówna" "" "$" "(l|el[polish])"  // polish
+"kówna" "" "$" "(k|ek[polish])"  // polish
+"ówna" "" "$" ""   // polish
+    
+"a" "" "$" "(a|i[polish])"
+    
+// CONSONANTS  (integrated: German, Polish, Russian, Romanian and English)
+
+"rh" "^" "" "r"
+"ssch" "" "" "S" 
+"chsch" "" "" "xS" 
+"tsch" "" "" "tS" 
+    
+"sch" "" "[ei]" "(sk[romanian]|S|StS[russian])" // german
+"sch" "" "" "(S|StS[russian])" // german
+            
+"ssh" "" "" "S" 
+    
+"sh" "" "[äöü]" "sh" // german
+"sh" "" "[aeiou]" "(S[russian+english]|sh)"
+"sh" "" "" "S" // russian+english
+    
+"kh" "" "" "(x[russian+english]|kh)"
+    
+"chs" "" "" "(ks[german]|xs|tSs[russian+english])"
+    
+    // French "ch" is currently disabled
+    //array("ch" "" "[ei]" "(x|tS|k[romanian]|S[french])"
+    //array("ch" "" "" "(x|tS[russian+english]|S[french])"
+    
+"ch" "" "[ei]" "(x|k[romanian]|tS[russian+english])"
+"ch" "" "" "(x|tS[russian+english])"
+        
+"ck" "" "" "(k|tsk[polish])"
+        
+"czy" "" "" "tSi"
+"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)"
+"ciewicz" "" "" "(tsevitS|tSevitS)"
+"siewicz" "" "" "(sevitS|SevitS)"
+"ziewicz" "" "" "(zevitS|ZevitS)"
+"riewicz" "" "" "rjevitS" 
+"diewicz" "" "" "djevitS" 
+"tiewicz" "" "" "tjevitS" 
+"iewicz" "" "" "evitS"
+"ewicz" "" "" "evitS"
+"owicz" "" "" "ovitS"
+"icz" "" "" "itS"
+"cz" "" "" "tS" // Polish
+    
+"cia" "" "[bcdgkpstwzż]" "(tSB[polish]|tsB)"
+"cia" "" "" "(tSa[$polish]|tsa)" 
+"cią" "" "[bp]" "(tSom[polish]|tsom)"
+"cią" "" "" "(tSon[polish]|tson)"
+"cię" "" "[bp]" "(tSem[polish]|tsem)"
+"cię" "" "" "(tSen[polish]|tsen)"
+"cie" "" "[bcdgkpstwzż]" "(tSF[polish]|tsF)"
+"cie" "" "" "(tSe[polish]|tse)"
+"cio" "" "" "(tSo[polish]|tso)"
+"ciu" "" "" "(tSu[polish]|tsu)"
+
+"ci" "" "$" "(tsi[polish]|tSi[polish+romanian]|tS[romanian]|si)"
+"ci" "" "" "(tsi[polish]|tSi[polish+romanian]|si)"
+"ce" "" "[bcdgkpstwzż]" "(tsF[polish]|tSe[polish+romanian]|se)"
+"ce" "" "" "(tSe[polish+romanian]|tse[polish]|se)"
+"cy" "" "" "(si|tsi[polish])"
+              
+"ssz" "" "" "S" // Polish
+"sz" "" "" "S" // Polish; actually could also be Hungarian /s/, disabled here 
+    
+"ssp" "" "" "(Sp[german]|sp)"
+"sp" "" "" "(Sp[german]|sp)"
+"sst" "" "" "(St[german]|st)"
+"st" "" "" "(St[german]|st)"
+"ss" "" "" "s" 
+    
+"sia" "" "[bcdgkpstwzż]" "(SB[polish]|sB[polish]|sja)"
+"sia" "" "" "(Sa[polish]|sja)"
+"sią" "" "[bp]" "(Som[polish]|som)"
+"sią" "" "" "(Son[polish]|son)"
+"się" "" "[bp]" "(Sem[polish]|sem)"
+"się" "" "" "(Sen[polish]|sen)"
+"sie" "" "[bcdgkpstwzż]" "(SF[polish]|sF|zi[german])"
+"sie" "" "" "(se|Se[polish]|zi[german])"
+"sio" "" "" "(So[polish]|so)"
+"siu" "" "" "(Su[polish]|sju)"
+"si" "" "" "(Si[polish]|si|zi[german])"
+"s" "" "[aeiouäöë]" "(s|z[german])"
+        
+"gue" "" "" "ge" 
+"gui" "" "" "gi" 
+"guy" "" "" "gi" 
+"gh" "" "[ei]" "(g[romanian]|gh)"
+    
+"gauz" "" "$" "haus" 
+"gaus" "" "$" "haus" 
+"gol'ts" "" "$" "holts" 
+"golts" "" "$" "holts" 
+"gol'tz" "" "$" "holts" 
+"goltz" "" "" "holts" 
+"gol'ts" "^" "" "holts" 
+"golts" "^" "" "holts" 
+"gol'tz" "^" "" "holts" 
+"goltz" "^" "" "holts" 
+"gendler" "" "$" "hendler" 
+"gejmer" "" "$" "hajmer" 
+"gejm" "" "$" "hajm" 
+"geymer" "" "$" "hajmer" 
+"geym" "" "$" "hajm" 
+"geimer" "" "$" "hajmer" 
+"geim" "" "$" "hajm" 
+"gof" "" "$" "hof" 
+    
+"ger" "" "$" "ger" 
+"gen" "" "$" "gen" 
+"gin" "" "$" "gin" 
+    
+"gie" "" "$" "(ge|gi[german]|ji[french])"
+"gie" "" "" "ge" 
+"ge" "[yaeiou]" "" "(gE|xe[spanish]|dZe[english+romanian])"
+"gi" "[yaeiou]" "" "(gI|xi[spanish]|dZi[english+romanian])"
+"ge" "" "" "(gE|dZe[english+romanian]|hE[russian]|xe[spanish])"
+"gi" "" "" "(gI|dZi[english+romanian]|hI[russian]|xi[spanish])"
+"gy" "" "[aeouáéóúüöőű]" "(gi|dj[hungarian])"
+"gy" "" "" "(gi|d[hungarian])"
+"g" "[jyaeiou]" "[aouyei]" "g" 
+"g" "" "[aouei]" "(g|h[russian])"
+              
+"ej" "" "" "(aj|eZ[french+romanian]|ex[spanish])"
+"ej" "" "" "aj"
+    
+"ly" "" "[au]" "l" 
+"li" "" "[au]" "l" 
+"lj" "" "[au]" "l" 
+"lio" "" "" "(lo|le[russian])"
+"lyo" "" "" "(lo|le[russian])"
+"ll" "" "" "(l|J[spanish])"
+  
+"j" "" "[aoeiuy]" "(j|dZ[english]|x[spanish]|Z[french+romanian])"
+"j" "" "" "(j|x[spanish])"
+                       
+"pf" "" "" "(pf|p|f)" 
+"ph" "" "" "(ph|f)"
+    
+"qu" "" "" "(kv[german]|k)"
+        
+"rze" "t" "" "(Se[polish]|re)" // polish
+"rze" "" "" "(rze|rtsE[german]|Ze[polish]|re[polish]|rZe[polish])"
+"rzy" "t" "" "(Si[polish]|ri)" // polish
+"rzy" "" "" "(Zi[polish]|ri[polish]|rZi)"
+"rz" "t" "" "(S[polish]|r)" // polish
+"rz" "" "" "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])" // polish
+    
+"tz" "" "$" "(ts|tS[english+german])"
+"tz" "^" "" "(ts|tS[english+german])"
+"tz" "" "" "(ts[english+german+russian]|tz)"
+    
+"zh" "" "" "(Z|zh[polish]|tsh[german])"
+        
+"zia" "" "[bcdgkpstwzż]" "(ZB[polish]|zB[polish]|zja)"
+"zia" "" "" "(Za[polish]|zja)"
+"zią" "" "[bp]" "(Zom[polish]|zom)"
+"zią" "" "" "(Zon[polish]|zon)"
+"zię" "" "[bp]" "(Zem[polish]|zem)"
+"zię" "" "" "(Zen[polish]|zen)"
+"zie" "" "[bcdgkpstwzż]" "(ZF[polish]|zF[polish]|ze|tsi[german])"
+"zie" "" "" "(ze|Ze[polish]|tsi[german])"
+"zio" "" "" "(Zo[polish]|zo)"
+"ziu" "" "" "(Zu[polish]|zju)"
+"zi" "" "" "(Zi[polish]|zi|tsi[german])"
+            
+"thal" "" "$" "tal" 
+"th" "^" "" "t" 
+"th" "" "[aeiou]" "(t[german]|th)"
+"th" "" "" "t" // german 
+"vogel" "" "" "(vogel|fogel[german])"
+"v" "^" "" "(v|f[german])"
+        
+"h" "[aeiouyäöü]" "" "" //german
+"h" "" "" "(h|x[".(romanian+polish)."])"
+"h" "^" "" "(h|H[".(english+german)."])" // H can be exact "h" or approximate "kh"
+    
+ // VOWELS  
+"yi" "^" "" "i"
+    
+    //"e" "" "$" "(e|)"  // French & English rule disabled except for final -ine
+"e" "in" "$" "(e|[french])"
+    
+"ii" "" "$" "i" // russian
+"iy" "" "$" "i" // russian
+"yy" "" "$" "i" // russian
+"yi" "" "$" "i" // russian
+"yj" "" "$" "i" // russian
+"ij" "" "$" "i" // russian
+    
+"aue" "" "" "aue" 
+"oue" "" "" "oue" 
+    
+"au" "" "" "(au|o[french])"
+"ou" "" "" "(ou|u[french])"
+        
+"ue" "" "" "(Q|uje[russian])"
+"ae" "" "" "(Y[german]|aje[russian]|ae)"
+"oe" "" "" "(Y[german]|oje[russian]|oe)"
+"ee" "" "" "(i[english]|aje[russian]|e)"
+    
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"eu" "" "" "(aj[german]|oj[german]|eu)"
+    
+"i" "[aou]" "" "j"
+"y" "[aou]" "" "j"
+    
+"ie" "" "[bcdgkpstwzż]" "(i[german]|e[polish]|ije[russian]|je)"
+"ie" "" "" "(i[german]|e[polish]|ije[russian]|je)"
+"ye" "" "" "(je|ije[russian])"
+         
+"i" "" "[au]" "j"
+"y" "" "[au]" "j"
+"io" "" "" "(jo|e[russian])"
+"yo" "" "" "(jo|e[russian])"
+            
+"ea" "" "" "(ea|ja[romanian])"
+"e" "^" "" "(e|je[russian])"
+"oo" "" "" "(u[english]|o)"
+"uu" "" "" "u" 
+    
+// LANGUAGE SPECIFIC CHARACTERS 
+"ć" "" "" "(tS[polish]|ts)"  // polish
+"ł" "" "" "l"  // polish
+"ń" "" "" "n"  // polish
+"ñ" "" "" "(n|nj[spanish])"
+"ś" "" "" "(S[polish]|s)" // polish
+"ş" "" "" "S"  // romanian
+"ţ" "" "" "ts"  // romanian
+"ż" "" "" "Z"  // polish
+"ź" "" "" "(Z[polish]|z)" // polish
+
+"où" "" "" "u" // french
+    
+"ą" "" "[bp]" "om"  // polish
+"ą" "" "" "on"  // polish
+"ä" "" "" "Y"  // german
+"á" "" "" "a" // hungarian
+"ă" "" "" "(e[romanian]|a)" //romanian
+"à" "" "" "a"  // french
+"â" "" "" "a" //french+romanian
+"é" "" "" "e" 
+"è" "" "" "e" // french
+"ê" "" "" "e" // french
+"ę" "" "[bp]" "em"  // polish
+"ę" "" "" "en"  // polish
+"í" "" "" "i" 
+"î" "" "" "i" 
+"ö" "" "" "Y"
+"ő" "" "" "Y" // hungarian
+"ó" "" "" "(u[polish]|o)"
+"ű" "" "" "Q" 
+"ü" "" "" "Q"
+"ú" "" "" "u" 
+"ű" "" "" "Q" // hungarian
+  
+"ß" "" "" "s"  // german
+"'" "" "" "" 
+"\"" "" "" ""
+       
+"a" "" "[bcdgkpstwzż]" "(A|B[polish])"
+"e" "" "[bcdgkpstwzż]" "(E|F[polish])"
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "(O|P[polish])"
+  
+  // LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "b"
+"c" "" "" "(k|ts[polish])"
+"d" "" "" "d"
+"e" "" "" "E"  
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h" 
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "v"
+"w" "" "" "v" // English disabled
+"x" "" "" "ks" 
+"y" "" "" "i"
+"z" "" "" "(ts[german]|z)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt
new file mode 100644
index 0000000..d262587
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_cyrillic.txt
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ця" "" "" "tsa"
+"цю" "" "" "tsu" 
+"циа" "" "" "tsa" 
+"цие" "" "" "tse" 
+"цио" "" "" "tso"   
+"циу" "" "" "tsu" 
+"сие" "" "" "se" 
+"сио" "" "" "so"   
+"зие" "" "" "ze" 
+"зио" "" "" "zo"   
+        
+"гауз" "" "$" "haus" 
+"гаус" "" "$" "haus" 
+"гольц" "" "$" "holts" 
+"геймер" "" "$" "hajmer" 
+"гейм" "" "$" "hajm" 
+"гоф" "" "$" "hof" 
+"гер" "" "$" "ger" 
+"ген" "" "$" "gen" 
+"гин" "" "$" "gin" 
+"г" "(й|ё|я|ю|ы|а|е|о|и|у)" "(а|е|о|и|у)" "g" 
+"г" "" "(а|е|о|и|у)" "(g|h)" 
+    
+"ля" "" "" "la"   
+"лю" "" "" "lu"   
+"лё" "" "" "(le|lo)"   
+"лио" "" "" "(le|lo)"   
+"ле" "" "" "(lE|lo)"   
+    
+"ийе" "" "" "je" 
+"ие" "" "" "je" 
+"ыйе" "" "" "je" 
+"ые" "" "" "je" 
+"ий" "" "(а|о|у)" "j" 
+"ый" "" "(а|о|у)" "j" 
+    
+"ий" "" "$" "i"
+"ый" "" "$" "i"
+    
+"ё" "" "" "(e|jo)"
+        
+"ей" "^" "" "(jaj|aj)"
+"е" "(а|е|о|у)" "" "je"
+"е" "^" "" "je"
+"эй" "" "" "aj"
+"ей" "" "" "aj"
+        
+"ауе" "" "" "aue"
+"ауэ" "" "" "aue"
+    
+"а" "" "" "a"
+"б" "" "" "b"
+"в" "" "" "v"
+"г" "" "" "g"
+"д" "" "" "d"
+"е" "" "" "E"
+"ж" "" "" "Z"
+"з" "" "" "z"
+"и" "" "" "I"
+"й" "" "" "j"
+"к" "" "" "k"
+"л" "" "" "l"
+"м" "" "" "m"
+"н" "" "" "n"
+"о" "" "" "o"
+"п" "" "" "p"
+"р" "" "" "r"
+"с" "" "с" ""
+"с" "" "" "s"
+"т" "" "" "t"
+"у" "" "" "u"
+"ф" "" "" "f"
+"х" "" "" "x"
+"ц" "" "" "ts"
+"ч" "" "" "tS"
+"ш" "" "" "S"
+"щ" "" "" "StS"
+"ъ" "" "" ""
+"ы" "" "" "I"
+"ь" "" "" ""
+"э" "" "" "E"
+"ю" "" "" "ju"
+"я" "" "" "ja"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt
new file mode 100644
index 0000000..f84e53f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_english.txt
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"tch" "" "" "tS"
+"ch" "" "" "(tS|x)"
+"ck" "" "" "k"
+"cc" "" "[iey]" "ks" // success, accent
+"c" "" "c" ""
+"c" "" "[iey]" "s" // circle 
+"c" "" "" "k" // candy
+"gh" "^" "" "g" // ghost
+"gh" "" "" "(g|f|w)" // burgh | tough | bough
+"gn" "" "" "(gn|n)"
+"g" "" "[iey]" "(g|dZ)" // get, gem, giant, gigabyte
+// "th" "" "" "(6|8|t)"
+"th" "" "" "t"
+"kh" "" "" "x"
+"ph" "" "" "f"
+"sch" "" "" "(S|sk)"
+"sh" "" "" "S"
+"who" "^" "" "hu"
+"wh" "^" "" "w"
+
+"h" "" "$" "" // hard to find an example that isn't in a name
+"h" "" "[^aeiou]" "" // hard to find an example that isn't in a name
+"h" "^" "" "H"
+"h" "" "" "h"
+
+"j" "" "" "dZ"
+"kn" "^" "" "n" // knight
+"mb" "" "$" "m"
+"ng" "" "$" "(N|ng)"
+"pn" "^" "" "(pn|n)"
+"ps" "^" "" "(ps|s)"
+"qu" "" "" "kw"
+"q" "" "" "k"
+"tia" "" "" "(So|Sa)"
+"tio" "" "" "So"
+"wr" "^" "" "r"
+"w" "" "" "(w|v)" // the variant "v" is for spellings coming from German/Polish
+"x" "^" "" "z"
+"x" "" "" "ks"
+
+// VOWELS
+"y" "^" "" "j"
+"y" "^" "[aeiouy]" "j"
+"yi" "^" "" "i"
+"aue" "" "" "aue" 
+"oue" "" "" "(aue|oue)" 
+"ai" "" "" "(aj|e)" // rain | said
+"ay" "" "" "aj" 
+"a" "" "[^aeiou]e" "aj" // plane (actually "ej")
+"a" "" "" "(e|o|a)" // hat | call | part
+"ei" "" "" "(aj|i)" // weigh | receive
+"ey" "" "" "(aj|i)" // hey | barley
+"ear" "" "" "ia" // tear
+"ea" "" "" "(i|e)" // reason | treasure
+"ee" "" "" "i" // between
+"e" "" "[^aeiou]e" "i" // meter
+"e" "" "$" "(|E)" // blame, badge
+"e" "" "" "E" // bed
+"ie" "" "" "i" // believe
+"i" "" "[^aeiou]e" "aj" // five
+"i" "" "" "I" // hit -- Morse disagrees, feels it should go to I
+"oa" "" "" "ou" // toad
+"oi" "" "" "oj" // join
+"oo" "" "" "u" // food
+"ou" "" "" "(u|ou)" // through | tough | could
+"oy" "" "" "oj" // boy
+"o" "" "[^aeiou]e" "ou" // rode
+"o" "" "" "(o|a)" // hot -- Morse disagrees, feels it should go to 9
+"u" "" "[^aeiou]e" "(ju|u)" // cute | flute
+"u" "" "r" "(e|u)" // turn -- Morse disagrees, feels it should go to E
+"u" "" "" "(u|a)" // put
+"y" "" "" "i"
+
+// TRIVIAL
+"b" "" "" "b"
+"d" "" "" "d"
+"f" "" "" "f"
+"g" "" "" "g" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"v" "" "" "v"
+"z" "" "" "z"
+

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt
new file mode 100644
index 0000000..668645f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_french.txt
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic
+
+// CONSONANTS
+"kh" "" "" "x" // foreign
+"ph" "" "" "f"
+    
+"ç" "" "" "s"
+"x" "" "" "ks"
+"ch" "" "" "S"
+"c" "" "[eiyéèê]" "s"
+"c" "" "" "k"
+"gn" "" "" "(n|gn)"
+"g" "" "[eiy]" "Z" 
+"gue" "" "$" "k"     
+"gu" "" "[eiy]" "g" 
+   //array("aill" "" "e" "aj" // non Jewish
+   //array("ll" "" "e" "(l|j)" // non Jewish
+"que" "" "$" "k"
+"qu" "" "" "k"
+"q" "" "" "k"
+"s" "[aeiouyéèê]" "[aeiouyéèê]" "z"
+"h" "[bdgt]" "" "" // translit from Arabic
+"h" "" "$" "" // foreign
+"j" "" "" "Z"
+"w" "" "" "v"
+"ouh" "" "[aioe]" "(v|uh)"
+"ou" "" "[aeio]" "v" 
+"uo" "" "" "(vo|o)"
+"u" "" "[aeio]" "v" 
+      
+// VOWELS
+"aue" "" "" "aue" 
+"eau" "" "" "o" 
+  //array("au" "" "" "(o|au)" // non Jewish
+"ai" "" "" "aj" // [e] is non Jewish
+"ay" "" "" "aj" // [e] is non Jewish
+"é" "" "" "e"
+"ê" "" "" "e"
+"è" "" "" "e"
+"à" "" "" "a"
+"â" "" "" "a"
+"où" "" "" "u"
+"ou" "" "" "u"
+"oi" "" "" "oj" // [ua] is non Jewish
+"ei" "" "" "aj" // [e] is non Jewish
+"ey" "" "" "aj" // [e] non Jewish
+    //array("eu" "" "" "(e|o)" // non Jewish
+"y" "[ou]" "" "j"
+"e" "" "$" "(e|)"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+"y" "" "" "i"
+       
+  // TRIVIAL      
+"a" "" "" "a"
+"b" "" "" "b"
+"d" "" "" "d"
+"e" "" "" "E" // only Ashkenazic
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "I" // only Ashkenazic
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt
new file mode 100644
index 0000000..72eef9d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_german.txt
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic
+
+// CONSONANTS
+"ziu" "" "" "tsu"   
+"zia" "" "" "tsa"   
+"zio" "" "" "tso"   
+
+"ssch" "" "" "S"
+"chsch" "" "" "xS"
+"ewitsch" "" "$" "evitS"
+"owitsch" "" "$" "ovitS"
+"evitsch" "" "$" "evitS"
+"ovitsch" "" "$" "ovitS"
+"witsch" "" "$" "vitS"
+"vitsch" "" "$" "vitS"
+"sch" "" "" "S"
+
+"chs" "" "" "ks"
+"ch" "" "" "x"
+"ck" "" "" "k"
+"c" "" "[eiy]" "ts"
+        
+"sp" "^" "" "Sp"
+"st" "^" "" "St"
+"ssp" "" "" "(Sp|sp)"
+"sp" "" "" "(Sp|sp)"
+"sst" "" "" "(St|st)"
+"st" "" "" "(St|st)"
+"pf" "" "" "(pf|p|f)"
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+    
+"ewitz" "" "$" "(evits|evitS)"
+"ewiz" "" "$" "(evits|evitS)"
+"evitz" "" "$" "(evits|evitS)"
+"eviz" "" "$" "(evits|evitS)"
+"owitz" "" "$" "(ovits|ovitS)"
+"owiz" "" "$" "(ovits|ovitS)"
+"ovitz" "" "$" "(ovits|ovitS)"
+"oviz" "" "$" "(ovits|ovitS)"
+"witz" "" "$" "(vits|vitS)"
+"wiz" "" "$" "(vits|vitS)"
+"vitz" "" "$" "(vits|vitS)"
+"viz" "" "$" "(vits|vitS)"
+"tz" "" "" "ts"
+    
+"thal" "" "$" "tal"
+"th" "^" "" "t"
+"th" "" "[äöüaeiou]" "(t|th)"
+"th" "" "" "t"
+"rh" "^" "" "r"
+"h" "[aeiouyäöü]" "" ""
+"h" "^" "" "H"
+    
+"ss" "" "" "s"
+"s" "" "[äöüaeiouy]" "(z|s)"
+"s" "[aeiouyäöüj]" "[aeiouyäöü]" "z"
+"ß" "" "" "s"
+          
+ // VOWELS
+"ij" "" "$" "i"
+"aue" "" "" "aue" 
+"ue" "" "" "Q" 
+"ae" "" "" "Y" 
+"oe" "" "" "Y" 
+"ü" "" "" "Q"
+"ä" "" "" "Y"
+"ö" "" "" "Y"
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"eu" "" "" "(aj|oj)"
+"i" "[aou]" "" "j"
+"y" "[aou]" "" "j"
+"ie" "" "" "I"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+        
+ // FOREIGN LETTERs
+"ñ" "" "" "n" 
+"ã" "" "" "a" 
+"ő" "" "" "o" 
+"ű" "" "" "u" 
+"ç" "" "" "s" 
+   
+  // ALPHABET      
+"a" "" "" "A"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "(f|v)"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "i"   
+"z" "" "" "ts"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt
new file mode 100644
index 0000000..4c59503
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hebrew.txt
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic
+
+"אי" "" "" "i"
+"עי" "" "" "i"
+"עו" "" "" "VV"
+"או" "" "" "VV"
+    
+"ג׳" "" "" "Z"
+"ד׳" "" "" "dZ"
+        
+"א" "" "" "L"
+"ב" "" "" "b"
+"ג" "" "" "g"
+"ד" "" "" "d"
+    
+"ה" "^" "" "1"
+"ה" "" "$" "1"
+"ה" "" "" ""
+    
+"וו" "" "" "V"
+"וי" "" "" "WW"
+"ו" "" "" "W"
+"ז" "" "" "z"
+"ח" "" "" "X"
+"ט" "" "" "T"
+"יי" "" "" "i"
+"י" "" "" "i"
+"ך" "" "" "X"
+"כ" "^" "" "K"
+"כ" "" "" "k"
+"ל" "" "" "l"
+"ם" "" "" "m"
+"מ" "" "" "m"
+"ן" "" "" "n"
+"נ" "" "" "n"
+"ס" "" "" "s"
+"ע" "" "" "L"
+"ף" "" "" "f"
+"פ" "" "" "f"
+"ץ" "" "" "C"
+"צ" "" "" "C"
+"ק" "" "" "K"
+"ר" "" "" "r"
+"ש" "" "" "s"
+"ת" "" "" "TB" // only Ashkenazic

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt
new file mode 100644
index 0000000..1e6f047
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_hungarian.txt
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// ASHKENAZIC
+
+// CONSONANTS
+"sz" "" "" "s"
+"zs" "" "" "Z"
+"cs" "" "" "tS"
+           
+"ay" "" "" "(oj|aj)"
+"ai" "" "" "(oj|aj)"
+"aj" "" "" "(oj|aj)"
+    
+"ei" "" "" "aj" // German element
+"ey" "" "" "aj" // German element
+    
+"y" "[áo]" "" "j"
+"i" "[áo]" "" "j"
+"ee" "" "" "(aj|e)" // actually ej
+"ely" "" "" "(aj|eli)" // actually ej
+"ly" "" "" "(j|li)"
+"gy" "" "[aeouáéóúüöőű]" "dj"
+"gy" "" "" "(d|gi)"
+"ny" "" "[aeouáéóúüöőű]" "nj"
+"ny" "" "" "(n|ni)"
+"ty" "" "[aeouáéóúüöőű]" "tj"
+"ty" "" "" "(t|ti)"
+    
+"qu" "" "" "(ku|kv)"
+"h" "" "$" ""
+                  
+// VOWELS
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ö" "" "" "Y"
+"ő" "" "" "Y" 
+"ú" "" "" "u"
+"ü" "" "" "Q"
+"ű" "" "" "Q"
+                       
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(S|s)" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v" 
+"w" "" "" "v" 
+"x" "" "" "ks"
+"y" "" "" "i" 
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt
new file mode 100644
index 0000000..59a87dd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_polish.txt
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic
+
+// CONVERTING FEMININE TO MASCULINE
+"ska" "" "$" "ski"   
+"cka" "" "$" "tski"   
+"lowa" "" "$" "(lova|lof|l|el)"   
+"kowa" "" "$" "(kova|kof|k|ek)"   
+"owa" "" "$" "(ova|of|)"  
+"lowna" "" "$" "(lovna|levna|l|el)" 
+"kowna" "" "$" "(kovna|k|ek)"  
+"owna" "" "$" "(ovna|)"   
+"lówna" "" "$" "(l|el)"   
+"kówna" "" "$" "(k|ek)"   
+"ówna" "" "$" ""   
+"a" "" "$" "(a|i)"   
+
+ // CONSONANTS
+"czy" "" "" "tSi"
+"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)"
+"ciewicz" "" "" "(tsevitS|tSevitS)"
+"siewicz" "" "" "(sevitS|SevitS)"
+"ziewicz" "" "" "(zevitS|ZevitS)"
+"riewicz" "" "" "rjevitS" 
+"diewicz" "" "" "djevitS" 
+"tiewicz" "" "" "tjevitS" 
+"iewicz" "" "" "evitS"
+"ewicz" "" "" "evitS"
+"owicz" "" "" "ovitS"
+"icz" "" "" "itS"
+"cz" "" "" "tS"
+"ch" "" "" "x"
+    
+"cia" "" "[bcdgkpstwzż]" "(tSB|tsB)" 
+"cia" "" "" "(tSa|tsa)" 
+"cią" "" "[bp]" "(tSom|tsom)"
+"cią" "" "" "(tSon|tson)"
+"cię" "" "[bp]" "(tSem|tsem)"
+"cię" "" "" "(tSen|tsen)"
+"cie" "" "[bcdgkpstwzż]" "(tSF|tsF)" 
+"cie" "" "" "(tSe|tse)" 
+"cio" "" "" "(tSo|tso)" 
+"ciu" "" "" "(tSu|tsu)" 
+"ci" "" "" "(tSi|tsI)"
+"ć" "" "" "(tS|ts)"
+    
+"ssz" "" "" "S"
+"sz" "" "" "S"
+"sia" "" "[bcdgkpstwzż]" "(SB|sB|sja)" 
+"sia" "" "" "(Sa|sja)" 
+"sią" "" "[bp]" "(Som|som)"
+"sią" "" "" "(Son|son)"
+"się" "" "[bp]" "(Sem|sem)"
+"się" "" "" "(Sen|sen)"
+"sie" "" "[bcdgkpstwzż]" "(SF|sF|se)" 
+"sie" "" "" "(Se|se)" 
+"sio" "" "" "(So|so)" 
+"siu" "" "" "(Su|sju)" 
+"si" "" "" "(Si|sI)"
+"ś" "" "" "(S|s)"
+
+"zia" "" "[bcdgkpstwzż]" "(ZB|zB|zja)" 
+"zia" "" "" "(Za|zja)" 
+"zią" "" "[bp]" "(Zom|zom)"
+"zią" "" "" "(Zon|zon)"
+"zię" "" "[bp]" "(Zem|zem)"
+"zię" "" "" "(Zen|zen)"
+"zie" "" "[bcdgkpstwzż]" "(ZF|zF)"
+"zie" "" "" "(Ze|ze)" 
+"zio" "" "" "(Zo|zo)" 
+"ziu" "" "" "(Zu|zju)" 
+"zi" "" "" "(Zi|zI)"
+    
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF)"
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF|ze|zF)"
+"że" "" "" "Ze"
+"źe" "" "" "(Ze|ze)"
+"ży" "" "" "Zi"
+"źi" "" "" "(Zi|zi)"
+"ż" "" "" "Z"
+"ź" "" "" "(Z|z)"
+    
+"rze" "t" "" "(Se|re)"
+"rze" "" "" "(Ze|re|rZe)"
+"rzy" "t" "" "(Si|ri)"
+"rzy" "" "" "(Zi|ri|rZi)"
+"rz" "t" "" "(S|r)"
+"rz" "" "" "(Z|r|rZ)"
+    
+"lio" "" "" "(lo|le)"
+"ł" "" "" "l"
+"ń" "" "" "n"
+"qu" "" "" "k"
+"s" "" "s" "" 
+    
+ // VOWELS   
+"ó" "" "" "(u|o)"
+"ą" "" "[bp]" "om"
+"ę" "" "[bp]" "em"
+"ą" "" "" "on"
+"ę" "" "" "en"
+   
+"ije" "" "" "je"
+"yje" "" "" "je"
+"iie" "" "" "je"
+"yie" "" "" "je"
+"iye" "" "" "je"
+"yye" "" "" "je"
+   
+"ij" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+   
+"rie" "" "" "rje" 
+"die" "" "" "dje" 
+"tie" "" "" "tje" 
+"ie" "" "[bcdgkpstwzż]" "F" 
+"ie" "" "" "e"
+   
+"aue" "" "" "aue"
+"au" "" "" "au"
+   
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"ej" "" "" "aj"
+    
+"ai" "" "" "aj"
+"ay" "" "" "aj"
+"aj" "" "" "aj"
+    
+"i" "[ou]" "" "j" 
+"y" "[ou]" "" "j" 
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+       
+"a" "" "[bcdgkpstwzż]" "B"
+"e" "" "[bcdgkpstwzż]" "(E|F)" 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "P" 
+       
+// ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|x)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "I"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt
new file mode 100644
index 0000000..f53e262
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_romanian.txt
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"j" "" "" "Z"
+  
+"ce" "" "" "tSe"
+"ci" "" "" "(tSi|tS)"
+"ch" "" "[ei]" "k"
+"ch" "" "" "x" // foreign
+"c" "" "" "k"
+  
+"gi" "" "" "(dZi|dZ)"
+"g" "" "[ei]" "dZ"
+"gh" "" "" "g"
+  
+"ei" "" "" "aj"
+"i" "[aou]" "" "j"
+"i" "" "[aeou]" "j"
+"ţ" "" "" "ts"
+"ş" "" "" "S"
+"h" "" "" "(x|h)"
+    
+"qu" "" "" "k"    
+"q" "" "" "k"    
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"    
+    
+"î" "" "" "i"
+"ea" "" "" "ja"
+"ă" "" "" "(e|a)"
+"aue" "" "" "aue"
+    
+"a" "" "" "a"
+"b" "" "" "b"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"i" "" "" "I"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt
new file mode 100644
index 0000000..817b2c3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_russian.txt
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in|ina)" 
+"ina" "" "$" "(in|ina)" 
+"liova" "" "$" "(lof|lef)" 
+"lova" "" "$" "(lof|lef|lova)" 
+"ova" "" "$" "(of|ova)" 
+"eva" "" "$" "(ef|ova)" 
+"aia" "" "$" "(aja|i)" 
+"aja" "" "$" "(aja|i)" 
+"aya" "" "$" "(aja|i)" 
+
+ //SPECIFIC CONSONANTS
+"tsya" "" "" "tsa" 
+"tsyu" "" "" "tsu" 
+"tsia" "" "" "tsa" 
+"tsie" "" "" "tse" 
+"tsio" "" "" "tso"   
+"tsye" "" "" "tse" 
+"tsyo" "" "" "tso" 
+"tsiu" "" "" "tsu" 
+"sie" "" "" "se" 
+"sio" "" "" "so"   
+"zie" "" "" "ze" 
+"zio" "" "" "zo"   
+"sye" "" "" "se" 
+"syo" "" "" "so"   
+"zye" "" "" "ze" 
+"zyo" "" "" "zo"   
+    
+"gauz" "" "$" "haus" 
+"gaus" "" "$" "haus" 
+"gol'ts" "" "$" "holts" 
+"golts" "" "$" "holts" 
+"gol'tz" "" "$" "holts" 
+"goltz" "" "$" "holts" 
+"gejmer" "" "$" "hajmer" 
+"gejm" "" "$" "hajm" 
+"geimer" "" "$" "hajmer" 
+"geim" "" "$" "hajm" 
+"geymer" "" "$" "hajmer" 
+"geym" "" "$" "hajm" 
+"gendler" "" "$" "hendler" 
+"gof" "" "$" "hof" 
+"gojf" "" "$" "hojf" 
+"goyf" "" "$" "hojf" 
+"goif" "" "$" "hojf" 
+"ger" "" "$" "ger" 
+"gen" "" "$" "gen" 
+"gin" "" "$" "gin" 
+"gg" "" "" "g" 
+"g" "[jaeoiuy]" "[aeoiu]" "g" 
+"g" "" "[aeoiu]" "(g|h)" 
+       
+"kh" "" "" "x"
+"ch" "" "" "(tS|x)" // in DJSRE the rule is simpler:"ch" "" "" "tS");
+"sch" "" "" "(StS|S)"
+"ssh" "" "" "S"
+"sh" "" "" "S"
+"zh" "" "" "Z" 
+"tz" "" "$" "ts" // not in DJSRE
+"tz" "" "" "(ts|tz)" // not in DJSRE
+"c" "" "[iey]" "s" // not in DJSRE
+"c" "" "" "k" // not in DJSRE
+"qu" "" "" "(kv|k)" // not in DJSRE
+"q" "" "" "k" // not in DJSRE
+"s" "" "s" ""
+    
+"w" "" "" "v" // not in DJSRE
+"x" "" "" "ks" // not in DJSRE
+                  
+ //SPECIFIC VOWELS
+"lya" "" "" "la" 
+"lyu" "" "" "lu"  
+"lia" "" "" "la" // not in DJSRE
+"liu" "" "" "lu"  // not in DJSRE
+"lja" "" "" "la" // not in DJSRE
+"lju" "" "" "lu"  // not in DJSRE
+"le" "" "" "(lo|lE)" //not in DJSRE
+"lyo" "" "" "(lo|le)" //not in DJSRE
+"lio" "" "" "(lo|le)" 
+    
+"ije" "" "" "je"
+"ie" "" "" "je"
+"iye" "" "" "je"
+"iie" "" "" "je"
+"yje" "" "" "je"
+"ye" "" "" "je"
+"yye" "" "" "je"
+"yie" "" "" "je"
+    
+"ij" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+        
+"io" "" "" "(jo|e)" 
+"i" "" "[au]" "j" 
+"i" "[aou]" "" "j" // not in DJSRE
+"ei" "" "" "aj" // not in DJSRE
+"ey" "" "" "aj" // not in DJSRE
+"ej" "" "" "aj" 
+"yo" "" "" "(jo|e)" //not in DJSRE
+"y" "" "[au]" "j"
+"y" "[aiou]" "" "j" // not in DJSRE
+    
+"ii" "" "$" "i" // not in DJSRE
+"iy" "" "$" "i" // not in DJSRE
+"yy" "" "$" "i" // not in DJSRE
+"yi" "" "$" "i" // not in DJSRE
+"yj" "" "$" "i"
+"ij" "" "$" "i"
+    
+"e" "^" "" "(je|E)" // in DJSRE the rule is simpler:"e" "^" "" "je");
+"ee" "" "" "(aje|i)" // in DJSRE the rule is simpler:"ee" "" "" "(eje|aje)");
+"e" "[aou]" "" "je" 
+"y" "" "" "I"
+"oo" "" "" "(oo|u)" // not in DJSRE
+"'" "" "" "" 
+"\"" "" "" ""
+    
+"aue" "" "" "aue"
+
+// TRIVIAL 
+"a" "" "" "a"
+"b" "" "" "b"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h" // not in DJSRE
+"i" "" "" "I"
+"j" "" "" "j" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt
new file mode 100644
index 0000000..03dc04a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/ash_rules_spanish.txt
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Ashkenazic = Argentina
+
+// CONSONANTS
+"ñ" "" "" "(n|nj)"
+    
+"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina   
+"h" "[bdgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+          
+"j" "" "" "x" 
+"x" "" "" "ks"         
+"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic
+"w" "" "" "v" // foreign words
+        
+"v" "" "" "(b|v)"
+"b" "" "" "(b|v)"
+"m" "" "[bpvf]" "(m|n)"
+    
+"c" "" "[ei]" "s" 
+"c" "" "" "k"
+
+"z" "" "" "(z|s)" // as "c" befoire "e" or "i", in Spain it is like unvoiced English "th"
+        
+"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü"
+"g" "" "[ei]" "(x|g)"  // "g" only for foreign words
+            
+"qu" "" "" "k"
+"q" "" "" "k"
+    
+"uo" "" "" "(vo|o)"    
+"u" "" "[aei]" "v"
+        
+"y" "" "" "(i|j|S|Z)" // S or Z are peculiar to South America; only Ashkenazic
+           
+ // VOWELS
+"ü" "" "" "v"
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+               
+  // TRIVIAL      
+"a" "" "" "a"
+"d" "" "" "d"
+"e" "" "" "E" // Only Ashkenazic
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "I" // Only Ashkenazic
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"    

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt
new file mode 100644
index 0000000..6627aac
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_any.txt
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+// A, E, I, O, P, U should create variants, but a, e, i, o, u should not create any new variant
+// Q = ü ; Y = ä = ö
+// EE = final "e" (english or french)
+
+// VOWELS
+    // "ALL" DIPHTHONGS are interchangeable BETWEEN THEM and with monophthongs of which they are composed ("D" means "diphthong")
+    //  {a,o} are totally interchangeable if non-stressed; in German "a/o" can actually be from "ä/ö" (that are equivalent to "e")
+    //  {i,e} are interchangeable if non-stressed, while in German "u" can actually be from "ü" (that is equivalent to "i")
+
+"mb" "" "" "(mb|b[greeklatin])"
+"mp" "" "" "(mp|b[greeklatin])"
+"ng" "" "" "(ng|g[greeklatin])"
+
+"B" "" "" "(b|v[spanish])"
+"V" "" "" "(v|b[spanish])"
+    
+    // French word-final and word-part-final letters
+"t" "" "$" "(t|[french])"
+"g" "n" "$" "(g|[french])"
+"k" "n" "$" "(k|[french])"
+"p" "" "$" "(p|[french])"
+"r" "[Ee]" "$" "(r|[french])"
+"s" "" "$" "(s|[french])"
+"t" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(t|[french])" // Petitjean
+"s" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(s|[french])" // Groslot, Grosleau
+    //array("p" "[aeiouAEIOU]" "[^aeiouAEIOU]" "(p|[$french])" 
+    
+"I" "[aeiouAEIBFOUQY]" "" "i"
+"I" "" "[^aeiouAEBFIOU]e" "(Q[german]|i|D[english])"  // "line"
+"I" "" "$" "i"
+"I" "" "[^k]$" "i"
+"Ik" "[lr]" "$" "(ik|Qk[german])"
+"Ik" "" "$" "ik"
+"sIts" "" "$" "(sits|sQts[german])"
+"Its" "" "$" "its"
+"I" "" "" "(Q[german]|i)"
+   
+"lEE" "[bdfgkmnprsStvzZ]" "" "(li|il[english])"  // Apple = Appel
+"rEE" "[bdfgkmnprsStvzZ]" "" "(ri|ir[english])"
+"lE" "[bdfgkmnprsStvzZ]" "" "(li|il[english]|lY[german])"  // Applebaum < Appelbaum
+"rE" "[bdfgkmnprsStvzZ]" "" "(ri|ir[english]|rY[german])"
+    
+"ea" "" "" "(D|a|i)"
+    
+"au" "" "" "(D|a|u)"
+"ou" "" "" "(D|o|u)"
+"eu" "" "" "(D|e|u)"
+    
+"ai" "" "" "(D|a|i)"
+"Ai" "" "" "(D|a|i)"
+"oi" "" "" "(D|o|i)"
+"Oi" "" "" "(D|o|i)"
+"ui" "" "" "(D|u|i)"
+"Ui" "" "" "(D|u|i)"
+"ei" "" "" "(D|i)"
+"Ei" "" "" "(D|i)"
+    
+"iA" "" "$" "(ia|io)" 
+"iA" "" "" "(ia|io|iY[german])"
+"A" "" "[^aeiouAEBFIOU]e" "(a|o|Y[german]|D[english])" // "plane"
+    
+    
+"E" "i[^aeiouAEIOU]" "" "(i|Y[german]|[english])" // Wineberg (vineberg/vajneberg) --> vajnberg
+"E" "a[^aeiouAEIOU]" "" "(i|Y[german]|[english])" //  Shaneberg (shaneberg/shejneberg) --> shejnberg
+    
+"E" "" "[fklmnprst]$" "i"
+"E" "" "ts$" "i"
+"E" "" "$" "i"
+"E" "[DaoiuAOIUQY]" "" "i"
+"E" "" "[aoAOQY]" "i"
+"E" "" "" "(i|Y[$german])"
+        
+"P" "" "" "(o|u)" 
+    
+"O" "" "[fklmnprstv]$" "o"
+"O" "" "ts$" "o"
+"O" "" "$" "o"
+"O" "[oeiuQY]" "" "o"
+"O" "" "" "(o|Y[$german])"
+"O" "" "" "o"
+    
+"A" "" "[fklmnprst]$" "(a|o)"
+"A" "" "ts$" "(a|o)"
+"A" "" "$" "(a|o)"
+"A" "[oeiuQY]" "" "(a|o)"
+"A" "" "" "(a|o|Y[$german])"
+"A" "" "" "(a|o)"
+
+"U" "" "$" "u"
+"U" "[DoiuQY]" "" "u"
+"U" "" "[^k]$" "u"
+"Uk" "[lr]" "$" "(uk|Qk[german])"
+"Uk" "" "$" "uk"
+"sUts" "" "$" "(suts|sQts[german])"
+"Uts" "" "$" "uts"
+"U" "" "" "(u|Q[german])"
+"U" "" "" "u"
+
+"e" "" "[fklmnprstv]$" "i"
+"e" "" "ts$" "i"
+"e" "" "$" "i"
+"e" "[DaoiuAOIUQY]" "" "i"
+"e" "" "[aoAOQY]" "i"
+"e" "" "" "(i|Y[german])"
+        
+"a" "" "" "(a|o)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt
new file mode 100644
index 0000000..a8cad65
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_approx_arabic.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"j1" "" "" "(ja|je|jo|ju|j)"
+"1" "" "" "(a|e|i|o|u|)"
+"u" "" "" "(o|u)"
+"i" "" "" "(i|e)"
+"p" "" "$" "p"
+"p" "" "" "(p|b)"


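The FIXTURE in the test below pairs common misspellings with their corrections (data massaged from aspell's batch0.tab, as noted in the file). Pairs such as "Steffen"/"Stephen" collapse to the same Double Metaphone code, which is the kind of equivalence the encoder is meant to capture. A minimal sketch of that check, assuming the ported class mirrors the commons-codec surface (Encode, IsDoubleMetaphoneEqual); illustrative only, not part of this diff:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class DoubleMetaphoneSketch
    {
        public static void Main()
        {
            // Assumed API (mirrors commons-codec); not part of this commit's diff.
            var encoder = new DoubleMetaphone();
            Console.WriteLine(encoder.Encode("Steffen"));  // "STFN"
            Console.WriteLine(encoder.Encode("Stephen"));  // "STFN" -- misspelling and correction share a code
            Console.WriteLine(encoder.IsDoubleMetaphoneEqual("Steffen", "Stephen")); // True
        }
    }
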
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphoneTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphoneTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphoneTest.cs
new file mode 100644
index 0000000..8a4604d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/DoubleMetaphoneTest.cs
@@ -0,0 +1,1266 @@
+using NUnit.Framework;
+using System;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests <see cref="DoubleMetaphone"/>
+    /// </summary>
+    public class DoubleMetaphoneTest : StringEncoderAbstractTest<DoubleMetaphone>
+    {
+        /**
+     * Test data from http://aspell.net/test/orig/batch0.tab.
+     *
+     * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying
+     * and distribution of this entire article is permitted in any medium,
+     * provided this notice is preserved."
+     *
+     * Massaged the test data in the array below.
+     */
+        private static readonly string[][] FIXTURE = { new string[] {
+            "Accosinly", "Occasionally" }, new string[] {
+            "Ciculer", "Circler" }, new string[] {
+            "Circue", "Circle" }, new string[] {
+            "Maddness", "Madness" }, new string[] {
+            "Occusionaly", "Occasionally" }, new string[] {
+            "Steffen", "Stephen" }, new string[] {
+            "Thw", "The" }, new string[] {
+            "Unformanlly", "Unfortunately" }, new string[] {
+            "Unfortally", "Unfortunately" }, new string[] {
+            "abilitey", "ability" }, new string[] {
+            "abouy", "about" }, new string[] {
+            "absorbtion", "absorption" }, new string[] {
+            "accidently", "accidentally" }, new string[] {
+            "accomodate", "accommodate" }, new string[] {
+            "acommadate", "accommodate" }, new string[] {
+            "acord", "accord" }, new string[] {
+            "adultry", "adultery" }, new string[] {
+            "aggresive", "aggressive" }, new string[] {
+            "alchohol", "alcohol" }, new string[] {
+            "alchoholic", "alcoholic" }, new string[] {
+            "allieve", "alive" }, new string[] {
+            "alot", "a lot" }, new string[] {
+            "alright", "all right" }, new string[] {
+            "amature", "amateur" }, new string[] {
+            "ambivilant", "ambivalent" }, new string[] {
+            "amification", "amplification" }, new string[] {
+            "amourfous", "amorphous" }, new string[] {
+            "annoint", "anoint" }, new string[] {
+            "annonsment", "announcement" }, new string[] {
+            "annoyting", "anting" }, new string[] {
+            "annuncio", "announce" }, new string[] {
+            "anonomy", "anatomy" }, new string[] {
+            "anotomy", "anatomy" }, new string[] {
+            "antidesestablishmentarianism", "antidisestablishmentarianism" }, new string[] {
+            "antidisestablishmentarism", "antidisestablishmentarianism" }, new string[] {
+            "anynomous", "anonymous" }, new string[] {
+            "appelet", "applet" }, new string[] {
+            "appreceiated", "appreciated" }, new string[] {
+            "appresteate", "appreciate" }, new string[] {
+            "aquantance", "acquaintance" }, new string[] {
+            "aratictature", "architecture" }, new string[] {
+            "archeype", "archetype" }, new string[] {
+            "aricticure", "architecture" }, new string[] {
+            "artic", "arctic" }, new string[] {
+            "asentote", "asymptote" }, new string[] {
+            "ast", "at" }, new string[] {
+            "asterick", "asterisk" }, new string[] {
+            "asymetric", "asymmetric" }, new string[] {
+            "atentively", "attentively" }, new string[] {
+            "autoamlly", "automatically" }, new string[] {
+            "bankrot", "bankrupt" }, new string[] {
+            "basicly", "basically" }, new string[] {
+            "batallion", "battalion" }, new string[] {
+            "bbrose", "browse" }, new string[] {
+            "beauro", "bureau" }, new string[] {
+            "beaurocracy", "bureaucracy" }, new string[] {
+            "beggining", "beginning" }, new string[] {
+            "beging", "beginning" }, new string[] {
+            "behaviour", "behavior" }, new string[] {
+            "beleive", "believe" }, new string[] {
+            "belive", "believe" }, new string[] {
+            "benidifs", "benefits" }, new string[] {
+            "bigginging", "beginning" }, new string[] {
+            "blait", "bleat" }, new string[] {
+            "bouyant", "buoyant" }, new string[] {
+            "boygot", "boycott" }, new string[] {
+            "brocolli", "broccoli" }, new string[] {
+            "buch", "bush" }, new string[] {
+            "buder", "butter" }, new string[] {
+            "budr", "butter" }, new string[] {
+            "budter", "butter" }, new string[] {
+            "buracracy", "bureaucracy" }, new string[] {
+            "burracracy", "bureaucracy" }, new string[] {
+            "buton", "button" }, new string[] {
+            "byby", "by by" }, new string[] {
+            "cauler", "caller" }, new string[] {
+            "ceasar", "caesar" }, new string[] {
+            "cemetary", "cemetery" }, new string[] {
+            "changeing", "changing" }, new string[] {
+            "cheet", "cheat" }, new string[] {
+            "cicle", "circle" }, new string[] {
+            "cimplicity", "simplicity" }, new string[] {
+            "circumstaces", "circumstances" }, new string[] {
+            "clob", "club" }, new string[] {
+            "coaln", "colon" }, new string[] {
+            "cocamena", "cockamamie" }, new string[] {
+            "colleaque", "colleague" }, new string[] {
+            "colloquilism", "colloquialism" }, new string[] {
+            "columne", "column" }, new string[] {
+            "comiler", "compiler" }, new string[] {
+            "comitmment", "commitment" }, new string[] {
+            "comitte", "committee" }, new string[] {
+            "comittmen", "commitment" }, new string[] {
+            "comittmend", "commitment" }, new string[] {
+            "commerciasl", "commercials" }, new string[] {
+            "commited", "committed" }, new string[] {
+            "commitee", "committee" }, new string[] {
+            "companys", "companies" }, new string[] {
+            "compicated", "complicated" }, new string[] {
+            "comupter", "computer" }, new string[] {
+            "concensus", "consensus" }, new string[] {
+            "confusionism", "confucianism" }, new string[] {
+            "congradulations", "congratulations" }, new string[] {
+            "conibation", "contribution" }, new string[] {
+            "consident", "consistent" }, new string[] {
+            "consident", "consonant" }, new string[] {
+            "contast", "constant" }, new string[] {
+            "contastant", "constant" }, new string[] {
+            "contunie", "continue" }, new string[] {
+            "cooly", "coolly" }, new string[] {
+            "copping", "coping" }, new string[] {
+            "cosmoplyton", "cosmopolitan" }, new string[] {
+            "courst", "court" }, new string[] {
+            "crasy", "crazy" }, new string[] {
+            "cravets", "caveats" }, new string[] {
+            "credetability", "credibility" }, new string[] {
+            "criqitue", "critique" }, new string[] {
+            "croke", "croak" }, new string[] {
+            "crucifiction", "crucifixion" }, new string[] {
+            "crusifed", "crucified" }, new string[] {
+            "ctitique", "critique" }, new string[] {
+            "cumba", "combo" }, new string[] {
+            "custamisation", "customization" }, new string[] {
+            "dag", "dog" }, new string[] {
+            "daly", "daily" }, new string[] {
+            "danguages", "dangerous" }, new string[] {
+            "deaft", "draft" }, new string[] {
+            "defence", "defense" }, new string[] {
+            "defenly", "defiantly" }, new string[] {
+            "definate", "definite" }, new string[] {
+            "definately", "definitely" }, new string[] {
+            "dependeble", "dependable" }, new string[] {
+            "descrption", "description" }, new string[] {
+            "descrptn", "description" }, new string[] {
+            "desparate", "desperate" }, new string[] {
+            "dessicate", "desiccate" }, new string[] {
+            "destint", "distant" }, new string[] {
+            "develepment", "developments" }, new string[] {
+            "developement", "development" }, new string[] {
+            "develpond", "development" }, new string[] {
+            "devulge", "divulge" }, new string[] {
+            "diagree", "disagree" }, new string[] {
+            "dieties", "deities" }, new string[] {
+            "dinasaur", "dinosaur" }, new string[] {
+            "dinasour", "dinosaur" }, new string[] {
+            "direcyly", "directly" }, new string[] {
+            "discuess", "discuss" }, new string[] {
+            "disect", "dissect" }, new string[] {
+            "disippate", "dissipate" }, new string[] {
+            "disition", "decision" }, new string[] {
+            "dispair", "despair" }, new string[] {
+            "disssicion", "discussion" }, new string[] {
+            "distarct", "distract" }, new string[] {
+            "distart", "distort" }, new string[] {
+            "distroy", "destroy" }, new string[] {
+            "documtations", "documentation" }, new string[] {
+            "doenload", "download" }, new string[] {
+            "dongle", "dangle" }, new string[] {
+            "doog", "dog" }, new string[] {
+            "dramaticly", "dramatically" }, new string[] {
+            "drunkeness", "drunkenness" }, new string[] {
+            "ductioneery", "dictionary" }, new string[] {
+            "dur", "due" }, new string[] {
+            "duren", "during" }, new string[] {
+            "dymatic", "dynamic" }, new string[] {
+            "dynaic", "dynamic" }, new string[] {
+            "ecstacy", "ecstasy" }, new string[] {
+            "efficat", "efficient" }, new string[] {
+            "efficity", "efficacy" }, new string[] {
+            "effots", "efforts" }, new string[] {
+            "egsistence", "existence" }, new string[] {
+            "eitiology", "etiology" }, new string[] {
+            "elagent", "elegant" }, new string[] {
+            "elligit", "elegant" }, new string[] {
+            "embarass", "embarrass" }, new string[] {
+            "embarassment", "embarrassment" }, new string[] {
+            "embaress", "embarrass" }, new string[] {
+            "encapsualtion", "encapsulation" }, new string[] {
+            "encyclapidia", "encyclopedia" }, new string[] {
+            "encyclopia", "encyclopedia" }, new string[] {
+            "engins", "engine" }, new string[] {
+            "enhence", "enhance" }, new string[] {
+            "enligtment", "Enlightenment" }, new string[] {
+            "ennuui", "ennui" }, new string[] {
+            "enought", "enough" }, new string[] {
+            "enventions", "inventions" }, new string[] {
+            "envireminakl", "environmental" }, new string[] {
+            "enviroment", "environment" }, new string[] {
+            "epitomy", "epitome" }, new string[] {
+            "equire", "acquire" }, new string[] {
+            "errara", "error" }, new string[] {
+            "erro", "error" }, new string[] {
+            "evaualtion", "evaluation" }, new string[] {
+            "evething", "everything" }, new string[] {
+            "evtually", "eventually" }, new string[] {
+            "excede", "exceed" }, new string[] {
+            "excercise", "exercise" }, new string[] {
+            "excpt", "except" }, new string[] {
+            "excution", "execution" }, new string[] {
+            "exhileration", "exhilaration" }, new string[] {
+            "existance", "existence" }, new string[] {
+            "expleyly", "explicitly" }, new string[] {
+            "explity", "explicitly" }, new string[] {
+            "expresso", "espresso" }, new string[] {
+            "exspidient", "expedient" }, new string[] {
+            "extions", "extensions" }, new string[] {
+            "factontion", "factorization" }, new string[] {
+            "failer", "failure" }, new string[] {
+            "famdasy", "fantasy" }, new string[] {
+            "faver", "favor" }, new string[] {
+            "faxe", "fax" }, new string[] {
+            "febuary", "february" }, new string[] {
+            "firey", "fiery" }, new string[] {
+            "fistival", "festival" }, new string[] {
+            "flatterring", "flattering" }, new string[] {
+            "fluk", "flux" }, new string[] {
+            "flukse", "flux" }, new string[] {
+            "fone", "phone" }, new string[] {
+            "forsee", "foresee" }, new string[] {
+            "frustartaion", "frustrating" }, new string[] {
+            "fuction", "function" }, new string[] {
+            "funetik", "phonetic" }, new string[] {
+            "futs", "guts" }, new string[] {
+            "gamne", "came" }, new string[] {
+            "gaurd", "guard" }, new string[] {
+            "generly", "generally" }, new string[] {
+            "ghandi", "gandhi" }, new string[] {
+            "goberment", "government" }, new string[] {
+            "gobernement", "government" }, new string[] {
+            "gobernment", "government" }, new string[] {
+            "gotton", "gotten" }, new string[] {
+            "gracefull", "graceful" }, new string[] {
+            "gradualy", "gradually" }, new string[] {
+            "grammer", "grammar" }, new string[] {
+            "hallo", "hello" }, new string[] {
+            "hapily", "happily" }, new string[] {
+            "harrass", "harass" }, new string[] {
+            "havne", "have" }, new string[] {
+            "heellp", "help" }, new string[] {
+            "heighth", "height" }, new string[] {
+            "hellp", "help" }, new string[] {
+            "helo", "hello" }, new string[] {
+            "herlo", "hello" }, new string[] {
+            "hifin", "hyphen" }, new string[] {
+            "hifine", "hyphen" }, new string[] {
+            "higer", "higher" }, new string[] {
+            "hiphine", "hyphen" }, new string[] {
+            "hippie", "hippy" }, new string[] {
+            "hippopotamous", "hippopotamus" }, new string[] {
+            "hlp", "help" }, new string[] {
+            "hourse", "horse" }, new string[] {
+            "houssing", "housing" }, new string[] {
+            "howaver", "however" }, new string[] {
+            "howver", "however" }, new string[] {
+            "humaniti", "humanity" }, new string[] {
+            "hyfin", "hyphen" }, new string[] {
+            "hypotathes", "hypothesis" }, new string[] {
+            "hypotathese", "hypothesis" }, new string[] {
+            "hystrical", "hysterical" }, new string[] {
+            "ident", "indent" }, new string[] {
+            "illegitament", "illegitimate" }, new string[] {
+            "imbed", "embed" }, new string[] {
+            "imediaetly", "immediately" }, new string[] {
+            "imfamy", "infamy" }, new string[] {
+            "immenant", "immanent" }, new string[] {
+            "implemtes", "implements" }, new string[] {
+            "inadvertant", "inadvertent" }, new string[] {
+            "incase", "in case" }, new string[] {
+            "incedious", "insidious" }, new string[] {
+            "incompleet", "incomplete" }, new string[] {
+            "incomplot", "incomplete" }, new string[] {
+            "inconvenant", "inconvenient" }, new string[] {
+            "inconvience", "inconvenience" }, new string[] {
+            "independant", "independent" }, new string[] {
+            "independenent", "independent" }, new string[] {
+            "indepnends", "independent" }, new string[] {
+            "indepth", "in depth" }, new string[] {
+            "indispensible", "indispensable" }, new string[] {
+            "inefficite", "inefficient" }, new string[] {
+            "inerface", "interface" }, new string[] {
+            "infact", "in fact" }, new string[] {
+            "influencial", "influential" }, new string[] {
+            "inital", "initial" }, new string[] {
+            "initinized", "initialized" }, new string[] {
+            "initized", "initialized" }, new string[] {
+            "innoculate", "inoculate" }, new string[] {
+            "insistant", "insistent" }, new string[] {
+            "insistenet", "insistent" }, new string[] {
+            "instulation", "installation" }, new string[] {
+            "intealignt", "intelligent" }, new string[] {
+            "intejilent", "intelligent" }, new string[] {
+            "intelegent", "intelligent" }, new string[] {
+            "intelegnent", "intelligent" }, new string[] {
+            "intelejent", "intelligent" }, new string[] {
+            "inteligent", "intelligent" }, new string[] {
+            "intelignt", "intelligent" }, new string[] {
+            "intellagant", "intelligent" }, new string[] {
+            "intellegent", "intelligent" }, new string[] {
+            "intellegint", "intelligent" }, new string[] {
+            "intellgnt", "intelligent" }, new string[] {
+            "intensionality", "intensionally" }, new string[] {
+            "interate", "iterate" }, new string[] {
+            "internation", "international" }, new string[] {
+            "interpretate", "interpret" }, new string[] {
+            "interpretter", "interpreter" }, new string[] {
+            "intertes", "interested" }, new string[] {
+            "intertesd", "interested" }, new string[] {
+            "invermeantial", "environmental" }, new string[] {
+            "irregardless", "regardless" }, new string[] {
+            "irresistable", "irresistible" }, new string[] {
+            "irritible", "irritable" }, new string[] {
+            "islams", "muslims" }, new string[] {
+            "isotrop", "isotope" }, new string[] {
+            "isreal", "israel" }, new string[] {
+            "johhn", "john" }, new string[] {
+            "judgement", "judgment" }, new string[] {
+            "kippur", "kipper" }, new string[] {
+            "knawing", "knowing" }, new string[] {
+            "latext", "latest" }, new string[] {
+            "leasve", "leave" }, new string[] {
+            "lesure", "leisure" }, new string[] {
+            "liasion", "lesion" }, new string[] {
+            "liason", "liaison" }, new string[] {
+            "libary", "library" }, new string[] {
+            "likly", "likely" }, new string[] {
+            "lilometer", "kilometer" }, new string[] {
+            "liquify", "liquefy" }, new string[] {
+            "lloyer", "layer" }, new string[] {
+            "lossing", "losing" }, new string[] {
+            "luser", "laser" }, new string[] {
+            "maintanence", "maintenance" }, new string[] {
+            "majaerly", "majority" }, new string[] {
+            "majoraly", "majority" }, new string[] {
+            "maks", "masks" }, new string[] {
+            "mandelbrot", "Mandelbrot" }, new string[] {
+            "mant", "want" }, new string[] {
+            "marshall", "marshal" }, new string[] {
+            "maxium", "maximum" }, new string[] {
+            "meory", "memory" }, new string[] {
+            "metter", "better" }, new string[] {
+            "mic", "mike" }, new string[] {
+            "midia", "media" }, new string[] {
+            "millenium", "millennium" }, new string[] {
+            "miniscule", "minuscule" }, new string[] {
+            "minkay", "monkey" }, new string[] {
+            "minum", "minimum" }, new string[] {
+            "mischievious", "mischievous" }, new string[] {
+            "misilous", "miscellaneous" }, new string[] {
+            "momento", "memento" }, new string[] {
+            "monkay", "monkey" }, new string[] {
+            "mosaik", "mosaic" }, new string[] {
+            "mostlikely", "most likely" }, new string[] {
+            "mousr", "mouser" }, new string[] {
+            "mroe", "more" }, new string[] {
+            "neccessary", "necessary" }, new string[] {
+            "necesary", "necessary" }, new string[] {
+            "necesser", "necessary" }, new string[] {
+            "neice", "niece" }, new string[] {
+            "neighbour", "neighbor" }, new string[] {
+            "nemonic", "pneumonic" }, new string[] {
+            "nevade", "Nevada" }, new string[] {
+            "nickleodeon", "nickelodeon" }, new string[] {
+            "nieve", "naive" }, new string[] {
+            "noone", "no one" }, new string[] {
+            "noticably", "noticeably" }, new string[] {
+            "notin", "not in" }, new string[] {
+            "nozled", "nuzzled" }, new string[] {
+            "objectsion", "objects" }, new string[] {
+            "obsfuscate", "obfuscate" }, new string[] {
+            "ocassion", "occasion" }, new string[] {
+            "occuppied", "occupied" }, new string[] {
+            "occurence", "occurrence" }, new string[] {
+            "octagenarian", "octogenarian" }, new string[] {
+            "olf", "old" }, new string[] {
+            "opposim", "opossum" }, new string[] {
+            "organise", "organize" }, new string[] {
+            "organiz", "organize" }, new string[] {
+            "orientate", "orient" }, new string[] {
+            "oscilascope", "oscilloscope" }, new string[] {
+            "oving", "moving" }, new string[] {
+            "paramers", "parameters" }, new string[] {
+            "parametic", "parameter" }, new string[] {
+            "paranets", "parameters" }, new string[] {
+            "partrucal", "particular" }, new string[] {
+            "pataphysical", "metaphysical" }, new string[] {
+            "patten", "pattern" }, new string[] {
+            "permissable", "permissible" }, new string[] {
+            "permition", "permission" }, new string[] {
+            "permmasivie", "permissive" }, new string[] {
+            "perogative", "prerogative" }, new string[] {
+            "persue", "pursue" }, new string[] {
+            "phantasia", "fantasia" }, new string[] {
+            "phenominal", "phenomenal" }, new string[] {
+            "picaresque", "picturesque" }, new string[] {
+            "playwrite", "playwright" }, new string[] {
+            "poeses", "poesies" }, new string[] {
+            "polation", "politician" }, new string[] {
+            "poligamy", "polygamy" }, new string[] {
+            "politict", "politic" }, new string[] {
+            "pollice", "police" }, new string[] {
+            "polypropalene", "polypropylene" }, new string[] {
+            "pompom", "pompon" }, new string[] {
+            "possable", "possible" }, new string[] {
+            "practicle", "practical" }, new string[] {
+            "pragmaticism", "pragmatism" }, new string[] {
+            "preceeding", "preceding" }, new string[] {
+            "precion", "precision" }, new string[] {
+            "precios", "precision" }, new string[] {
+            "preemptory", "peremptory" }, new string[] {
+            "prefices", "prefixes" }, new string[] {
+            "prefixt", "prefixed" }, new string[] {
+            "presbyterian", "Presbyterian" }, new string[] {
+            "presue", "pursue" }, new string[] {
+            "presued", "pursued" }, new string[] {
+            "privielage", "privilege" }, new string[] {
+            "priviledge", "privilege" }, new string[] {
+            "proceedures", "procedures" }, new string[] {
+            "pronensiation", "pronunciation" }, new string[] {
+            "pronisation", "pronunciation" }, new string[] {
+            "pronounciation", "pronunciation" }, new string[] {
+            "properally", "properly" }, new string[] {
+            "proplematic", "problematic" }, new string[] {
+            "protray", "portray" }, new string[] {
+            "pscolgst", "psychologist" }, new string[] {
+            "psicolagest", "psychologist" }, new string[] {
+            "psycolagest", "psychologist" }, new string[] {
+            "quoz", "quiz" }, new string[] {
+            "radious", "radius" }, new string[] {
+            "ramplily", "rampantly" }, new string[] {
+            "reccomend", "recommend" }, new string[] {
+            "reccona", "raccoon" }, new string[] {
+            "recieve", "receive" }, new string[] {
+            "reconise", "recognize" }, new string[] {
+            "rectangeles", "rectangle" }, new string[] {
+            "redign", "redesign" }, new string[] {
+            "reoccurring", "recurring" }, new string[] {
+            "repitition", "repetition" }, new string[] {
+            "replasments", "replacement" }, new string[] {
+            "reposable", "responsible" }, new string[] {
+            "reseblence", "resemblance" }, new string[] {
+            "respct", "respect" }, new string[] {
+            "respecally", "respectfully" }, new string[] {
+            "roon", "room" }, new string[] {
+            "rought", "roughly" }, new string[] {
+            "rsx", "RSX" }, new string[] {
+            "rudemtry", "rudimentary" }, new string[] {
+            "runnung", "running" }, new string[] {
+            "sacreligious", "sacrilegious" }, new string[] {
+            "saftly", "safely" }, new string[] {
+            "salut", "salute" }, new string[] {
+            "satifly", "satisfy" }, new string[] {
+            "scrabdle", "scrabble" }, new string[] {
+            "searcheable", "searchable" }, new string[] {
+            "secion", "section" }, new string[] {
+            "seferal", "several" }, new string[] {
+            "segements", "segments" }, new string[] {
+            "sence", "sense" }, new string[] {
+            "seperate", "separate" }, new string[] {
+            "sherbert", "sherbet" }, new string[] {
+            "sicolagest", "psychologist" }, new string[] {
+            "sieze", "seize" }, new string[] {
+            "simpfilty", "simplicity" }, new string[] {
+            "simplye", "simply" }, new string[] {
+            "singal", "signal" }, new string[] {
+            "sitte", "site" }, new string[] {
+            "situration", "situation" }, new string[] {
+            "slyph", "sylph" }, new string[] {
+            "smil", "smile" }, new string[] {
+            "snuck", "sneaked" }, new string[] {
+            "sometmes", "sometimes" }, new string[] {
+            "soonec", "sonic" }, new string[] {
+            "specificialy", "specifically" }, new string[] {
+            "spel", "spell" }, new string[] {
+            "spoak", "spoke" }, new string[] {
+            "sponsered", "sponsored" }, new string[] {
+            "stering", "steering" }, new string[] {
+            "straightjacket", "straitjacket" }, new string[] {
+            "stumach", "stomach" }, new string[] {
+            "stutent", "student" }, new string[] {
+            "styleguide", "style guide" }, new string[] {
+            "subisitions", "substitutions" }, new string[] {
+            "subjecribed", "subscribed" }, new string[] {
+            "subpena", "subpoena" }, new string[] {
+            "substations", "substitutions" }, new string[] {
+            "suger", "sugar" }, new string[] {
+            "supercede", "supersede" }, new string[] {
+            "superfulous", "superfluous" }, new string[] {
+            "susan", "Susan" }, new string[] {
+            "swimwear", "swim wear" }, new string[] {
+            "syncorization", "synchronization" }, new string[] {
+            "taff", "tough" }, new string[] {
+            "taht", "that" }, new string[] {
+            "tattos", "tattoos" }, new string[] {
+            "techniquely", "technically" }, new string[] {
+            "teh", "the" }, new string[] {
+            "tem", "team" }, new string[] {
+            "teo", "two" }, new string[] {
+            "teridical", "theoretical" }, new string[] {
+            "tesst", "test" }, new string[] {
+            "tets", "tests" }, new string[] {
+            "thanot", "than or" }, new string[] {
+            "theirselves", "themselves" }, new string[] {
+            "theridically", "theoretical" }, new string[] {
+            "thredically", "theoretically" }, new string[] {
+            "thruout", "throughout" }, new string[] {
+            "ths", "this" }, new string[] {
+            "titalate", "titillate" }, new string[] {
+            "tobagan", "tobaggon" }, new string[] {
+            "tommorrow", "tomorrow" }, new string[] {
+            "tomorow", "tomorrow" }, new string[] {
+            "tradegy", "tragedy" }, new string[] {
+            "trubbel", "trouble" }, new string[] {
+            "ttest", "test" }, new string[] {
+            "tunnellike", "tunnel like" }, new string[] {
+            "tured", "turned" }, new string[] {
+            "tyrrany", "tyranny" }, new string[] {
+            "unatourral", "unnatural" }, new string[] {
+            "unaturral", "unnatural" }, new string[] {
+            "unconisitional", "unconstitutional" }, new string[] {
+            "unconscience", "unconscious" }, new string[] {
+            "underladder", "under ladder" }, new string[] {
+            "unentelegible", "unintelligible" }, new string[] {
+            "unfortunently", "unfortunately" }, new string[] {
+            "unnaturral", "unnatural" }, new string[] {
+            "upcast", "up cast" }, new string[] {
+            "upmost", "utmost" }, new string[] {
+            "uranisium", "uranium" }, new string[] {
+            "verison", "version" }, new string[] {
+            "vinagarette", "vinaigrette" }, new string[] {
+            "volumptuous", "voluptuous" }, new string[] {
+            "volunteerism", "voluntarism" }, new string[] {
+            "volye", "volley" }, new string[] {
+            "wadting", "wasting" }, new string[] {
+            "waite", "wait" }, new string[] {
+            "wan't", "won't" }, new string[] {
+            "warloord", "warlord" }, new string[] {
+            "whaaat", "what" }, new string[] {
+            "whard", "ward" }, new string[] {
+            "whimp", "wimp" }, new string[] {
+            "wicken", "weaken" }, new string[] {
+            "wierd", "weird" }, new string[] {
+            "wrank", "rank" }, new string[] {
+            "writeen", "righten" }, new string[] {
+            "writting", "writing" }, new string[] {
+            "wundeews", "windows" }, new string[] {
+            "yeild", "yield" }, new string[] {
+            "youe", "your" }
+        };
+
+        /**
+         * A subset of FIXTURE generated by TestIsDoubleMetaphoneEqualExtended3 below.
+         */
+        private static readonly string[][] MATCHES = { new string[] {
+            "Accosinly", "Occasionally" }, new string[] {
+            "Maddness", "Madness" }, new string[] {
+            "Occusionaly", "Occasionally" }, new string[] {
+            "Steffen", "Stephen" }, new string[] {
+            "Thw", "The" }, new string[] {
+            "Unformanlly", "Unfortunately" }, new string[] {
+            "Unfortally", "Unfortunately" }, new string[] {
+            "abilitey", "ability" }, new string[] {
+            "absorbtion", "absorption" }, new string[] {
+            "accidently", "accidentally" }, new string[] {
+            "accomodate", "accommodate" }, new string[] {
+            "acommadate", "accommodate" }, new string[] {
+            "acord", "accord" }, new string[] {
+            "adultry", "adultery" }, new string[] {
+            "aggresive", "aggressive" }, new string[] {
+            "alchohol", "alcohol" }, new string[] {
+            "alchoholic", "alcoholic" }, new string[] {
+            "allieve", "alive" }, new string[] {
+            "alot", "a lot" }, new string[] {
+            "alright", "all right" }, new string[] {
+            "amature", "amateur" }, new string[] {
+            "ambivilant", "ambivalent" }, new string[] {
+            "amourfous", "amorphous" }, new string[] {
+            "annoint", "anoint" }, new string[] {
+            "annonsment", "announcement" }, new string[] {
+            "annoyting", "anting" }, new string[] {
+            "annuncio", "announce" }, new string[] {
+            "anotomy", "anatomy" }, new string[] {
+            "antidesestablishmentarianism", "antidisestablishmentarianism" }, new string[] {
+            "antidisestablishmentarism", "antidisestablishmentarianism" }, new string[] {
+            "anynomous", "anonymous" }, new string[] {
+            "appelet", "applet" }, new string[] {
+            "appreceiated", "appreciated" }, new string[] {
+            "appresteate", "appreciate" }, new string[] {
+            "aquantance", "acquaintance" }, new string[] {
+            "aricticure", "architecture" }, new string[] {
+            "asterick", "asterisk" }, new string[] {
+            "asymetric", "asymmetric" }, new string[] {
+            "atentively", "attentively" }, new string[] {
+            "bankrot", "bankrupt" }, new string[] {
+            "basicly", "basically" }, new string[] {
+            "batallion", "battalion" }, new string[] {
+            "bbrose", "browse" }, new string[] {
+            "beauro", "bureau" }, new string[] {
+            "beaurocracy", "bureaucracy" }, new string[] {
+            "beggining", "beginning" }, new string[] {
+            "behaviour", "behavior" }, new string[] {
+            "beleive", "believe" }, new string[] {
+            "belive", "believe" }, new string[] {
+            "blait", "bleat" }, new string[] {
+            "bouyant", "buoyant" }, new string[] {
+            "boygot", "boycott" }, new string[] {
+            "brocolli", "broccoli" }, new string[] {
+            "buder", "butter" }, new string[] {
+            "budr", "butter" }, new string[] {
+            "budter", "butter" }, new string[] {
+            "buracracy", "bureaucracy" }, new string[] {
+            "burracracy", "bureaucracy" }, new string[] {
+            "buton", "button" }, new string[] {
+            "byby", "by by" }, new string[] {
+            "cauler", "caller" }, new string[] {
+            "ceasar", "caesar" }, new string[] {
+            "cemetary", "cemetery" }, new string[] {
+            "changeing", "changing" }, new string[] {
+            "cheet", "cheat" }, new string[] {
+            "cimplicity", "simplicity" }, new string[] {
+            "circumstaces", "circumstances" }, new string[] {
+            "clob", "club" }, new string[] {
+            "coaln", "colon" }, new string[] {
+            "colleaque", "colleague" }, new string[] {
+            "colloquilism", "colloquialism" }, new string[] {
+            "columne", "column" }, new string[] {
+            "comitmment", "commitment" }, new string[] {
+            "comitte", "committee" }, new string[] {
+            "comittmen", "commitment" }, new string[] {
+            "comittmend", "commitment" }, new string[] {
+            "commerciasl", "commercials" }, new string[] {
+            "commited", "committed" }, new string[] {
+            "commitee", "committee" }, new string[] {
+            "companys", "companies" }, new string[] {
+            "comupter", "computer" }, new string[] {
+            "concensus", "consensus" }, new string[] {
+            "confusionism", "confucianism" }, new string[] {
+            "congradulations", "congratulations" }, new string[] {
+            "contunie", "continue" }, new string[] {
+            "cooly", "coolly" }, new string[] {
+            "copping", "coping" }, new string[] {
+            "cosmoplyton", "cosmopolitan" }, new string[] {
+            "crasy", "crazy" }, new string[] {
+            "croke", "croak" }, new string[] {
+            "crucifiction", "crucifixion" }, new string[] {
+            "crusifed", "crucified" }, new string[] {
+            "cumba", "combo" }, new string[] {
+            "custamisation", "customization" }, new string[] {
+            "dag", "dog" }, new string[] {
+            "daly", "daily" }, new string[] {
+            "defence", "defense" }, new string[] {
+            "definate", "definite" }, new string[] {
+            "definately", "definitely" }, new string[] {
+            "dependeble", "dependable" }, new string[] {
+            "descrption", "description" }, new string[] {
+            "descrptn", "description" }, new string[] {
+            "desparate", "desperate" }, new string[] {
+            "dessicate", "desiccate" }, new string[] {
+            "destint", "distant" }, new string[] {
+            "develepment", "developments" }, new string[] {
+            "developement", "development" }, new string[] {
+            "develpond", "development" }, new string[] {
+            "devulge", "divulge" }, new string[] {
+            "dieties", "deities" }, new string[] {
+            "dinasaur", "dinosaur" }, new string[] {
+            "dinasour", "dinosaur" }, new string[] {
+            "discuess", "discuss" }, new string[] {
+            "disect", "dissect" }, new string[] {
+            "disippate", "dissipate" }, new string[] {
+            "disition", "decision" }, new string[] {
+            "dispair", "despair" }, new string[] {
+            "distarct", "distract" }, new string[] {
+            "distart", "distort" }, new string[] {
+            "distroy", "destroy" }, new string[] {
+            "doenload", "download" }, new string[] {
+            "dongle", "dangle" }, new string[] {
+            "doog", "dog" }, new string[] {
+            "dramaticly", "dramatically" }, new string[] {
+            "drunkeness", "drunkenness" }, new string[] {
+            "ductioneery", "dictionary" }, new string[] {
+            "ecstacy", "ecstasy" }, new string[] {
+            "egsistence", "existence" }, new string[] {
+            "eitiology", "etiology" }, new string[] {
+            "elagent", "elegant" }, new string[] {
+            "embarass", "embarrass" }, new string[] {
+            "embarassment", "embarrassment" }, new string[] {
+            "embaress", "embarrass" }, new string[] {
+            "encapsualtion", "encapsulation" }, new string[] {
+            "encyclapidia", "encyclopedia" }, new string[] {
+            "encyclopia", "encyclopedia" }, new string[] {
+            "engins", "engine" }, new string[] {
+            "enhence", "enhance" }, new string[] {
+            "ennuui", "ennui" }, new string[] {
+            "enventions", "inventions" }, new string[] {
+            "envireminakl", "environmental" }, new string[] {
+            "enviroment", "environment" }, new string[] {
+            "epitomy", "epitome" }, new string[] {
+            "equire", "acquire" }, new string[] {
+            "errara", "error" }, new string[] {
+            "evaualtion", "evaluation" }, new string[] {
+            "excede", "exceed" }, new string[] {
+            "excercise", "exercise" }, new string[] {
+            "excpt", "except" }, new string[] {
+            "exhileration", "exhilaration" }, new string[] {
+            "existance", "existence" }, new string[] {
+            "expleyly", "explicitly" }, new string[] {
+            "explity", "explicitly" }, new string[] {
+            "failer", "failure" }, new string[] {
+            "faver", "favor" }, new string[] {
+            "faxe", "fax" }, new string[] {
+            "firey", "fiery" }, new string[] {
+            "fistival", "festival" }, new string[] {
+            "flatterring", "flattering" }, new string[] {
+            "flukse", "flux" }, new string[] {
+            "fone", "phone" }, new string[] {
+            "forsee", "foresee" }, new string[] {
+            "frustartaion", "frustrating" }, new string[] {
+            "funetik", "phonetic" }, new string[] {
+            "gaurd", "guard" }, new string[] {
+            "generly", "generally" }, new string[] {
+            "ghandi", "gandhi" }, new string[] {
+            "gotton", "gotten" }, new string[] {
+            "gracefull", "graceful" }, new string[] {
+            "gradualy", "gradually" }, new string[] {
+            "grammer", "grammar" }, new string[] {
+            "hallo", "hello" }, new string[] {
+            "hapily", "happily" }, new string[] {
+            "harrass", "harass" }, new string[] {
+            "heellp", "help" }, new string[] {
+            "heighth", "height" }, new string[] {
+            "hellp", "help" }, new string[] {
+            "helo", "hello" }, new string[] {
+            "hifin", "hyphen" }, new string[] {
+            "hifine", "hyphen" }, new string[] {
+            "hiphine", "hyphen" }, new string[] {
+            "hippie", "hippy" }, new string[] {
+            "hippopotamous", "hippopotamus" }, new string[] {
+            "hourse", "horse" }, new string[] {
+            "houssing", "housing" }, new string[] {
+            "howaver", "however" }, new string[] {
+            "howver", "however" }, new string[] {
+            "humaniti", "humanity" }, new string[] {
+            "hyfin", "hyphen" }, new string[] {
+            "hystrical", "hysterical" }, new string[] {
+            "illegitament", "illegitimate" }, new string[] {
+            "imbed", "embed" }, new string[] {
+            "imediaetly", "immediately" }, new string[] {
+            "immenant", "immanent" }, new string[] {
+            "implemtes", "implements" }, new string[] {
+            "inadvertant", "inadvertent" }, new string[] {
+            "incase", "in case" }, new string[] {
+            "incedious", "insidious" }, new string[] {
+            "incompleet", "incomplete" }, new string[] {
+            "incomplot", "incomplete" }, new string[] {
+            "inconvenant", "inconvenient" }, new string[] {
+            "inconvience", "inconvenience" }, new string[] {
+            "independant", "independent" }, new string[] {
+            "independenent", "independent" }, new string[] {
+            "indepnends", "independent" }, new string[] {
+            "indepth", "in depth" }, new string[] {
+            "indispensible", "indispensable" }, new string[] {
+            "inefficite", "inefficient" }, new string[] {
+            "infact", "in fact" }, new string[] {
+            "influencial", "influential" }, new string[] {
+            "innoculate", "inoculate" }, new string[] {
+            "insistant", "insistent" }, new string[] {
+            "insistenet", "insistent" }, new string[] {
+            "instulation", "installation" }, new string[] {
+            "intealignt", "intelligent" }, new string[] {
+            "intelegent", "intelligent" }, new string[] {
+            "intelegnent", "intelligent" }, new string[] {
+            "intelejent", "intelligent" }, new string[] {
+            "inteligent", "intelligent" }, new string[] {
+            "intelignt", "intelligent" }, new string[] {
+            "intellagant", "intelligent" }, new string[] {
+            "intellegent", "intelligent" }, new string[] {
+            "intellegint", "intelligent" }, new string[] {
+            "intellgnt", "intelligent" }, new string[] {
+            "intensionality", "intensionally" }, new string[] {
+            "internation", "international" }, new string[] {
+            "interpretate", "interpret" }, new string[] {
+            "interpretter", "interpreter" }, new string[] {
+            "intertes", "interested" }, new string[] {
+            "intertesd", "interested" }, new string[] {
+            "invermeantial", "environmental" }, new string[] {
+            "irresistable", "irresistible" }, new string[] {
+            "irritible", "irritable" }, new string[] {
+            "isreal", "israel" }, new string[] {
+            "johhn", "john" }, new string[] {
+            "kippur", "kipper" }, new string[] {
+            "knawing", "knowing" }, new string[] {
+            "lesure", "leisure" }, new string[] {
+            "liasion", "lesion" }, new string[] {
+            "liason", "liaison" }, new string[] {
+            "likly", "likely" }, new string[] {
+            "liquify", "liquefy" }, new string[] {
+            "lloyer", "layer" }, new string[] {
+            "lossing", "losing" }, new string[] {
+            "luser", "laser" }, new string[] {
+            "maintanence", "maintenance" }, new string[] {
+            "mandelbrot", "Mandelbrot" }, new string[] {
+            "marshall", "marshal" }, new string[] {
+            "maxium", "maximum" }, new string[] {
+            "mic", "mike" }, new string[] {
+            "midia", "media" }, new string[] {
+            "millenium", "millennium" }, new string[] {
+            "miniscule", "minuscule" }, new string[] {
+            "minkay", "monkey" }, new string[] {
+            "mischievious", "mischievous" }, new string[] {
+            "momento", "memento" }, new string[] {
+            "monkay", "monkey" }, new string[] {
+            "mosaik", "mosaic" }, new string[] {
+            "mostlikely", "most likely" }, new string[] {
+            "mousr", "mouser" }, new string[] {
+            "mroe", "more" }, new string[] {
+            "necesary", "necessary" }, new string[] {
+            "necesser", "necessary" }, new string[] {
+            "neice", "niece" }, new string[] {
+            "neighbour", "neighbor" }, new string[] {
+            "nemonic", "pneumonic" }, new string[] {
+            "nevade", "Nevada" }, new string[] {
+            "nickleodeon", "nickelodeon" }, new string[] {
+            "nieve", "naive" }, new string[] {
+            "noone", "no one" }, new string[] {
+            "notin", "not in" }, new string[] {
+            "nozled", "nuzzled" }, new string[] {
+            "objectsion", "objects" }, new string[] {
+            "ocassion", "occasion" }, new string[] {
+            "occuppied", "occupied" }, new string[] {
+            "occurence", "occurrence" }, new string[] {
+            "octagenarian", "octogenarian" }, new string[] {
+            "opposim", "opossum" }, new string[] {
+            "organise", "organize" }, new string[] {
+            "organiz", "organize" }, new string[] {
+            "orientate", "orient" }, new string[] {
+            "oscilascope", "oscilloscope" }, new string[] {
+            "parametic", "parameter" }, new string[] {
+            "permissable", "permissible" }, new string[] {
+            "permmasivie", "permissive" }, new string[] {
+            "persue", "pursue" }, new string[] {
+            "phantasia", "fantasia" }, new string[] {
+            "phenominal", "phenomenal" }, new string[] {
+            "playwrite", "playwright" }, new string[] {
+            "poeses", "poesies" }, new string[] {
+            "poligamy", "polygamy" }, new string[] {
+            "politict", "politic" }, new string[] {
+            "pollice", "police" }, new string[] {
+            "polypropalene", "polypropylene" }, new string[] {
+            "possable", "possible" }, new string[] {
+            "practicle", "practical" }, new string[] {
+            "pragmaticism", "pragmatism" }, new string[] {
+            "preceeding", "preceding" }, new string[] {
+            "precios", "precision" }, new string[] {
+            "preemptory", "peremptory" }, new string[] {
+            "prefixt", "prefixed" }, new string[] {
+            "presbyterian", "Presbyterian" }, new string[] {
+            "presue", "pursue" }, new string[] {
+            "presued", "pursued" }, new string[] {
+            "privielage", "privilege" }, new string[] {
+            "priviledge", "privilege" }, new string[] {
+            "proceedures", "procedures" }, new string[] {
+            "pronensiation", "pronunciation" }, new string[] {
+            "pronounciation", "pronunciation" }, new string[] {
+            "properally", "properly" }, new string[] {
+            "proplematic", "problematic" }, new string[] {
+            "protray", "portray" }, new string[] {
+            "pscolgst", "psychologist" }, new string[] {
+            "psicolagest", "psychologist" }, new string[] {
+            "psycolagest", "psychologist" }, new string[] {
+            "quoz", "quiz" }, new string[] {
+            "radious", "radius" }, new string[] {
+            "reccomend", "recommend" }, new string[] {
+            "reccona", "raccoon" }, new string[] {
+            "recieve", "receive" }, new string[] {
+            "reconise", "recognize" }, new string[] {
+            "rectangeles", "rectangle" }, new string[] {
+            "reoccurring", "recurring" }, new string[] {
+            "repitition", "repetition" }, new string[] {
+            "replasments", "replacement" }, new string[] {
+            "respct", "respect" }, new string[] {
+            "respecally", "respectfully" }, new string[] {
+            "rsx", "RSX" }, new string[] {
+            "runnung", "running" }, new string[] {
+            "sacreligious", "sacrilegious" }, new string[] {
+            "salut", "salute" }, new string[] {
+            "searcheable", "searchable" }, new string[] {
+            "seferal", "several" }, new string[] {
+            "segements", "segments" }, new string[] {
+            "sence", "sense" }, new string[] {
+            "seperate", "separate" }, new string[] {
+            "sicolagest", "psychologist" }, new string[] {
+            "sieze", "seize" }, new string[] {
+            "simplye", "simply" }, new string[] {
+            "sitte", "site" }, new string[] {
+            "slyph", "sylph" }, new string[] {
+            "smil", "smile" }, new string[] {
+            "sometmes", "sometimes" }, new string[] {
+            "soonec", "sonic" }, new string[] {
+            "specificialy", "specifically" }, new string[] {
+            "spel", "spell" }, new string[] {
+            "spoak", "spoke" }, new string[] {
+            "sponsered", "sponsored" }, new string[] {
+            "stering", "steering" }, new string[] {
+            "straightjacket", "straitjacket" }, new string[] {
+            "stumach", "stomach" }, new string[] {
+            "stutent", "student" }, new string[] {
+            "styleguide", "style guide" }, new string[] {
+            "subpena", "subpoena" }, new string[] {
+            "substations", "substitutions" }, new string[] {
+            "supercede", "supersede" }, new string[] {
+            "superfulous", "superfluous" }, new string[] {
+            "susan", "Susan" }, new string[] {
+            "swimwear", "swim wear" }, new string[] {
+            "syncorization", "synchronization" }, new string[] {
+            "taff", "tough" }, new string[] {
+            "taht", "that" }, new string[] {
+            "tattos", "tattoos" }, new string[] {
+            "techniquely", "technically" }, new string[] {
+            "teh", "the" }, new string[] {
+            "tem", "team" }, new string[] {
+            "teo", "two" }, new string[] {
+            "teridical", "theoretical" }, new string[] {
+            "tesst", "test" }, new string[] {
+            "theridically", "theoretical" }, new string[] {
+            "thredically", "theoretically" }, new string[] {
+            "thruout", "throughout" }, new string[] {
+            "ths", "this" }, new string[] {
+            "titalate", "titillate" }, new string[] {
+            "tobagan", "tobaggon" }, new string[] {
+            "tommorrow", "tomorrow" }, new string[] {
+            "tomorow", "tomorrow" }, new string[] {
+            "trubbel", "trouble" }, new string[] {
+            "ttest", "test" }, new string[] {
+            "tyrrany", "tyranny" }, new string[] {
+            "unatourral", "unnatural" }, new string[] {
+            "unaturral", "unnatural" }, new string[] {
+            "unconisitional", "unconstitutional" }, new string[] {
+            "unconscience", "unconscious" }, new string[] {
+            "underladder", "under ladder" }, new string[] {
+            "unentelegible", "unintelligible" }, new string[] {
+            "unfortunently", "unfortunately" }, new string[] {
+            "unnaturral", "unnatural" }, new string[] {
+            "upcast", "up cast" }, new string[] {
+            "verison", "version" }, new string[] {
+            "vinagarette", "vinaigrette" }, new string[] {
+            "volunteerism", "voluntarism" }, new string[] {
+            "volye", "volley" }, new string[] {
+            "waite", "wait" }, new string[] {
+            "wan't", "won't" }, new string[] {
+            "warloord", "warlord" }, new string[] {
+            "whaaat", "what" }, new string[] {
+            "whard", "ward" }, new string[] {
+            "whimp", "wimp" }, new string[] {
+            "wicken", "weaken" }, new string[] {
+            "wierd", "weird" }, new string[] {
+            "wrank", "rank" }, new string[] {
+            "writeen", "righten" }, new string[] {
+            "writting", "writing" }, new string[] {
+            "wundeews", "windows" }, new string[] {
+            "yeild", "yield" },
+        };
+
+        /**
+         * Tests encoding APIs in one place.
+         */
+        private void AssertDoubleMetaphone(string expected, string source)
+        {
+            Assert.AreEqual(expected, this.StringEncoder.Encode(source));
+            //try
+            //{
+            //    Assert.AreEqual(expected, this.StringEncoder.Encode((object)source));
+            //}
+            //catch (EncoderException e) {
+            //    Assert.Fail("Unexpected exception: " + e);
+            //}
+            Assert.AreEqual(expected, this.StringEncoder.GetDoubleMetaphone(source));
+            Assert.AreEqual(expected, this.StringEncoder.GetDoubleMetaphone(source, false));
+        }
+
+        /**
+         * Tests the alternate encoding API.
+         */
+        public void AssertDoubleMetaphoneAlt(string expected, string source)
+        {
+            Assert.AreEqual(expected, this.StringEncoder.GetDoubleMetaphone(source, true));
+        }
+
+        public void DoubleMetaphoneEqualTest(string[][] pairs, bool useAlternate)
+        {
+            this.ValidateFixture(pairs);
+            foreach (string[] pair in pairs)
+            {
+                String name0 = pair[0];
+                String name1 = pair[1];
+                String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
+                Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1, useAlternate), failMsg);
+                Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual(name1, name0, useAlternate), failMsg);
+                if (!useAlternate)
+                {
+                    Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1), failMsg);
+                    Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual(name1, name0), failMsg);
+                }
+            }
+        }
+
+        public void DoubleMetaphoneNotEqualTest(bool alternate)
+        {
+            Assert.False(this.StringEncoder.IsDoubleMetaphoneEqual("Brain", "Band", alternate));
+            Assert.False(this.StringEncoder.IsDoubleMetaphoneEqual("Band", "Brain", alternate));
+
+            if (!alternate)
+            {
+                Assert.False(this.StringEncoder.IsDoubleMetaphoneEqual("Brain", "Band"));
+                Assert.False(this.StringEncoder.IsDoubleMetaphoneEqual("Band", "Brain"));
+            }
+        }
+
+        protected override DoubleMetaphone CreateStringEncoder()
+        {
+            return new DoubleMetaphone();
+        }
+
+        [Test]
+        public void TestDoubleMetaphone()
+        {
+            AssertDoubleMetaphone("TSTN", "testing");
+            AssertDoubleMetaphone("0", "The");
+            AssertDoubleMetaphone("KK", "quick");
+            AssertDoubleMetaphone("PRN", "brown");
+            AssertDoubleMetaphone("FKS", "fox");
+            AssertDoubleMetaphone("JMPT", "jumped");
+            AssertDoubleMetaphone("AFR", "over");
+            AssertDoubleMetaphone("0", "the");
+            AssertDoubleMetaphone("LS", "lazy");
+            AssertDoubleMetaphone("TKS", "dogs");
+            AssertDoubleMetaphone("MKFR", "MacCafferey");
+            AssertDoubleMetaphone("STFN", "Stephan");
+            AssertDoubleMetaphone("KSSK", "Kuczewski");
+            AssertDoubleMetaphone("MKLL", "McClelland");
+            AssertDoubleMetaphone("SNHS", "san jose");
+            AssertDoubleMetaphone("SNFP", "xenophobia");
+
+            AssertDoubleMetaphoneAlt("TSTN", "testing");
+            AssertDoubleMetaphoneAlt("T", "The");
+            AssertDoubleMetaphoneAlt("KK", "quick");
+            AssertDoubleMetaphoneAlt("PRN", "brown");
+            AssertDoubleMetaphoneAlt("FKS", "fox");
+            AssertDoubleMetaphoneAlt("AMPT", "jumped");
+            AssertDoubleMetaphoneAlt("AFR", "over");
+            AssertDoubleMetaphoneAlt("T", "the");
+            AssertDoubleMetaphoneAlt("LS", "lazy");
+            AssertDoubleMetaphoneAlt("TKS", "dogs");
+            AssertDoubleMetaphoneAlt("MKFR", "MacCafferey");
+            AssertDoubleMetaphoneAlt("STFN", "Stephan");
+            AssertDoubleMetaphoneAlt("KXFS", "Kutchefski");
+            AssertDoubleMetaphoneAlt("MKLL", "McClelland");
+            AssertDoubleMetaphoneAlt("SNHS", "san jose");
+            AssertDoubleMetaphoneAlt("SNFP", "xenophobia");
+            AssertDoubleMetaphoneAlt("FKR", "Fokker");
+            AssertDoubleMetaphoneAlt("AK", "Joqqi");
+            AssertDoubleMetaphoneAlt("HF", "Hovvi");
+            AssertDoubleMetaphoneAlt("XRN", "Czerny");
+        }
+
+        [Test]
+        public void TestEmpty()
+        {
+            Assert.AreEqual(null, this.StringEncoder.GetDoubleMetaphone(null));
+            Assert.AreEqual(null, this.StringEncoder.GetDoubleMetaphone(""));
+            Assert.AreEqual(null, this.StringEncoder.GetDoubleMetaphone(" "));
+            Assert.AreEqual(null, this.StringEncoder.GetDoubleMetaphone("\t\n\r "));
+        }
+
+        /**
+         * Test setting maximum length
+         */
+        [Test]
+        public void TestSetMaxCodeLength()
+        {
+            String value = "jumped";
+
+            DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
+
+            // Sanity check of default settings
+            Assert.AreEqual(4, doubleMetaphone.MaxCodeLen, "Default Max Code Length");
+            Assert.AreEqual("JMPT", doubleMetaphone.GetDoubleMetaphone(value, false), "Default Primary");
+            Assert.AreEqual("AMPT", doubleMetaphone.GetDoubleMetaphone(value, true), "Default Alternate");
+
+            // Check setting Max Code Length
+            doubleMetaphone.MaxCodeLen = 3;
+            Assert.AreEqual(3, doubleMetaphone.MaxCodeLen, "Set Max Code Length");
+            Assert.AreEqual("JMP", doubleMetaphone.GetDoubleMetaphone(value, false), "Max=3 Primary");
+            Assert.AreEqual("AMP", doubleMetaphone.GetDoubleMetaphone(value, true), "Max=3 Alternate");
+        }
+
+        [Test]
+        public void TestIsDoubleMetaphoneEqualBasic()
+        {
+            string[][] testFixture = { new string[] { "Case", "case" }, new string[] {
+                "CASE", "Case" }, new string[]{
+                "caSe", "cAsE" }, new string[]{
+                "cookie", "quick" }, new string[]{
+                "quick", "cookie" }, new string[]{
+                "Brian", "Bryan" }, new string[]{
+                "Auto", "Otto" }, new string[]{
+                "Steven", "Stefan" }, new string[]{
+                "Philipowitz", "Filipowicz" }
+        };
+            DoubleMetaphoneEqualTest(testFixture, false);
+            DoubleMetaphoneEqualTest(testFixture, true);
+        }
+
+        /**
+         * Example in the original article, but a failure in this implementation
+         * (as in the Java implementation it was ported from):
+         */
+        [Test]
+        public void TestIsDoubleMetaphoneEqualExtended1()
+        {
+            //string[][] testFixture = { new string[] { "Smith", "Schmidt" } };
+            //DoubleMetaphoneEqualTest(testFixture, false);
+            //DoubleMetaphoneEqualTest(testFixture, true);
+        }
+
+        [Test]
+        public void TestIsDoubleMetaphoneEqualExtended2()
+        {
+            string[][] testFixture = { new string[] { "Jablonski", "Yablonsky" } };
+            //DoubleMetaphoneEqualTest(testFixture, false);
+            DoubleMetaphoneEqualTest(testFixture, true);
+        }
+
+        /**
+         * Used to generate the MATCHES array and test possible matches from the
+         * FIXTURE array.
+         */
+        [Test]
+        public void TestIsDoubleMetaphoneEqualExtended3()
+        {
+            this.ValidateFixture(FIXTURE);
+            StringBuilder failures = new StringBuilder();
+            StringBuilder matches = new StringBuilder();
+            String cr = Environment.NewLine;
+            matches.Append("private static readonly string[][] MATCHES = {" + cr);
+            int failCount = 0;
+            for (int i = 0; i < FIXTURE.Length; i++)
+            {
+                String name0 = FIXTURE[i][0];
+                String name1 = FIXTURE[i][1];
+                bool match1 = this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1, false);
+                bool match2 = this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1, true);
+                if (match1 == false && match2 == false)
+                {
+                    string failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
+                    failures.Append(failMsg);
+                    failCount++;
+                }
+                else
+                {
+                    matches.Append("new string[] { \"" + name0 + "\", \"" + name1 + "\" }," + cr);
+                }
+            }
+            matches.Append("};");
+            // Turn on to print a new MATCH array
+            //Console.WriteLine(matches.ToString());
+            if (failCount > 0)
+            {
+                // Turn on to see which pairs do NOT match.
+                // string msg = failures.ToString();
+                // Assert.Fail(failCount + " failures out of " + FIXTURE.Length +
+                //     ". The following could be made to match: " + cr + msg);
+            }
+        }
+
+        [Test]
+        public void TestIsDoubleMetaphoneEqualWithMATCHES()
+        {
+            this.ValidateFixture(MATCHES);
+            for (int i = 0; i < MATCHES.Length; i++)
+            {
+                String name0 = MATCHES[i][0];
+                String name1 = MATCHES[i][1];
+                bool match1 = this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1, false);
+                bool match2 = this.StringEncoder.IsDoubleMetaphoneEqual(name0, name1, true);
+                if (match1 == false && match2 == false)
+                {
+                    Assert.Fail("Expected match [" + i + "] " + name0 + " and " + name1);
+                }
+            }
+        }
+
+        [Test]
+        public void TestIsDoubleMetaphoneNotEqual()
+        {
+            DoubleMetaphoneNotEqualTest(false);
+            DoubleMetaphoneNotEqualTest(true);
+        }
+
+        [Test]
+        public void TestCCedilla()
+        {
+            Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla
+        }
+
+        [Test]
+        public void TestNTilde()
+        {
+            Assert.True(this.StringEncoder.IsDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde
+        }
+
+        public void ValidateFixture(string[][] pairs)
+        {
+            if (pairs.Length == 0)
+            {
+                Assert.Fail("Test fixture is empty");
+            }
+            for (int i = 0; i < pairs.Length; i++)
+            {
+                if (pairs[i].Length != 2)
+                {
+                    Assert.Fail("Error in test fixture in the data array at index " + i);
+                }
+            }
+        }
+    }
+}
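
(Editorial note, not part of the commit.) A minimal usage sketch of the DoubleMetaphone
encoder exercised by the tests above. The wrapper class and Main method are illustrative
only, the encoder namespace is assumed to mirror the test namespace
(Lucene.Net.Analysis.Phonetic.Language), and the expected values in the comments are taken
from the assertions in TestDoubleMetaphone, TestSetMaxCodeLength, and the equality tests:

    using System;
    using Lucene.Net.Analysis.Phonetic.Language;

    public static class DoubleMetaphoneUsageSketch
    {
        public static void Main()
        {
            // MaxCodeLen defaults to 4; set explicitly here for clarity.
            DoubleMetaphone encoder = new DoubleMetaphone { MaxCodeLen = 4 };

            // Primary and alternate encodings.
            Console.WriteLine(encoder.Encode("testing"));                    // TSTN
            Console.WriteLine(encoder.GetDoubleMetaphone("jumped", false));  // JMPT (primary)
            Console.WriteLine(encoder.GetDoubleMetaphone("jumped", true));   // AMPT (alternate)

            // Phonetic equality checks.
            Console.WriteLine(encoder.IsDoubleMetaphoneEqual("Brian", "Bryan"));        // True
            Console.WriteLine(encoder.IsDoubleMetaphoneEqual("Brain", "Band", false));  // False
        }
    }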

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MatchRatingApproachEncoderTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MatchRatingApproachEncoderTest.cs b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MatchRatingApproachEncoderTest.cs
new file mode 100644
index 0000000..1c5cb1a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Phonetic/Language/MatchRatingApproachEncoderTest.cs
@@ -0,0 +1,609 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Series of tests for the Match Rating Approach algorithm.
+    /// <para/>
+    /// General naming nomenclature for the test is of the form:
+    /// GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult
+    /// <para/>
+    /// An unusual value is indicated by the term "corner case"
+    /// </summary>
+    public class MatchRatingApproachEncoderTest : StringEncoderAbstractTest<MatchRatingApproachEncoder>
+    {
+        // ***** BEGIN REGION - TEST SUPPORT METHODS
+
+        [Test]
+        public void TestAccentRemoval_AllLower_SuccessfullyRemoved()
+        {
+            Assert.AreEqual("aeiou", this.StringEncoder.RemoveAccents("áéíóú"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant()
+        {
+            Assert.AreEqual("ae io  u", this.StringEncoder.RemoveAccents("áé íó  ú"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant()
+        {
+            Assert.AreEqual("AeiOuu", this.StringEncoder.RemoveAccents("ÁeíÓuu"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant()
+        {
+            Assert.AreEqual("A-e'i.,o&u", this.StringEncoder.RemoveAccents("Á-e'í.,ó&ú"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved()
+        {
+            Assert.AreEqual("aeoußAEOUnNa", this.StringEncoder.RemoveAccents("äëöüßÄËÖÜñÑà"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved()
+        {
+            Assert.AreEqual("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c",
+                    this.StringEncoder.RemoveAccents("È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç"));
+        }
+
+        [Test]
+        public void TestAccentRemovalNormalString_NoChange()
+        {
+            Assert.AreEqual("Colorless green ideas sleep furiously", this.StringEncoder.RemoveAccents("Colorless green ideas sleep furiously"));
+        }
+
+        [Test]
+        public void TestAccentRemoval_NINO_NoChange()
+        {
+            Assert.AreEqual("", this.StringEncoder.RemoveAccents(""));
+        }
+
+        [Test]
+        public void TestAccentRemoval_NullValue_ReturnNullSuccessfully()
+        {
+            Assert.AreEqual(null, this.StringEncoder.RemoveAccents(null));
+        }
+
+        [Test]
+        public void TestRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully()
+        {
+            Assert.AreEqual("BUBLE", this.StringEncoder.RemoveDoubleConsonants("BUBBLE"));
+        }
+
+        [Test]
+        public void TestRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully()
+        {
+            Assert.AreEqual("MISISIPI", this.StringEncoder.RemoveDoubleConsonants("MISSISSIPPI"));
+        }
+
+        [Test]
+        public void TestRemoveDoubleDoubleVowel_BEETLE_NotRemoved()
+        {
+            Assert.AreEqual("BEETLE", this.StringEncoder.RemoveDoubleConsonants("BEETLE"));
+        }
+
+        [Test]
+        public void TestIsVowel_CapitalA_ReturnsTrue()
+        {
+            Assert.True(this.StringEncoder.IsVowel("A"));
+        }
+
+        [Test]
+        public void TestIsVowel_SmallD_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsVowel("d"));
+        }
+
+        [Test]
+        public void TestRemoveVowel_ALESSANDRA_Returns_ALSSNDR()
+        {
+            Assert.AreEqual("ALSSNDR", this.StringEncoder.RemoveVowels("ALESSANDRA"));
+        }
+
+        [Test]
+        public void TestRemoveVowel__AIDAN_Returns_ADN()
+        {
+            Assert.AreEqual("ADN", this.StringEncoder.RemoveVowels("AIDAN"));
+        }
+
+        [Test]
+        public void TestRemoveVowel__DECLAN_Returns_DCLN()
+        {
+            Assert.AreEqual("DCLN", this.StringEncoder.RemoveVowels("DECLAN"));
+        }
+
+        [Test]
+        public void TestGetFirstLast3__ALEXANDER_Returns_Aleder()
+        {
+            Assert.AreEqual("Aleder", this.StringEncoder.GetFirst3Last3("Alexzander"));
+        }
+
+        [Test]
+        public void TestGetFirstLast3_PETE_Returns_PETE()
+        {
+            Assert.AreEqual("PETE", this.StringEncoder.GetFirst3Last3("PETE"));
+        }
+
+        [Test]
+        public void TestleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4()
+        {
+            Assert.AreEqual(4, this.StringEncoder.LeftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA"));
+        }
+
+        [Test]
+        public void TestleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0()
+        {
+            Assert.AreEqual(0, this.StringEncoder.LeftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA"));
+        }
+
+        [Test]
+        public void TestGetMinRating_7_Return4_Successfully()
+        {
+            Assert.AreEqual(4, this.StringEncoder.GetMinRating(7));
+        }
+
+        [Test]
+        public void TestGetMinRating_1_Returns5_Successfully()
+        {
+            Assert.AreEqual(5, this.StringEncoder.GetMinRating(1));
+        }
+
+        [Test]
+        public void TestGetMinRating_2_Returns5_Successfully()
+        {
+            Assert.AreEqual(5, this.StringEncoder.GetMinRating(2));
+        }
+
+        [Test]
+        public void TestGetMinRating_5_Returns4_Successfully()
+        {
+            Assert.AreEqual(4, this.StringEncoder.GetMinRating(5));
+        }
+
+        [Test]
+        public void TestGetMinRating_5_Returns4_Successfully2()
+        {
+            Assert.AreEqual(4, this.StringEncoder.GetMinRating(5));
+        }
+
+        [Test]
+        public void TestGetMinRating_6_Returns4_Successfully()
+        {
+            Assert.AreEqual(4, this.StringEncoder.GetMinRating(6));
+        }
+
+        [Test]
+        public void TestGetMinRating_7_Returns4_Successfully()
+        {
+            Assert.AreEqual(4, this.StringEncoder.GetMinRating(7));
+        }
+
+        [Test]
+        public void TestGetMinRating_8_Returns3_Successfully()
+        {
+            Assert.AreEqual(3, this.StringEncoder.GetMinRating(8));
+        }
+
+        [Test]
+        public void TestGetMinRating_10_Returns3_Successfully()
+        {
+            Assert.AreEqual(3, this.StringEncoder.GetMinRating(10));
+        }
+
+        [Test]
+        public void TestGetMinRating_11_Returns_3_Successfully()
+        {
+            Assert.AreEqual(3, this.StringEncoder.GetMinRating(11));
+        }
+
+        [Test]
+        public void TestGetMinRating_13_Returns_1_Successfully()
+        {
+            Assert.AreEqual(1, this.StringEncoder.GetMinRating(13));
+        }
+
+        [Test]
+        public void TestCleanName_SuccessfullyClean()
+        {
+            Assert.AreEqual("THISISATEST", this.StringEncoder.CleanName("This-ís   a t.,es &t"));
+        }
+
+        [Test]
+        public void TestIsVowel_SingleVowel_ReturnsTrue()
+        {
+            Assert.True(this.StringEncoder.IsVowel("I"));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("test", ""));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("", "test"));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("test", " "));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals(" ", "test"));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("test", null));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals(null, "test"));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("t", "test"));
+        }
+
+        [Test]
+        public void TestIsEncodeEquals_CornerCase_SecondNameJust1Letter_ReturnsFalse()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("test", "t"));
+        }
+
+        // ***** END REGION - TEST SUPPORT METHODS
+
+        // ***** BEGIN REGION - TEST GET MRA ENCODING
+
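+        // The MRA encoding keeps the first letter, drops the remaining vowels,
+        // collapses doubled consonants and, for long names, keeps only the first
+        // three and last three of the surviving letters (e.g. HARPER -> HRPR).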
+        [Test]
+        public void TestGetEncoding_HARPER_HRPR()
+        {
+            Assert.AreEqual("HRPR", this.StringEncoder.Encode("HARPER"));
+        }
+
+        [Test]
+        public void TestGetEncoding_SMITH_to_SMTH()
+        {
+            Assert.AreEqual("SMTH", this.StringEncoder.Encode("Smith"));
+        }
+
+        [Test]
+        public void TestGetEncoding_SMYTH_to_SMYTH()
+        {
+            Assert.AreEqual("SMYTH", this.StringEncoder.Encode("Smyth"));
+        }
+
+        [Test]
+        public void TestGetEncoding_Space_to_Nothing()
+        {
+            Assert.AreEqual("", this.StringEncoder.Encode(" "));
+        }
+
+        [Test]
+        public void TestGetEncoding_NoSpace_to_Nothing()
+        {
+            Assert.AreEqual("", this.StringEncoder.Encode(""));
+        }
+
+        [Test]
+        public void TestGetEncoding_Null_to_Nothing()
+        {
+            Assert.AreEqual("", this.StringEncoder.Encode(null));
+        }
+
+        [Test]
+        public void TestGetEncoding_One_Letter_to_Nothing()
+        {
+            Assert.AreEqual("", this.StringEncoder.Encode("E"));
+        }
+
+        [Test]
+        public void TestCompareNameNullSpace_ReturnsFalseSuccessfully()
+        {
+            Assert.False(StringEncoder.IsEncodeEquals(null, " "));
+        }
+
+        [Test]
+        public void TestCompareNameSameNames_ReturnsTrueSuccessfully()
+        {
+            Assert.True(StringEncoder.IsEncodeEquals("John", "John"));
+        }
+
+        // ***** END REGION - TEST GET MRA ENCODING
+
+        // ***** BEGIN REGION - TEST GET MRA COMPARISONS
+
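+        // Two names match when the similarity rating of their encodings (see the
+        // left-to-right/right-to-left pass above) is at least the minimum rating
+        // for their combined encoded length.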
+        [Test]
+        public void TestCompare_SMITH_SMYTH_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("smith", "smyth"));
+        }
+
+        [Test]
+        public void TestCompare_BURNS_BOURNE_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Burns", "Bourne"));
+        }
+
+        [Test]
+        public void TestCompare_ShortNames_AL_ED_WorksButNoMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Al", "Ed"));
+        }
+
+        [Test]
+        public void TestCompare_CATHERINE_KATHRYN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Catherine", "Kathryn"));
+        }
+
+        [Test]
+        public void TestCompare_BRIAN_BRYAN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Brian", "Bryan"));
+        }
+
+        [Test]
+        public void TestCompare_SEAN_SHAUN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Séan", "Shaun"));
+        }
+
+        [Test]
+        public void TestCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Cólm.   ", "C-olín"));
+        }
+
+        [Test]
+        public void TestCompare_STEPHEN_STEVEN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Stephen", "Steven"));
+        }
+
+        [Test]
+        public void TestCompare_STEVEN_STEFAN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Steven", "Stefan"));
+        }
+
+        [Test]
+        public void TestCompare_STEPHEN_STEFAN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Stephen", "Stefan"));
+        }
+
+        [Test]
+        public void TestCompare_SAM_SAMUEL_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Sam", "Samuel"));
+        }
+
+        [Test]
+        public void TestCompare_MICKY_MICHAEL_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Micky", "Michael"));
+        }
+
+        [Test]
+        public void TestCompare_OONA_OONAGH_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Oona", "Oonagh"));
+        }
+
+        [Test]
+        public void TestCompare_SOPHIE_SOFIA_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Sophie", "Sofia"));
+        }
+
+        [Test]
+        public void TestCompare_FRANCISZEK_FRANCES_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Franciszek", "Frances"));
+        }
+
+        [Test]
+        public void TestCompare_TOMASZ_TOM_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Tomasz", "tom"));
+        }
+
+        [Test]
+        public void TestCompare_SmallInput_KARL_Kl_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Kl", "Karl"));
+        }
+
+        [Test]
+        public void TestCompareNameToSingleLetter_KARL_C_DoesNotMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Karl", "C"));
+        }
+
+        [Test]
+        public void TestCompare_ZACH_ZACHARIA_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Zach", "Zacharia"));
+        }
+
+        [Test]
+        public void TestCompare_KARL_ALESSANDRO_DoesNotMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Karl", "Alessandro"));
+        }
+
+        [Test]
+        public void TestCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Úna", "Oonagh")); // Disappointing
+        }
+
+        // ***** BEGIN REGION - TEST COMPARISONS - SURNAMES
+
+        [Test]
+        public void TestCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
+        }
+
+        [Test]
+        public void TestCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfullyMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Moriarty", "OMuircheartaigh"));
+        }
+
+        [Test]
+        public void TestCompare_LongSurnames_OMUIREADHAIGH_OMUIRCHEARTAIGH_SuccessfulMatch()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh "));
+        }
+
+        [Test]
+        public void TestCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Cooper-Flynn", "Super-Lyn"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Hailey", "Halley"));
+        }
+
+        // **** BEGIN YIDDISH/SLAVIC SECTION ****
+
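+        // Each pair below is the same surname rendered in different Yiddish/Slavic
+        // transliterations and should still encode as equal.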
+        [Test]
+        public void TestCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Auerbach", "Uhrbach"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Moskowitz", "Moskovitz"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("LIPSHITZ", "LIPPSZYC"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("LEWINSKY", "LEVINSKI"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts"));
+        }
+
+        [Test]
+        public void TestCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
+        }
+
+        // **** END YIDDISH/SLAVIC SECTION ****
+
+        [Test]
+        public void TestCompare_PETERSON_PETERS_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Peterson", "Peters"));
+        }
+
+        [Test]
+        public void TestCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("McGowan", "Mc Geoghegan"));
+        }
+
+        [Test]
+        public void TestCompare_SurnamesCornerCase_MURPHY_Space_NoMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Murphy", " "));
+        }
+
+        [Test]
+        public void TestCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Murphy", ""));
+        }
+
+        [Test]
+        public void TestCompare_SurnameCornerCase_Nulls_NoMatch()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals(null, null));
+        }
+
+        [Test]
+        public void TestCompare_Surnames_MURPHY_LYNCH_NoMatchExpected()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Murphy", "Lynch"));
+        }
+
+        [Test]
+        public void TestCompare_Forenames_SEAN_JOHN_MatchExpected()
+        {
+            Assert.True(this.StringEncoder.IsEncodeEquals("Sean", "John"));
+        }
+
+        [Test]
+        public void TestCompare_Forenames_SEAN_PETE_NoMatchExpected()
+        {
+            Assert.False(this.StringEncoder.IsEncodeEquals("Sean", "Pete"));
+        }
+
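+        // Supplies the encoder exposed as this.StringEncoder to the tests above.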
+        protected override MatchRatingApproachEncoder CreateStringEncoder()
+        {
+            return new MatchRatingApproachEncoder();
+        }
+
+        // ***** END REGION - TEST GET MRA COMPARISONS
+
+    }
+}