You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/06/27 20:33:59 UTC
[14/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1ee3a9cc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1ee3a9cc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1ee3a9cc

Branch: refs/heads/master
Commit: 1ee3a9ccad8b8da64d54d19eac5d40beb7e48ca3
Parents: 368424f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Jun 28 02:34:21 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Jun 28 02:54:54 2017 +0700

----------------------------------------------------------------------
 CONTRIBUTING.md                                 |    2 +-
 Lucene.Net.Portable.sln                         |   36 +-
 Lucene.Net.sln                                  |   52 +
 .../BeiderMorseFilter.cs                        |  137 ++
 .../BeiderMorseFilterFactory.cs                 |   71 +
 .../DoubleMetaphoneFilter.cs                    |  126 ++
 .../DoubleMetaphoneFilterFactory.cs             |   67 +
 .../Language/AbstractCaverphone .cs             |   78 ++
 .../Language/Bm/BeiderMorseEncoder.cs           |  163 +++
 .../Language/Bm/Lang.cs                         |  276 ++++
 .../Language/Bm/Languages.cs                    |  324 +++++
 .../Language/Bm/NameType.cs                     |   69 +
 .../Language/Bm/PhoneticEngine.cs               |  578 ++++++++
 .../Language/Bm/ResourceConstants.cs            |   37 +
 .../Language/Bm/Rule.cs                         | 1069 +++++++++++++++
 .../Language/Bm/RuleType.cs                     |   68 +
 .../Language/Bm/ash_approx_any.txt              |  153 +++
 .../Language/Bm/ash_approx_common.txt           |  219 +++
 .../Language/Bm/ash_approx_cyrillic.txt         |   18 +
 .../Language/Bm/ash_approx_english.txt          |   47 +
 .../Language/Bm/ash_approx_french.txt           |   40 +
 .../Language/Bm/ash_approx_german.txt           |   72 +
 .../Language/Bm/ash_approx_hebrew.txt           |   18 +
 .../Language/Bm/ash_approx_hungarian.txt        |   18 +
 .../Language/Bm/ash_approx_polish.txt           |   83 ++
 .../Language/Bm/ash_approx_romanian.txt         |   18 +
 .../Language/Bm/ash_approx_russian.txt          |   47 +
 .../Language/Bm/ash_approx_spanish.txt          |   18 +
 .../Language/Bm/ash_exact_any.txt               |   52 +
 .../Language/Bm/ash_exact_approx_common.txt     |   82 ++
 .../Language/Bm/ash_exact_common.txt            |   33 +
 .../Language/Bm/ash_exact_cyrillic.txt          |   18 +
 .../Language/Bm/ash_exact_english.txt           |   18 +
 .../Language/Bm/ash_exact_french.txt            |   18 +
 .../Language/Bm/ash_exact_german.txt            |   18 +
 .../Language/Bm/ash_exact_hebrew.txt            |   18 +
 .../Language/Bm/ash_exact_hungarian.txt         |   18 +
 .../Language/Bm/ash_exact_polish.txt            |   23 +
 .../Language/Bm/ash_exact_romanian.txt          |   18 +
 .../Language/Bm/ash_exact_russian.txt           |   19 +
 .../Language/Bm/ash_exact_spanish.txt           |   18 +
 .../Language/Bm/ash_hebrew_common.txt           |  118 ++
 .../Language/Bm/ash_languages.txt               |   28 +
 .../Language/Bm/ash_rules_any.txt               |  332 +++++
 .../Language/Bm/ash_rules_cyrillic.txt          |  100 ++
 .../Language/Bm/ash_rules_english.txt           |  107 ++
 .../Language/Bm/ash_rules_french.txt            |   91 ++
 .../Language/Bm/ash_rules_german.txt            |  128 ++
 .../Language/Bm/ash_rules_hebrew.txt            |   62 +
 .../Language/Bm/ash_rules_hungarian.txt         |   84 ++
 .../Language/Bm/ash_rules_polish.txt            |  185 +++
 .../Language/Bm/ash_rules_romanian.txt          |   66 +
 .../Language/Bm/ash_rules_russian.txt           |  164 +++
 .../Language/Bm/ash_rules_spanish.txt           |   77 ++
 .../Language/Bm/gen_approx_any.txt              |  124 ++
 .../Language/Bm/gen_approx_arabic.txt           |   23 +
 .../Language/Bm/gen_approx_common.txt           |  223 +++
 .../Language/Bm/gen_approx_cyrillic.txt         |   18 +
 .../Language/Bm/gen_approx_czech.txt            |   18 +
 .../Language/Bm/gen_approx_dutch.txt            |   18 +
 .../Language/Bm/gen_approx_english.txt          |   47 +
 .../Language/Bm/gen_approx_french.txt           |   25 +
 .../Language/Bm/gen_approx_german.txt           |   73 +
 .../Language/Bm/gen_approx_greek.txt            |   18 +
 .../Language/Bm/gen_approx_greeklatin.txt       |   20 +
 .../Language/Bm/gen_approx_hebrew.txt           |   18 +
 .../Language/Bm/gen_approx_hungarian.txt        |   18 +
 .../Language/Bm/gen_approx_italian.txt          |   18 +
 .../Language/Bm/gen_approx_polish.txt           |   84 ++
 .../Language/Bm/gen_approx_portuguese.txt       |   18 +
 .../Language/Bm/gen_approx_romanian.txt         |   18 +
 .../Language/Bm/gen_approx_russian.txt          |   48 +
 .../Language/Bm/gen_approx_spanish.txt          |   21 +
 .../Language/Bm/gen_approx_turkish.txt          |   18 +
 .../Language/Bm/gen_exact_any.txt               |   33 +
 .../Language/Bm/gen_exact_approx_common.txt     |   79 ++
 .../Language/Bm/gen_exact_arabic.txt            |   18 +
 .../Language/Bm/gen_exact_common.txt            |   32 +
 .../Language/Bm/gen_exact_cyrillic.txt          |   18 +
 .../Language/Bm/gen_exact_czech.txt             |   18 +
 .../Language/Bm/gen_exact_dutch.txt             |   18 +
 .../Language/Bm/gen_exact_english.txt           |   18 +
 .../Language/Bm/gen_exact_french.txt            |   18 +
 .../Language/Bm/gen_exact_german.txt            |   18 +
 .../Language/Bm/gen_exact_greek.txt             |   18 +
 .../Language/Bm/gen_exact_greeklatin.txt        |   18 +
 .../Language/Bm/gen_exact_hebrew.txt            |   18 +
 .../Language/Bm/gen_exact_hungarian.txt         |   18 +
 .../Language/Bm/gen_exact_italian.txt           |   18 +
 .../Language/Bm/gen_exact_polish.txt            |   23 +
 .../Language/Bm/gen_exact_portuguese.txt        |   18 +
 .../Language/Bm/gen_exact_romanian.txt          |   18 +
 .../Language/Bm/gen_exact_russian.txt           |   19 +
 .../Language/Bm/gen_exact_spanish.txt           |   19 +
 .../Language/Bm/gen_exact_turkish.txt           |   18 +
 .../Language/Bm/gen_hebrew_common.txt           |  107 ++
 .../Language/Bm/gen_languages.txt               |   36 +
 .../Language/Bm/gen_rules_any.txt               |  367 +++++
 .../Language/Bm/gen_rules_arabic.txt            |   74 +
 .../Language/Bm/gen_rules_cyrillic.txt          |   99 ++
 .../Language/Bm/gen_rules_czech.txt             |   67 +
 .../Language/Bm/gen_rules_dutch.txt             |   78 ++
 .../Language/Bm/gen_rules_english.txt           |  113 ++
 .../Language/Bm/gen_rules_french.txt            |  114 ++
 .../Language/Bm/gen_rules_german.txt            |  129 ++
 .../Language/Bm/gen_rules_greek.txt             |   97 ++
 .../Language/Bm/gen_rules_greeklatin.txt        |  118 ++
 .../Language/Bm/gen_rules_hebrew.txt            |   62 +
 .../Language/Bm/gen_rules_hungarian.txt         |   83 ++
 .../Language/Bm/gen_rules_italian.txt           |   77 ++
 .../Language/Bm/gen_rules_polish.txt            |  185 +++
 .../Language/Bm/gen_rules_portuguese.txt        |  105 ++
 .../Language/Bm/gen_rules_romanian.txt          |   64 +
 .../Language/Bm/gen_rules_russian.txt           |  142 ++
 .../Language/Bm/gen_rules_spanish.txt           |   85 ++
 .../Language/Bm/gen_rules_turkish.txt           |   50 +
 .../Language/Bm/lang.txt                        |  293 ++++
 .../Language/Bm/sep_approx_any.txt              |   20 +
 .../Language/Bm/sep_approx_common.txt           |  115 ++
 .../Language/Bm/sep_approx_french.txt           |   18 +
 .../Language/Bm/sep_approx_hebrew.txt           |   18 +
 .../Language/Bm/sep_approx_italian.txt          |   18 +
 .../Language/Bm/sep_approx_portuguese.txt       |   18 +
 .../Language/Bm/sep_approx_spanish.txt          |   18 +
 .../Language/Bm/sep_exact_any.txt               |   18 +
 .../Language/Bm/sep_exact_approx_common.txt     |   79 ++
 .../Language/Bm/sep_exact_common.txt            |   32 +
 .../Language/Bm/sep_exact_french.txt            |   18 +
 .../Language/Bm/sep_exact_hebrew.txt            |   18 +
 .../Language/Bm/sep_exact_italian.txt           |   18 +
 .../Language/Bm/sep_exact_portuguese.txt        |   18 +
 .../Language/Bm/sep_exact_spanish.txt           |   18 +
 .../Language/Bm/sep_hebrew_common.txt           |   86 ++
 .../Language/Bm/sep_languages.txt               |   23 +
 .../Language/Bm/sep_rules_any.txt               |  155 +++
 .../Language/Bm/sep_rules_french.txt            |   91 ++
 .../Language/Bm/sep_rules_hebrew.txt            |   62 +
 .../Language/Bm/sep_rules_italian.txt           |   76 ++
 .../Language/Bm/sep_rules_portuguese.txt        |  104 ++
 .../Language/Bm/sep_rules_spanish.txt           |   95 ++
 .../Language/Caverphone1.cs                     |  131 ++
 .../Language/Caverphone2.cs                     |  133 ++
 .../Language/ColognePhonetic.cs                 |  501 +++++++
 .../Language/DaitchMokotoffSoundex.cs           |  620 +++++++++
 .../Language/DoubleMetaphone.cs                 | 1280 +++++++++++++++++
 .../Language/MatchRatingApproachEncoder.cs      |  425 ++++++
 .../Language/Metaphone.cs                       |  494 +++++++
 .../Language/Nysiis.cs                          |  370 +++++
 .../Language/RefinedSoundex.cs                  |  202 +++
 .../Language/Soundex.cs                         |  318 +++++
 .../Language/SoundexUtils.cs                    |  123 ++
 .../Language/StringEncoder.cs                   |   35 +
 .../Language/dmrules.txt                        |  200 +++
 .../Lucene.Net.Analysis.Phonetic.csproj         |  225 +++
 .../Lucene.Net.Analysis.Phonetic.project.json   |    8 +
 .../Lucene.Net.Analysis.Phonetic.xproj          |   19 +
 .../PhoneticFilter.cs                           |  109 ++
 .../PhoneticFilterFactory.cs                    |  187 +++
 .../Properties/AssemblyInfo.cs                  |   48 +
 src/Lucene.Net.Analysis.Phonetic/project.json   |   54 +
 .../DoubleMetaphoneFilterTest.cs                |  111 ++
 .../Language/Bm/BeiderMorseEncoderTest.cs       |  255 ++++
 .../Bm/CacheSubSequencePerformanceTest.cs       |  138 ++
 .../Language/Bm/LanguageGuessingTest.cs         |   84 ++
 .../Bm/PhoneticEnginePerformanceTest.cs         |  141 ++
 .../Language/Bm/PhoneticEngineRegressionTest.cs |  234 ++++
 .../Language/Bm/PhoneticEngineTest.cs           |   89 ++
 .../Language/Bm/RuleTest.cs                     |  163 +++
 .../Language/Caverphone1Test.cs                 |  109 ++
 .../Language/Caverphone2Test .cs                |  375 +++++
 .../Language/ColognePhoneticTest.cs             |  171 +++
 .../Language/DaitchMokotoffSoundexTest.cs       |  176 +++
 .../Language/DoubleMetaphone2Test.cs            | 1291 ++++++++++++++++++
 .../Language/DoubleMetaphoneTest.cs             | 1266 +++++++++++++++++
 .../Language/MatchRatingApproachEncoderTest.cs  |  609 +++++++++
 .../Language/MetaphoneTest.cs                   |  518 +++++++
 .../Language/NysiisTest.cs                      |  319 +++++
 .../Language/RefinedSoundexTest.cs              |   99 ++
 .../Language/SoundexTest.cs                     |  424 ++++++
 .../Language/StringEncoderAbstractTest.cs       |  164 +++
 .../Lucene.Net.Tests.Analysis.Phonetic.csproj   |  108 ++
 ...ene.Net.Tests.Analysis.Phonetic.project.json |   11 +
 .../Lucene.Net.Tests.Analysis.Phonetic.xproj    |   21 +
 .../Properties/AssemblyInfo.cs                  |   42 +
 .../TestBeiderMorseFilter.cs                    |  132 ++
 .../TestBeiderMorseFilterFactory.cs             |   89 ++
 .../TestDoubleMetaphoneFilterFactory.cs         |   70 +
 .../TestPhoneticFilter.cs                       |  122 ++
 .../TestPhoneticFilterFactory.cs                |  228 ++++
 .../project.json                                |   45 +
 190 files changed, 23900 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/CONTRIBUTING.md
----------------------------------------------------------------------
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4e00086..95019c5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -55,7 +55,7 @@ helpers to help with that, see for examples see our [Java style methods to avoid
 * [Lucene.Net.Analysis.ICU](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/icu) (note that we will be putting this functionality into the Lucene.Net.ICU package)
 * [Lucene.Net.Analysis.Kuromoji](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/kuromoji)
 
-There are a few other specialized Analysis packages ([Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/morfologik), [Phonetic](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/phonetic), [UIMA](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/uima)) that have dependencies that would also need to be ported if they don't exist in .NET yet.
+There are a few other specialized packages ([Analysis.Morfologik](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/morfologik), [Analysis.UIMA](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/analysis/uima), [Benchmark](https://github.com/apache/lucene-solr/tree/releases/lucene-solr/4.8.0/lucene/benchmark)) that have dependencies that would also need to be ported if they don't exist in .NET yet.
 
 There are several command-line utilities for tasks such as maintaining indexes that just need to be put into a console application and "usage" documentation updated for them to be useful (which might be helpful for those who don't want to install Java to run such utilities from the Lucene project). See the [JIRA Issues](https://issues.apache.org/jira/issues/?jql=project%20%3D%20LUCENENET%20AND%20status%20%3D%20Open%20AND%20text%20~%20%22CLI%22) for the current list.
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/Lucene.Net.Portable.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.Portable.sln b/Lucene.Net.Portable.sln
index d3678ee..0b7f53e 100644
--- a/Lucene.Net.Portable.sln
+++ b/Lucene.Net.Portable.sln
@@ -89,6 +89,10 @@ Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Analysis.SmartCn
 EndProject
 Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Analysis.SmartCn", "src\Lucene.Net.Tests.Analysis.SmartCn\Lucene.Net.Tests.Analysis.SmartCn.xproj", "{2870FB52-1239-493F-A0BE-951660194A66}"
 EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Analysis.Phonetic", "src\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.xproj", "{56B2FFB7-6870-4420-8BC7-187ADF5341D9}"
+EndProject
+Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Lucene.Net.Tests.Analysis.Phonetic", "src\Lucene.Net.Tests.Analysis.Phonetic\Lucene.Net.Tests.Analysis.Phonetic.xproj", "{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -393,14 +397,6 @@ Global
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|Any CPU.Build.0 = Release|Any CPU
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.ActiveCfg = Release|Any CPU
 		{32FD3471-E862-4055-B969-79C12A656366}.Release|x86.Build.0 = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.ActiveCfg = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.Build.0 = Debug|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.Build.0 = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.ActiveCfg = Release|Any CPU
-		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.Build.0 = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Debug|x86.ActiveCfg = Debug|Any CPU
@@ -409,6 +405,30 @@ Global
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|Any CPU.Build.0 = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|x86.ActiveCfg = Release|Any CPU
 		{A400916E-DCB8-4A16-BE83-91891C05191F}.Release|x86.Build.0 = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Debug|x86.Build.0 = Debug|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|Any CPU.Build.0 = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.ActiveCfg = Release|Any CPU
+		{2870FB52-1239-493F-A0BE-951660194A66}.Release|x86.Build.0 = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Debug|x86.Build.0 = Debug|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|Any CPU.Build.0 = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|x86.ActiveCfg = Release|Any CPU
+		{56B2FFB7-6870-4420-8BC7-187ADF5341D9}.Release|x86.Build.0 = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Debug|x86.Build.0 = Debug|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|x86.ActiveCfg = Release|Any CPU
+		{1FE12EF7-4C89-4D49-BDD1-E49DC285F21B}.Release|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/Lucene.Net.sln
----------------------------------------------------------------------
diff --git a/Lucene.Net.sln b/Lucene.Net.sln
index be5b2b9..74a64a3 100644
--- a/Lucene.Net.sln
+++ b/Lucene.Net.sln
@@ -98,6 +98,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.SmartCn
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.SmartCn", "src\Lucene.Net.Tests.Analysis.SmartCn\Lucene.Net.Tests.Analysis.SmartCn.csproj", "{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analysis.Phonetic", "src\Lucene.Net.Analysis.Phonetic\Lucene.Net.Analysis.Phonetic.csproj", "{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Tests.Analysis.Phonetic", "src\Lucene.Net.Tests.Analysis.Phonetic\Lucene.Net.Tests.Analysis.Phonetic.csproj", "{A2867797-0A5D-4878-8F59-58C399C9A4E4}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -955,6 +959,54 @@ Global
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|x86.ActiveCfg = Release|Any CPU
 		{8C8D78D3-BFFD-4301-953B-FE5350B2AEEB}.Release35|x86.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug|x86.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Debug35|x86.Build.0 = Debug|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Any CPU.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|x86.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release|x86.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|x86.ActiveCfg = Release|Any CPU
+		{DAFE3B64-616A-4A2F-90E5-1F135E8A9AF5}.Release35|x86.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|x86.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug|x86.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Any CPU.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Any CPU.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Mixed Platforms.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|Mixed Platforms.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|x86.ActiveCfg = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Debug35|x86.Build.0 = Debug|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Any CPU.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|Mixed Platforms.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|x86.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release|x86.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Any CPU.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Any CPU.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Mixed Platforms.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|Mixed Platforms.Build.0 = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|x86.ActiveCfg = Release|Any CPU
+		{A2867797-0A5D-4878-8F59-58C399C9A4E4}.Release35|x86.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
new file mode 100644
index 0000000..b343994
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs
@@ -0,0 +1,137 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Diagnostics;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// TokenFilter for Beider-Morse phonetic encoding: each input term is encoded by a <see cref="PhoneticEngine"/> and every pattern match in the encoded string is emitted as its own token.
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    /// <seealso cref="BeiderMorseEncoder"/>
+    public sealed class BeiderMorseFilter : TokenFilter
+    {
+        private readonly PhoneticEngine engine; // encodes the term text of each incoming token
+        private readonly LanguageSet languages; // when null, engine.Encode(string) is called without a language set
+
+        // The encoded output is a string such as ab|ac|...
+        // In complex cases like d'angelo it is (anZelo|andZelo|...)-(danZelo|...);
+        // if there are multiple 's, it starts to nest. The pattern therefore matches each run of characters other than '(', ')', '|' and '-'.
+        private static readonly Regex pattern = new Regex("([^()|-]+)", RegexOptions.Compiled);
+
+        private bool isReset = false; // set by Reset(); makes the next IncrementToken() skip matcher.NextMatch()
+        // matcher over any buffered output (initially a match attempt on the empty string)
+        private Match matcher = pattern.Match("");
+        // encoded representation of the most recently consumed input term
+        private string encoded;
+        // preserves all attributes for any buffered outputs; captured right after encoding, restored before each buffered output
+        private State state;
+
+        private readonly ICharTermAttribute termAtt; // term text; overwritten with each matched piece of 'encoded'
+        private readonly IPositionIncrementAttribute posIncAtt; // set to 0 for buffered outputs
+
+        /// <summary>
+        /// Calls <see cref="BeiderMorseFilter(TokenStream, PhoneticEngine, LanguageSet)"/> with a <c>null</c> language set.
+        /// </summary>
+        /// <param name="input"><see cref="TokenStream"/> to filter</param>
+        /// <param name="engine">Configured <see cref="PhoneticEngine"/> with BM settings.</param>
+        public BeiderMorseFilter(TokenStream input, PhoneticEngine engine)
+            : this(input, engine, null)
+        {
+        }
+
+        /// <summary>
+        /// Create a new <see cref="BeiderMorseFilter"/>.
+        /// </summary>
+        /// <param name="input"><see cref="TokenStream"/> to filter</param>
+        /// <param name="engine">Configured <see cref="PhoneticEngine"/> with BM settings.</param>
+        /// <param name="languages">Optional Set of original languages. Can be <c>null</c> (which means it will be guessed).</param>
+        public BeiderMorseFilter(TokenStream input, PhoneticEngine engine, LanguageSet languages)
+            : base(input)
+        {
+            this.engine = engine;
+            this.languages = languages;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+        /// <summary>Emits the next buffered match of the current encoding if one remains; otherwise consumes and encodes the next input token. Returns <c>false</c> once the input is exhausted.</summary>
+        public override bool IncrementToken()
+        {
+            if (!isReset)
+            {
+                matcher = matcher.NextMatch(); // advance to the next piece of the current encoding
+            }
+            isReset = false;
+            // A further piece of the current encoding is available: emit it with the saved attributes and a position increment of 0.
+            if (matcher.Success)
+            {
+                Debug.Assert(state != null && encoded != null);
+                RestoreState(state); // bring back the attributes captured when the input token was consumed
+
+                int start = matcher.Index;
+                int end = start + matcher.Length;
+                termAtt.SetEmpty().Append(encoded, start, end); // NOTE(review): passes an exclusive end index; assumes Append(string, int, int) takes (start, end), not (start, length) - confirm
+                posIncAtt.PositionIncrement = 0;
+                return true;
+            }
+            // Nothing buffered: pull the next token from the input and encode its term text.
+            if (m_input.IncrementToken())
+            {
+                encoded = (languages == null)
+                    ? engine.Encode(termAtt.ToString())
+                    : engine.Encode(termAtt.ToString(), languages);
+                state = CaptureState(); // captured before the term is overwritten below
+
+                matcher = pattern.Match(encoded);
+                if (matcher.Success)
+                {
+                    int start = matcher.Index;
+                    int end = start + matcher.Length;
+                    termAtt.SetEmpty().Append(encoded, start, end); // same (start, end) assumption as above
+                }
+                return true; // when nothing matched, the token is returned with its term left unchanged
+            }
+            else
+            {
+                return false;
+            }
+        }
+        /// <summary>Resets the wrapped stream via <c>base.Reset()</c> and discards the current match position.</summary>
+        public override void Reset()
+        {
+            base.Reset();
+
+            // LUCENENET: Since we need to "reset" the Match
+            // object, we also need an "isReset" flag to indicate
+            // whether we are at the head of the match and to
+            // take the appropriate measures to ensure we don't
+            // overwrite our matcher variable with
+            // matcher = matcher.NextMatch();
+            // before it is time. A string could potentially
+            // match on index 0, so we need another variable to
+            // manage this state.
+            matcher = pattern.Match("");
+            isReset = true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
new file mode 100644
index 0000000..d4331bb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilterFactory.cs
@@ -0,0 +1,71 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language.Bm;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="BeiderMorseFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_bm" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.BeiderMorseFilterFactory"
+    ///        nameType="GENERIC" ruleType="APPROX"
+    ///        concat="true" languageSet="auto"&gt;
+    ///     &lt;/filter&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class BeiderMorseFilterFactory : TokenFilterFactory
+    {
+        // Engine built once from the nameType / ruleType / concat arguments and handed to every created filter.
+        private readonly PhoneticEngine engine;
+        // Explicit language restriction, or null when languages should be guessed ("auto" or not specified).
+        private readonly LanguageSet languageSet;
+
+        /// <summary>Creates a new <see cref="BeiderMorseFilterFactory"/></summary>
+        /// <param name="args">
+        /// Factory arguments. Recognized keys:
+        /// <c>nameType</c> (a <see cref="NameType"/> name, case-insensitive, default <c>GENERIC</c>),
+        /// <c>ruleType</c> (a <see cref="RuleType"/> name, case-insensitive, default <c>APPROX</c>),
+        /// <c>concat</c> (boolean, default <c>true</c>) and
+        /// <c>languageSet</c> (either <c>auto</c> or a set of language names; default is automatic).
+        /// </param>
+        /// <exception cref="ArgumentException">
+        /// Thrown when <paramref name="args"/> still contains entries after the recognized
+        /// parameters have been read (the base-class getters are expected to remove the keys
+        /// they consume), or when <c>nameType</c> / <c>ruleType</c> is not a valid enum name.
+        /// </exception>
+        public BeiderMorseFilterFactory(IDictionary<string, string> args)
+                  : base(args)
+        {
+            // PhoneticEngine = NameType + RuleType + concat
+            // we use common-codec's defaults: GENERIC + APPROX + true
+            NameType nameType = (NameType)Enum.Parse(typeof(NameType), Get(args, "nameType", NameType.GENERIC.ToString()), true);
+            RuleType ruleType = (RuleType)Enum.Parse(typeof(RuleType), Get(args, "ruleType", RuleType.APPROX.ToString()), true);
+
+            bool concat = GetBoolean(args, "concat", true);
+            engine = new PhoneticEngine(nameType, ruleType, concat);
+
+            // LanguageSet: defaults to automagic, otherwise a comma-separated list.
+            ISet<string> langs = GetSet(args, "languageSet");
+            bool guessLanguages = langs == null || (langs.Count == 1 && langs.Contains("auto"));
+            languageSet = guessLanguages ? null : LanguageSet.From(langs);
+
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its .NET type name
+                // (unlike Java's Map.toString()), so list the offending keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="BeiderMorseFilter"/> that uses this
+        /// factory's engine and language set.
+        /// </summary>
+        /// <param name="input">The token stream to filter.</param>
+        /// <returns>A new <see cref="BeiderMorseFilter"/> over <paramref name="input"/>.</returns>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new BeiderMorseFilter(input, engine, languageSet);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
new file mode 100644
index 0000000..8ee37fa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs
@@ -0,0 +1,126 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Phonetic.Language;
+using Lucene.Net.Analysis.TokenAttributes;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Token filter that produces DoubleMetaphone codes for each term of the
+    /// wrapped stream, including the secondary (alternate) code when it applies.
+    /// </summary>
+    public sealed class DoubleMetaphoneFilter : TokenFilter
+    {
+        //private static readonly string TOKEN_TYPE = "DoubleMetaphone"; // LUCENENET: Not used
+
+        // Captured attribute states that still have to be emitted, in emission order.
+        private readonly LinkedList<State> remainingTokens = new LinkedList<State>();
+        private readonly DoubleMetaphone encoder = new DoubleMetaphone();
+        // true: keep the original term and add codes at the same position; false: replace the term.
+        private readonly bool inject;
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posAtt;
+
+        /// <summary>
+        /// Creates a <see cref="DoubleMetaphoneFilter"/> over <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">The token stream whose terms are encoded.</param>
+        /// <param name="maxCodeLength">Maximum code length, assigned to the encoder's <c>MaxCodeLen</c>.</param>
+        /// <param name="inject">
+        /// <c>true</c> to add the encoded forms as synonyms of the original term,
+        /// <c>false</c> to replace the original term with them.
+        /// </param>
+        public DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, bool inject)
+            : base(input)
+        {
+            this.inject = inject;
+            this.encoder.MaxCodeLen = maxCodeLength;
+            this.termAtt = AddAttribute<ICharTermAttribute>();
+            this.posAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        /// <summary>
+        /// Advances to the next output token: first any queued state, otherwise the
+        /// next input token together with its phonetic codes.
+        /// </summary>
+        /// <returns><c>true</c> if a token was produced, <c>false</c> once the input is exhausted.</returns>
+        public override bool IncrementToken()
+        {
+            while (true)
+            {
+                // Anything queued by a previous pass is emitted before new input is read.
+                if (remainingTokens.Count > 0)
+                {
+                    // clearAttributes();  // not currently necessary
+                    State queued = remainingTokens.First.Value;
+                    remainingTokens.RemoveFirst();
+                    RestoreState(queued);
+                    return true;
+                }
+
+                if (!m_input.IncrementToken())
+                {
+                    return false;
+                }
+
+                if (termAtt.Length == 0)
+                {
+                    return true; // pass through zero length terms
+                }
+
+                // With injection the codes stack on the original token (increment 0);
+                // otherwise the first code inherits the original token's increment.
+                int nextIncrement = inject ? 0 : posAtt.PositionIncrement;
+
+                string term = termAtt.ToString();
+                string primary = encoder.GetDoubleMetaphone(term);
+                string alternate = encoder.GetDoubleMetaphone(term, true);
+
+                // Tracks whether the current attribute values still need to be queued
+                // before being overwritten; lazily avoids a capture/restore when only
+                // one token will be generated.
+                bool pendingCapture = inject;
+
+                if (!string.IsNullOrEmpty(primary) && !primary.Equals(term))
+                {
+                    if (pendingCapture)
+                    {
+                        remainingTokens.AddLast(CaptureState());
+                    }
+                    posAtt.PositionIncrement = nextIncrement;
+                    nextIncrement = 0;
+                    termAtt.SetEmpty().Append(primary);
+                    pendingCapture = true;
+                }
+
+                if (!string.IsNullOrEmpty(alternate)
+                    && !alternate.Equals(primary)
+                    && !primary.Equals(term))
+                {
+                    if (pendingCapture)
+                    {
+                        remainingTokens.AddLast(CaptureState());
+                    }
+                    posAtt.PositionIncrement = nextIncrement;
+                    termAtt.SetEmpty().Append(alternate);
+                    pendingCapture = true;
+                }
+
+                // Nothing was queued, so the current attributes are the single token
+                // to return; no capture/restore round trip is needed.
+                if (remainingTokens.Count == 0)
+                {
+                    return true;
+                }
+
+                // Queue the last written token too; the next loop pass emits the queue head.
+                if (pendingCapture)
+                {
+                    remainingTokens.AddLast(CaptureState());
+                }
+            }
+        }
+
+        /// <summary>
+        /// Resets the wrapped stream and discards any tokens still queued for emission.
+        /// </summary>
+        public override void Reset()
+        {
+            m_input.Reset();
+            remainingTokens.Clear();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
new file mode 100644
index 0000000..d70fd41
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilterFactory.cs
@@ -0,0 +1,67 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Phonetic
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Factory for <see cref="DoubleMetaphoneFilter"/>.
+    /// <code>
+    /// &lt;fieldType name="text_dblmtphn" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.DoubleMetaphoneFilterFactory" inject="true" maxCodeLength="4"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    public class DoubleMetaphoneFilterFactory : TokenFilterFactory
+    {
+        /// <summary>parameter name: true if encoded tokens should be added as synonyms</summary>
+        public static readonly string INJECT = "inject";
+        /// <summary>parameter name: restricts the length of the phonetic code</summary>
+        public static readonly string MAX_CODE_LENGTH = "maxCodeLength";
+        /// <summary>default maxCodeLength if not specified</summary>
+        public static readonly int DEFAULT_MAX_CODE_LENGTH = 4;
+
+        private readonly bool inject;
+        private readonly int maxCodeLength;
+
+        /// <summary>
+        /// Creates a new <see cref="DoubleMetaphoneFilterFactory"/>
+        /// </summary>
+        /// <param name="args">
+        /// Factory arguments. Recognized keys: <see cref="INJECT"/> (boolean, default <c>true</c>)
+        /// and <see cref="MAX_CODE_LENGTH"/> (integer, default <see cref="DEFAULT_MAX_CODE_LENGTH"/>).
+        /// </param>
+        /// <exception cref="ArgumentException">
+        /// Thrown when <paramref name="args"/> still contains entries after the recognized
+        /// parameters have been read (the base-class getters are expected to remove the keys
+        /// they consume).
+        /// </exception>
+        public DoubleMetaphoneFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            inject = GetBoolean(args, INJECT, true);
+            maxCodeLength = GetInt32(args, MAX_CODE_LENGTH, DEFAULT_MAX_CODE_LENGTH);
+
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its .NET type name
+                // (unlike Java's Map.toString()), so list the offending keys explicitly.
+                throw new ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+            }
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a <see cref="DoubleMetaphoneFilter"/> configured
+        /// with this factory's maximum code length and inject setting.
+        /// </summary>
+        /// <param name="input">The token stream to filter.</param>
+        /// <returns>A new <see cref="DoubleMetaphoneFilter"/> over <paramref name="input"/>.</returns>
+        public override TokenStream Create(TokenStream input)
+        {
+            return new DoubleMetaphoneFilter(input, maxCodeLength, inject);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs b/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs
new file mode 100644
index 0000000..b1db7fa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/AbstractCaverphone .cs	
@@ -0,0 +1,78 @@
+// commons-codec version compatibility level: 1.9
+namespace Lucene.Net.Analysis.Phonetic.Language
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Base class for encoders that turn a string into a Caverphone value.
+    /// <para/>
+    /// Caverphone is an algorithm created by the Caversham Project at the University of Otago.
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// See <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a>
+    /// </summary>
+    public abstract class AbstractCaverphone : IStringEncoder
+    {
+        /// <summary>
+        /// Creates an instance of the Caverphone encoder
+        /// </summary>
+        public AbstractCaverphone()
+        {
+        }
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //    /**
+        //     * Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of
+        //     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String.
+        //     *
+        //     * @param source
+        //     *            Object to encode
+        //     * @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String
+        //     *         supplied.
+        //     * @throws EncoderException
+        //     *             if the parameter supplied is not of type java.lang.String
+        //     */
+        //    @Override
+        //public Object encode(final Object source) throws EncoderException
+        //    {
+        //    if (!(source instanceof String)) {
+        //            throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String");
+        //        }
+        //    return this.encode((String) source);
+        //    }
+
+        // LUCENENET specific - must provide implementation for IStringEncoder
+        /// <summary>
+        /// Encodes <paramref name="source"/>; the concrete algorithm is supplied by subclasses.
+        /// </summary>
+        /// <param name="source">The string to encode.</param>
+        /// <returns>The encoded form of <paramref name="source"/>.</returns>
+        public abstract string Encode(string source);
+
+        /// <summary>
+        /// Tests if the encodings of two strings are equal.
+        /// <para/>
+        /// This method might be promoted to a new AbstractStringEncoder superclass.
+        /// </summary>
+        /// <param name="str1">First of two strings to compare.</param>
+        /// <param name="str2">Second of two strings to compare.</param>
+        /// <returns><c>true</c> if the encodings of these strings are identical, <c>false</c> otherwise.</returns>
+        public virtual bool IsEncodeEqual(string str1, string str2)
+        {
+            // Both inputs are encoded (first, then second) before the results are compared.
+            string firstCode = this.Encode(str1);
+            string secondCode = this.Encode(str2);
+            return firstCode.Equals(secondCode);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
new file mode 100644
index 0000000..bb1f683
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/BeiderMorseEncoder.cs
@@ -0,0 +1,163 @@
+// commons-codec version compatibility level: 1.9
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Encodes strings into their Beider-Morse phonetic encoding.
+    /// </summary>
+    /// <remarks>
+    /// Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range
+    /// of words.
+    /// <para/>
+    /// This encoder is intentionally mutable to allow dynamic configuration through its properties. As a
+    /// consequence it may not be thread-safe. If you require a guaranteed thread-safe encoding then use
+    /// <see cref="PhoneticEngine"/> directly.
+    /// <para/>
+    /// <b>Encoding overview</b>
+    /// <para/>
+    /// Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what
+    /// language the word comes from. For example, if it ends in "<c>ault</c>" then it infers that the word is French.
+    /// Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some
+    /// runs of letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up
+    /// into phonemes at different places, so this stage results in a set of possible language-specific phonetic
+    /// representations. Lastly, this language-specific phonetic representation is processed by a table of rules that
+    /// re-writes it phonetically taking into account systematic pronunciation differences between languages, to move
+    /// it towards a pan-indo-european phonetic representation. Again, sometimes there are multiple ways this could be
+    /// done and sometimes things that can be pronounced in several ways in the source language have only one way to
+    /// represent them in this average phonetic language, so the result is again a set of phonetic spellings.
+    /// <para/>
+    /// Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated.
+    /// In this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final
+    /// encoding. Secondly, some names have standard prefixes, for example, "<c>Mac/Mc</c>" in Scottish (English)
+    /// names. As sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word
+    /// is encoded once with the prefix and once without it. The resulting encoding contains one and then the other result.
+    /// <para/>
+    /// <b>Encoding format</b>
+    /// <para/>
+    /// Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where
+    /// there are multiple possible phonetic representations, these are joined with a pipe (<c>|</c>) character.
+    /// If multiple hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed
+    /// in parentheses and these blocks are then joined with hyphens. For example, "<c>d'ortley</c>" has a possible
+    /// prefix. The form without prefix encodes to <c>ortlaj|ortlej</c>, while the form with prefix encodes to
+    /// <c>dortlaj|dortlej</c>. Thus, the full, combined encoding is <c>(ortlaj|ortlej)-(dortlaj|dortlej)</c>.
+    /// <para/>
+    /// The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many
+    /// potential phonetic interpretations. For example, <c>Renault</c> encodes to
+    /// <c>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</c>. The <see cref="RuleType.APPROX"/> rules will tend to produce larger
+    /// encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word.
+    /// Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
+    /// splitting on pipe (<c>|</c>) and indexing under each of these alternatives.
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class BeiderMorseEncoder : IStringEncoder
+    {
+        // Implementation note: This class is a spring-friendly facade to PhoneticEngine. It allows read/write configuration
+        // of an immutable PhoneticEngine instance that will be delegated to for the actual encoding.
+
+        // The engine currently delegated to; swapped for a new instance whenever a setting changes.
+        private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true);
+
+        // LUCENENET specific - in .NET we don't need an object overload, since strings are sealed anyway.
+        //public object Encode(object source)
+        //{
+        //    if (!(source is string))
+        //    {
+        //        throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String");
+        //    }
+        //    return encode((string)source);
+        //}
+
+        /// <summary>
+        /// Encodes <paramref name="source"/> with the currently configured engine.
+        /// </summary>
+        /// <param name="source">The string to encode; may be <c>null</c>.</param>
+        /// <returns>
+        /// <c>null</c> when <paramref name="source"/> is <c>null</c>, otherwise the engine's encoding of it.
+        /// </returns>
+        public virtual string Encode(string source)
+        {
+            return source == null ? null : this.engine.Encode(source);
+        }
+
+        /// <summary>
+        /// Gets or Sets the name type currently in operation. Use <see cref="NameType.GENERIC"/> unless you specifically want phonetic encodings
+        /// optimized for Ashkenazi or Sephardic Jewish family names.
+        /// </summary>
+        // LUCENENET NOTE: Made setter into property because
+        // its behavior is similar to what would happen when
+        // setting a property, even though it is actually
+        // replacing a related instance.
+        public virtual NameType NameType
+        {
+            get { return this.engine.NameType; }
+            set
+            {
+                // Rebuild the engine with the new name type, carrying over the other settings.
+                PhoneticEngine current = this.engine;
+                this.engine = new PhoneticEngine(value, current.RuleType, current.IsConcat, current.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Gets or Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered.
+        /// <see cref="RuleType.APPROX"/> or <see cref="RuleType.EXACT"/> for approximate or exact phonetic matches.
+        /// </summary>
+        public virtual RuleType RuleType
+        {
+            get { return this.engine.RuleType; }
+            set
+            {
+                // Rebuild the engine with the new rule type, carrying over the other settings.
+                PhoneticEngine current = this.engine;
+                this.engine = new PhoneticEngine(current.NameType, value, current.IsConcat, current.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Gets or Sets how multiple possible phonetic encodings are combined.
+        /// <c>true</c> if multiple encodings are to be combined with a '|', <c>false</c> if just the first one is
+        /// to be considered.
+        /// </summary>
+        public virtual bool IsConcat
+        {
+            get { return this.engine.IsConcat; }
+            set
+            {
+                // Rebuild the engine with the new concat flag, carrying over the other settings.
+                PhoneticEngine current = this.engine;
+                this.engine = new PhoneticEngine(current.NameType, current.RuleType, value, current.MaxPhonemes);
+            }
+        }
+
+        /// <summary>
+        /// Sets the maximum number of phonemes that shall be considered by the engine.
+        /// <para/>
+        /// since 1.7
+        /// </summary>
+        /// <param name="maxPhonemes">the maximum number of phonemes returned by the engine</param>
+        public virtual void SetMaxPhonemes(int maxPhonemes)
+        {
+            // Rebuild the engine with the new phoneme limit, carrying over the other settings.
+            PhoneticEngine current = this.engine;
+            this.engine = new PhoneticEngine(current.NameType, current.RuleType, current.IsConcat, maxPhonemes);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
new file mode 100644
index 0000000..5889e8f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Lang.cs
@@ -0,0 +1,276 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Reflection;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Language guessing utility.
+    /// </summary>
+    /// <remarks>
+    /// This class encapsulates rules used to guess the possible languages that a word originates from. This is
+    /// done by reference to a whole series of rules distributed in resource files.
+    /// <para/>
+    /// Instances of this class are typically managed through the static factory method <see cref="GetInstance(NameType)"/>.
+    /// Unless you are developing your own language guessing rules, you will not need to interact with this class directly.
+    /// <para/>
+    /// This class is intended to be immutable and thread-safe.
+    /// <para/>
+    /// <b>Lang resources</b>
+    /// <para/>
+    /// Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files.
+    /// They are systematically named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.lang.txt</c>
+    /// The format of these resources is the following:
+    /// <list type="table">
+    ///     <item>
+    ///         <term>Rules:</term>
+    ///         <description>
+    ///             Whitespace separated strings.
+    ///             There should be 3 columns to each row, and these will be interpreted as:
+    ///             <list type="number">
+    ///                 <item><term>pattern:</term><description>a regular expression.</description></item>
+    ///                 <item><term>languages:</term><description>a '+'-separated list of languages.</description></item>
+    ///                 <item><term>acceptOnMatch:</term><description>'true' or 'false' indicating if a match rules in or rules out the language. Only the exact text 'true' is read as true.</description></item>
+    ///             </list>
+    ///         </description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// Port of lang.php
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Lang
+    {
+        // Implementation note: This class is divided into two sections. The first part is a static factory interface that
+        // exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
+        // encapsulate a particular language-guessing rule table and the language guessing itself.
+        //
+        // It may make sense in the future to expose the private constructor to allow power users to build custom language-
+        // guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
+        // should be strongly encouraged to use the static factory GetInstance method to get their Lang instances.
+
+        // Splits a rule line into its whitespace-separated columns.
+        private static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
+
+        // Splits the languages column on the literal '+' separator.
+        private static readonly Regex TOKEN = new Regex("\\+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// One language-guessing rule: a pattern, the languages it refers to, and whether a match
+        /// rules those languages in (<c>acceptOnMatch == true</c>) or out.
+        /// </summary>
+        private sealed class LangRule
+        {
+            internal readonly bool acceptOnMatch;
+            internal readonly ISet<string> languages;
+            private readonly Regex pattern;
+
+            internal LangRule(Regex pattern, ISet<string> languages, bool acceptOnMatch)
+            {
+                this.pattern = pattern;
+                this.languages = languages;
+                this.acceptOnMatch = acceptOnMatch;
+            }
+
+            /// <summary>
+            /// Tests whether the rule's pattern is found anywhere in <paramref name="txt"/>.
+            /// </summary>
+            public bool Matches(string txt)
+            {
+                return this.pattern.IsMatch(txt);
+            }
+        }
+
+        // One Lang per NameType, filled once by the static constructor.
+        private static readonly IDictionary<NameType, Lang> Langs = new Dictionary<NameType, Lang>();
+
+        // Name of the built-in rules resource used for every NameType.
+        private static readonly string LANGUAGE_RULES_RN = "lang.txt";
+
+        static Lang()
+        {
+            foreach (NameType s in Enum.GetValues(typeof(NameType)))
+            {
+                Langs[s] = LoadFromResource(LANGUAGE_RULES_RN, Languages.GetInstance(s));
+            }
+        }
+
+        /// <summary>
+        /// Gets a Lang instance for one of the supported <see cref="NameType"/>s.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to look up.</param>
+        /// <returns>
+        /// A Lang encapsulating the language guessing rules for that name type,
+        /// or <c>null</c> if no instance is registered for <paramref name="nameType"/>.
+        /// </returns>
+        public static Lang GetInstance(NameType nameType)
+        {
+            Lang result;
+            Langs.TryGetValue(nameType, out result);
+            return result;
+        }
+
+        /// <summary>
+        /// Loads language rules from a resource.
+        /// <para/>
+        /// In normal use, you will obtain instances of Lang through the <see cref="GetInstance(NameType)"/> method.
+        /// You will only need to call this yourself if you are developing custom language mapping rules.
+        /// </summary>
+        /// <param name="languageRulesResourceName">The fully-qualified or partially-qualified resource name to load.</param>
+        /// <param name="languages">The languages that these rules will support.</param>
+        /// <returns>A Lang encapsulating the loaded language-guessing rules.</returns>
+        /// <exception cref="InvalidOperationException">The resource could not be found.</exception>
+        /// <exception cref="ArgumentException">A rule line does not consist of exactly 3 columns.</exception>
+        public static Lang LoadFromResource(string languageRulesResourceName, Languages languages)
+        {
+            IList<LangRule> rules = new List<LangRule>();
+            Stream lRulesIS = typeof(Lang).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Lang), languageRulesResourceName);
+
+            if (lRulesIS == null)
+            {
+                // Report the resource that was actually requested, which need not be the built-in default.
+                throw new InvalidOperationException("Unable to resolve required resource: " + languageRulesResourceName);
+            }
+
+            using (TextReader reader = new StreamReader(lRulesIS, ResourceConstants.ENCODING))
+            {
+                bool inExtendedComment = false;
+                string rawLine;
+                while ((rawLine = reader.ReadLine()) != null)
+                {
+                    string line = rawLine;
+                    if (inExtendedComment)
+                    {
+                        // check for closing comment marker, otherwise discard doc comment line
+                        if (line.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = false;
+                        }
+                    }
+                    else
+                    {
+                        if (line.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                        {
+                            inExtendedComment = true;
+                        }
+                        else
+                        {
+                            // discard comments; ordinal search so the marker is matched independently of the current culture
+                            int cmtI = line.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
+                            if (cmtI >= 0)
+                            {
+                                line = line.Substring(0, cmtI);
+                            }
+
+                            // trim leading-trailing whitespace
+                            line = line.Trim();
+
+                            if (line.Length == 0)
+                            {
+                                continue; // empty lines can be safely skipped
+                            }
+
+                            // split it up
+                            string[] parts = WHITESPACE.Split(line);
+
+                            if (parts.Length != 3)
+                            {
+                                throw new ArgumentException("Malformed line '" + rawLine +
+                                        "' in language resource '" + languageRulesResourceName + "'");
+                            }
+
+                            Regex pattern = new Regex(parts[0], RegexOptions.Compiled);
+                            string[] langs = TOKEN.Split(parts[1]);
+                            bool accept = parts[2].Equals("true");
+
+                            rules.Add(new LangRule(pattern, new HashSet<string>(langs), accept));
+                        }
+                    }
+                }
+            }
+            return new Lang(rules, languages);
+        }
+
+        private readonly Languages languages;
+        private readonly IList<LangRule> rules;
+
+        private Lang(IList<LangRule> rules, Languages languages)
+        {
+            this.rules = Collections.UnmodifiableList(rules);
+            this.languages = languages;
+        }
+
+        /// <summary>
+        /// Guesses the language of a word.
+        /// </summary>
+        /// <param name="text">The word.</param>
+        /// <returns>The language that the word originates from or <see cref="Languages.ANY"/> if there was no unique match.</returns>
+        public virtual string GuessLanguage(string text)
+        {
+            LanguageSet ls = GuessLanguages(text);
+            return ls.IsSingleton ? ls.GetAny() : Languages.ANY;
+        }
+
+        /// <summary>
+        /// Guesses the languages of a word.
+        /// </summary>
+        /// <remarks>
+        /// The input is lower-cased (invariant culture) and every rule is applied in order, starting from
+        /// the full set of supported languages. If the rules eliminate every language,
+        /// <see cref="Languages.ANY_LANGUAGE"/> is returned instead of the empty set.
+        /// </remarks>
+        /// <param name="input">The word.</param>
+        /// <returns>A Set of Strings of language names that are potential matches for the input word.</returns>
+        public virtual LanguageSet GuessLanguages(string input)
+        {
+            string text = input.ToLowerInvariant();
+
+            // Work on a private copy so the shared set of supported languages is never modified.
+            ISet<string> langs = new HashSet<string>(this.languages.GetLanguages());
+            foreach (LangRule rule in this.rules)
+            {
+                if (!rule.Matches(text))
+                {
+                    continue;
+                }
+
+                if (rule.acceptOnMatch)
+                {
+                    // A match rules the languages in: keep only the candidates the rule names.
+                    langs.IntersectWith(rule.languages);
+                }
+                else
+                {
+                    // A match rules the languages out: drop every candidate the rule names.
+                    langs.ExceptWith(rule.languages);
+                }
+            }
+
+            LanguageSet ls = LanguageSet.From(langs);
+            return ls.Equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
new file mode 100644
index 0000000..0b058ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
@@ -0,0 +1,324 @@
+// commons-codec version compatibility level: 1.9
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Language codes.
+    /// </summary>
+    /// <remarks>
+    /// Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
+    /// systematically named following the pattern:
+    /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_languages.txt</c>
+    /// <para/>
+    /// The format of these resources is the following:
+    /// <list type="bullet">
+    ///     <item>
+    ///         <term>Language:</term>
+    ///         <description>A single string containing no whitespace.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>End-of-line comments:</term>
+    ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Multi-line comments:</term>
+    ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
+    ///     </item>
+    ///     <item>
+    ///         <term>Blank lines:</term>
+    ///         <description>All blank lines will be skipped.</description>
+    ///     </item>
+    /// </list>
+    /// <para/>
+    /// NOTE(review): <see cref="GetInstance(string)"/> as written only skips multi-line comments and blank lines.
+    /// It does not strip '//' text, so a line containing an end-of-line comment would be stored verbatim
+    /// (after trimming) as a language name. Confirm the resource files never rely on end-of-line comments.
+    /// <para/>
+    /// Ported from language.php
+    /// <para/>
+    /// This class is immutable and thread-safe.
+    /// <para/>
+    /// since 1.6
+    /// </remarks>
+    public class Languages
+    {
+        // Implementation note: This class is divided into two sections. The first part is a static factory interface that
+        // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
+        // languages, and a second part that provides instance methods for accessing this set of supported languages.
+
+        /// <summary>
+        /// Name used to denote "any language".
+        /// </summary>
+        public static readonly string ANY = "any";
+
+        // One Languages instance per NameType, filled once by the static constructor.
+        private static readonly IDictionary<NameType, Languages> LANGUAGES = new Dictionary<NameType, Languages>();
+
+        static Languages()
+        {
+            foreach (NameType nameType in Enum.GetValues(typeof(NameType)))
+            {
+                string resourceName = LangResourceName(nameType);
+                LANGUAGES[nameType] = GetInstance(resourceName);
+            }
+        }
+
+        /// <summary>
+        /// Gets the pre-loaded <see cref="Languages"/> for a <see cref="NameType"/>.
+        /// </summary>
+        /// <param name="nameType">The <see cref="NameType"/> to look up.</param>
+        /// <returns>The registered instance, or <c>null</c> if none is registered for <paramref name="nameType"/>.</returns>
+        public static Languages GetInstance(NameType nameType)
+        {
+            Languages found;
+            return LANGUAGES.TryGetValue(nameType, out found) ? found : null;
+        }
+
+        /// <summary>
+        /// Loads a set of language names from an embedded resource.
+        /// </summary>
+        /// <remarks>
+        /// Each line is trimmed. Lines from one starting with the multi-line comment start marker up to
+        /// and including the next line ending with the end marker are skipped, as are blank lines.
+        /// Every other line is added as a language name.
+        /// </remarks>
+        /// <param name="languagesResourceName">Name of the resource to load.</param>
+        /// <returns>A <see cref="Languages"/> wrapping an unmodifiable set of the names read.</returns>
+        /// <exception cref="ArgumentException">The resource could not be found.</exception>
+        public static Languages GetInstance(string languagesResourceName)
+        {
+            Stream resourceStream = typeof(Languages).GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(Languages), languagesResourceName);
+            if (resourceStream == null)
+            {
+                throw new ArgumentException("Unable to resolve required resource: " + languagesResourceName);
+            }
+
+            // read languages list
+            ISet<string> names = new HashSet<string>();
+            using (TextReader reader = new StreamReader(resourceStream, ResourceConstants.ENCODING))
+            {
+                bool insideBlockComment = false;
+                for (string rawLine = reader.ReadLine(); rawLine != null; rawLine = reader.ReadLine())
+                {
+                    string trimmed = rawLine.Trim();
+
+                    if (insideBlockComment)
+                    {
+                        // Stay in comment mode until a line ends with the closing marker.
+                        insideBlockComment = !trimmed.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal);
+                        continue;
+                    }
+
+                    if (trimmed.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
+                    {
+                        insideBlockComment = true;
+                        continue;
+                    }
+
+                    if (trimmed.Length != 0)
+                    {
+                        names.Add(trimmed);
+                    }
+                }
+            }
+
+            return new Languages(Collections.UnmodifiableSet(names));
+        }
+
+        // Builds the resource name "<short name>_languages.txt" for a name type.
+        private static string LangResourceName(NameType nameType)
+        {
+            return nameType.GetName() + "_languages.txt";
+        }
+
+        private readonly ISet<string> languages;
+
+        /// <summary>
+        /// The empty language set: contains nothing and stays empty under restriction.
+        /// </summary>
+        private class NoLanguagesLanguageSet : LanguageSet
+        {
+            public override bool Contains(string language)
+            {
+                return false;
+            }
+
+            public override string GetAny()
+            {
+                throw new InvalidOperationException("Can't fetch any language from the empty language set.");
+            }
+
+            public override bool IsEmpty
+            {
+                get { return true; }
+            }
+
+            public override bool IsSingleton
+            {
+                get { return false; }
+            }
+
+            public override LanguageSet RestrictTo(LanguageSet other)
+            {
+                // Restricting nothing still yields nothing.
+                return this;
+            }
+
+            public override string ToString()
+            {
+                return "NO_LANGUAGES";
+            }
+        }
+
+        /// <summary>
+        /// No languages at all.
+        /// </summary>
+        public static readonly LanguageSet NO_LANGUAGES = new NoLanguagesLanguageSet();
+
+        /// <summary>
+        /// The universal language set: contains every language, so restricting it yields the other set.
+        /// </summary>
+        private class AnyLanguageLanguageSet : LanguageSet
+        {
+            public override bool Contains(string language)
+            {
+                return true;
+            }
+
+            public override string GetAny()
+            {
+                throw new InvalidOperationException("Can't fetch any language from the any language set.");
+            }
+
+            public override bool IsEmpty
+            {
+                get { return false; }
+            }
+
+            public override bool IsSingleton
+            {
+                get { return false; }
+            }
+
+            public override LanguageSet RestrictTo(LanguageSet other)
+            {
+                // Everything restricted to "other" is just "other".
+                return other;
+            }
+
+            public override string ToString()
+            {
+                return "ANY_LANGUAGE";
+            }
+        }
+
+        /// <summary>
+        /// Any/all languages.
+        /// </summary>
+        public static readonly LanguageSet ANY_LANGUAGE = new AnyLanguageLanguageSet();
+
+        private Languages(ISet<string> languages)
+        {
+            this.languages = languages;
+        }
+
+        /// <summary>
+        /// Gets the set of language names held by this instance.
+        /// </summary>
+        public virtual ISet<string> GetLanguages() // LUCENENET NOTE: Kept as GetLanguages() because of naming conflict
+        {
+            return this.languages;
+        }
+    }
+
+    /// <summary>
+    /// A set of languages.
+    /// </summary>
+    public abstract class LanguageSet
+    {
+        /// <summary>
+        /// Creates a language set from the given names.
+        /// </summary>
+        /// <param name="langs">The language names.</param>
+        /// <returns>
+        /// <see cref="Languages.NO_LANGUAGES"/> when <paramref name="langs"/> is empty,
+        /// otherwise a <see cref="SomeLanguages"/> over the given names.
+        /// </returns>
+        public static LanguageSet From(ISet<string> langs)
+        {
+            if (langs.Count == 0)
+            {
+                return Languages.NO_LANGUAGES;
+            }
+
+            return new SomeLanguages(langs);
+        }
+
+        /// <summary>
+        /// Tests whether <paramref name="language"/> is a member of this set.
+        /// </summary>
+        public abstract bool Contains(string language);
+
+        /// <summary>
+        /// Gets one language from this set.
+        /// </summary>
+        public abstract string GetAny();
+
+        /// <summary>
+        /// Gets whether this set contains no languages.
+        /// </summary>
+        public abstract bool IsEmpty { get; }
+
+        /// <summary>
+        /// Gets whether this set contains exactly one language.
+        /// </summary>
+        public abstract bool IsSingleton { get; }
+
+        /// <summary>
+        /// Restricts this set to the languages that are also in <paramref name="other"/>.
+        /// </summary>
+        public abstract LanguageSet RestrictTo(LanguageSet other);
+    }
+
+    /// <summary>
+    /// Some languages, explicitly enumerated.
+    /// </summary>
+    public sealed class SomeLanguages : LanguageSet
+    {
+        private readonly ISet<string> languages;
+
+        internal SomeLanguages(ISet<string> languages)
+        {
+            this.languages = Collections.UnmodifiableSet(languages);
+        }
+
+        /// <summary>
+        /// Tests whether <paramref name="language"/> is one of the enumerated languages.
+        /// </summary>
+        public override bool Contains(string language)
+        {
+            return this.languages.Contains(language);
+        }
+
+        /// <summary>
+        /// Gets the first language produced when enumerating the set, or <c>null</c> if the set is empty.
+        /// </summary>
+        public override string GetAny()
+        {
+            return this.languages.FirstOrDefault();
+        }
+
+        /// <summary>
+        /// Gets the enumerated languages.
+        /// </summary>
+        public ISet<string> GetLanguages()
+        {
+            return this.languages;
+        }
+
+        public override bool IsEmpty
+        {
+            get { return this.languages.Count == 0; }
+        }
+
+        public override bool IsSingleton
+        {
+            get { return this.languages.Count == 1; }
+        }
+
+        /// <summary>
+        /// Computes the languages common to this set and <paramref name="other"/>.
+        /// </summary>
+        /// <param name="other">
+        /// The set to restrict to. Must be <see cref="Languages.NO_LANGUAGES"/>,
+        /// <see cref="Languages.ANY_LANGUAGE"/> or a <see cref="SomeLanguages"/>; any other
+        /// implementation fails the cast below.
+        /// </param>
+        /// <returns>The intersection, which is <see cref="Languages.NO_LANGUAGES"/> when nothing is shared.</returns>
+        public override LanguageSet RestrictTo(LanguageSet other)
+        {
+            if (other == Languages.NO_LANGUAGES)
+            {
+                return other;
+            }
+            else if (other == Languages.ANY_LANGUAGE)
+            {
+                return this;
+            }
+            else
+            {
+                SomeLanguages sl = (SomeLanguages)other;
+                ISet<string> ls = new HashSet<string>();
+                foreach (string lang in languages)
+                {
+                    if (sl.languages.Contains(lang))
+                    {
+                        ls.Add(lang);
+                    }
+                }
+                return From(ls);
+            }
+        }
+
+        /// <summary>
+        /// Returns a readable listing of the member languages, e.g. <c>Languages([english, french])</c>.
+        /// </summary>
+        public override string ToString()
+        {
+            // Render the members explicitly rather than relying on the set's own ToString(),
+            // which for standard .NET collections is just the type name.
+            return "Languages([" + string.Join(", ", this.languages) + "])";
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
new file mode 100644
index 0000000..1b8641c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/NameType.cs
@@ -0,0 +1,69 @@
+// commons-codec version compatibility level: 1.9
+using System;
+
+namespace Lucene.Net.Analysis.Phonetic.Language.Bm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Supported types of names. Unless you are matching particular family names, use <see cref="GENERIC"/>. The
+    /// <c>GENERIC</c> NameType should work reasonably well for non-name words. The other encodings are
+    /// specifically tuned to family names, and may not work well at all for general text.
+    /// <para/>
+    /// The short string for each value is provided by <see cref="NameTypeExtensions.GetName(NameType)"/>.
+    /// <para/>
+    /// since 1.6
+    /// </summary>
+    public enum NameType
+    {
+        /// <summary>
+        /// Ashkenazi family names (short name <c>"ash"</c>).
+        /// </summary>
+        ASHKENAZI,
+
+        /// <summary>
+        /// Generic names and words (short name <c>"gen"</c>).
+        /// </summary>
+        GENERIC,
+
+        /// <summary>
+        /// Sephardic family names (short name <c>"sep"</c>).
+        /// </summary>
+        SEPHARDIC
+    }
+
+    /// <summary>
+    /// Extension methods for <see cref="NameType"/>.
+    /// </summary>
+    public static class NameTypeExtensions
+    {
+        /// <summary>
+        /// Gets the short version of the name type.
+        /// </summary>
+        /// <param name="nameType">the <see cref="NameType"/></param>
+        /// <returns>the <see cref="NameType"/> short string: <c>"ash"</c>, <c>"gen"</c> or <c>"sep"</c></returns>
+        /// <exception cref="ArgumentException"><paramref name="nameType"/> is not a declared <see cref="NameType"/> value.</exception>
+        public static string GetName(this NameType nameType)
+        {
+            if (nameType == NameType.ASHKENAZI)
+            {
+                return "ash";
+            }
+
+            if (nameType == NameType.GENERIC)
+            {
+                return "gen";
+            }
+
+            if (nameType == NameType.SEPHARDIC)
+            {
+                return "sep";
+            }
+
+            // Reached only for a value that is not one of the declared members.
+            throw new ArgumentException("Invalid nameType.");
+        }
+    }
+}