You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/06/27 20:33:55 UTC

[10/15] lucenenet git commit: Added Lucene.Net.Analysis.Phonetic + tests. Rather than porting over the entire commons-codec library, only the language features were ported and added to this library.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
new file mode 100644
index 0000000..2a69a96
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_dutch.txt
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// CONSONANTS
+"ssj" "" "" "S"
+"sj" "" "" "S"
+"ch" "" "" "x"
+"c" "" "[eiy]" "ts"   
+"ck" "" "" "k"     // German
+"pf" "" "" "(pf|p|f)" // German
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+"th" "^" "" "t" // German
+"th" "" "[äöüaeiou]" "(t|th)" // German
+"th" "" "" "t" // German
+"ss" "" "" "s"
+"h" "[aeiouy]" "" ""
+
+// VOWELS
+"aue" "" "" "aue" 
+"ou" "" "" "au" 
+"ie" "" "" "(Q|i)" 
+"uu" "" "" "(Q|u)"   
+"ee" "" "" "e"   
+"eu" "" "" "(Y|Yj)" // Dutch Y  
+"aa" "" "" "a"   
+"oo" "" "" "o"   
+"oe" "" "" "u"   
+"ij" "" "" "ej"
+"ui" "" "" "(Y|uj)"
+"ei" "" "" "(ej|aj)" // Dutch ej
+
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+"i" "[aou]" "" "j"
+"y" "[aeou]" "" "j"
+
+// LATIN ALPHABET     
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "(g|x)"
+"h" "" "" "h"
+"i" "" "" "(i|Q)"   
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"   
+"v" "" "" "v"
+"w" "" "" "(w|v)"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
new file mode 100644
index 0000000..db9ccec
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_english.txt
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"’" "" "" "" // O’Neill
+"'" "" "" "" // O’Neill
+"mc" "^" "" "mak" // McDonald
+"tz" "" "" "ts" // Fitzgerald
+"tch" "" "" "tS"
+"ch" "" "" "(tS|x)"
+"ck" "" "" "k"
+"cc" "" "[iey]" "ks" // success, accent
+"c" "" "c" ""
+"c" "" "[iey]" "s" // circle 
+
+"gh" "^" "" "g" // ghost
+"gh" "" "" "(g|f|w)" // burgh | tough | bough
+"gn" "" "" "(gn|n)"
+"g" "" "[iey]" "(g|dZ)" // get, gem, giant, gigabyte
+// "th" "" "" "(6|8|t)"
+"th" "" "" "t"
+"kh" "" "" "x"
+"ph" "" "" "f"
+"sch" "" "" "(S|sk)"
+"sh" "" "" "S"
+"who" "^" "" "hu"
+"wh" "^" "" "w"
+
+"h" "" "$" "" // hard to find an example that isn't in a name
+"h" "" "[^aeiou]" "" // hard to find an example that isn't in a name
+"h" "^" "" "H"
+
+"kn" "^" "" "n" // knight
+"mb" "" "$" "m"
+"ng" "" "$" "(N|ng)"
+"pn" "^" "" "(pn|n)"
+"ps" "^" "" "(ps|s)"
+"qu" "" "" "kw"
+"tia" "" "" "(So|Sa)"
+"tio" "" "" "So"
+"wr" "^" "" "r"
+"x" "^" "" "z"
+
+// VOWELS
+"y" "^" "" "j"
+"y" "^" "[aeiouy]" "j"
+"yi" "^" "" "i"
+"aue" "" "" "aue" 
+"oue" "" "" "(aue|oue)" 
+"ai" "" "" "(aj|ej|e)" // rain | said
+"ay" "" "" "(aj|ej)" 
+"a" "" "[^aeiou]e" "ej" // plane 
+"ei" "" "" "(ej|aj|i)" // weigh | receive
+"ey" "" "" "(ej|aj|i)" // hey | barley
+"ear" "" "" "ia" // tear
+"ea" "" "" "(i|e)" // reason | treasure
+"ee" "" "" "i" // between
+"e" "" "[^aeiou]e" "i" // meter
+"e" "" "$" "(|E)" // blame, badge
+"ie" "" "" "i" // believe
+"i" "" "[^aeiou]e" "aj" // five
+"oa" "" "" "ou" // toad
+"oi" "" "" "oj" // join
+"oo" "" "" "u" // food
+"ou" "" "" "(u|ou)" // through | tough | could
+"oy" "" "" "oj" // boy
+"o" "" "[^aeiou]e" "ou" // rode
+"u" "" "[^aeiou]e" "(ju|u)" // cute | flute
+"u" "" "r" "(e|u)" // turn -- Morse disagrees, feels it should go to E
+
+// LATIN ALPHABET
+"a" "" "" "(e|o|a)" // hat | call | part
+"b" "" "" "b"
+"c" "" "" "k" // candy
+"d" "" "" "d"
+"e" "" "" "E" // bed
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"    
+"i" "" "" "I" 
+"j" "" "" "dZ"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|a)" // hot 
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|a)" // put
+"v" "" "" "v"
+"w" "" "" "(w|v)" // the variant "v" is for spellings coming from German/Polish
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
new file mode 100644
index 0000000..e67a0ec
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_french.txt
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"lt" "u" "$" "(lt|)" // Renault
+"c" "n" "$" "(k|)" // Tronc
+//"f" "" "" "(f|)" // Clef
+"d" "" "$" "(t|)" // Durand
+"g" "n" "$" "(k|)" // Gang
+"p" "" "$" "(p|)" // Trop, Champ
+"r" "e" "$" "(r|)" // Barbier
+"t" "" "$" "(t|)" // Murat, Constant
+"z" "" "$" "(s|)" 
+
+"ds" "" "$" "(ds|)" 
+"ps" "" "$" "(ps|)" // Champs
+"rs" "e" "$" "(rs|)" 
+"ts" "" "$" "(ts|)" 
+"s" "" "$" "(s|)" // Denis
+
+"x" "u" "$" "(ks|)" // Arnoux
+
+"s" "[aeéèêiou]" "[^aeéèêiou]" "(s|)" // Deschamps, Malesherbes, Groslot
+"t" "[aeéèêiou]" "[^aeéèêiou]" "(t|)" // Petitjean
+
+"kh" "" "" "x" // foreign
+"ph" "" "" "f"
+
+"ç" "" "" "s"
+"x" "" "" "ks"
+"ch" "" "" "S"
+"c" "" "[eiyéèê]" "s"
+
+"gn" "" "" "(n|gn)"
+"g" "" "[eiy]" "Z" 
+"gue" "" "$" "k"     
+"gu" "" "[eiy]" "g" 
+"aill" "" "e" "aj" // non Jewish
+"ll" "" "e" "(l|j)" // non Jewish
+"que" "" "$" "k"
+"qu" "" "" "k"
+"s" "[aeiouyéèê]" "[aeiouyéèê]" "z"
+"h" "[bdgt]" "" "" // translit from Arabic
+
+"m" "[aeiouy]" "[aeiouy]" "m"  
+"m" "[aeiouy]" "" "(m|n)"  // nasal
+
+"ou" "" "[aeio]" "v" 
+"u" "" "[aeio]" "v" 
+
+// VOWELS
+"aue" "" "" "aue" 
+"eau" "" "" "o" 
+"au" "" "" "(o|au)" // non Jewish
+"ai" "" "" "(e|aj)" // [e] is non Jewish
+"ay" "" "" "(e|aj)" // [e] is non Jewish
+"é" "" "" "e"
+"ê" "" "" "e"
+"è" "" "" "e"
+"à" "" "" "a"
+"â" "" "" "a"
+"où" "" "" "u"
+"ou" "" "" "u"
+"oi" "" "" "(oj|va)" // [va] (actually "ua") is non Jewish
+"ei" "" "" "(aj|ej|e)" // [e] is non Jewish
+"ey" "" "" "(aj|ej|e)" // [e] non Jewish
+"eu" "" "" "(ej|Y)" // non Jewish
+"y" "[ou]" "" "j"
+"e" "" "$" "(e|)"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e" 
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i" 
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "(u|Q)"
+"v" "" "" "v"
+"w" "" "" "v"
+"y" "" "" "i"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
new file mode 100644
index 0000000..1e79c35
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_german.txt
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONSONANTS
+"ewitsch" "" "$" "evitS"
+"owitsch" "" "$" "ovitS"
+"evitsch" "" "$" "evitS"
+"ovitsch" "" "$" "ovitS"
+"witsch" "" "$" "vitS"
+"vitsch" "" "$" "vitS"
+"ssch" "" "" "S"
+"chsch" "" "" "xS"
+"sch" "" "" "S"
+
+"ziu" "" "" "tsu"
+"zia" "" "" "tsa"
+"zio" "" "" "tso"
+
+"chs" "" "" "ks"
+"ch" "" "" "x"
+"ck" "" "" "k"
+"c" "" "[eiy]" "ts"
+
+"sp" "^" "" "Sp"
+"st" "^" "" "St"
+"ssp" "" "" "(Sp|sp)"
+"sp" "" "" "(Sp|sp)"
+"sst" "" "" "(St|st)"
+"st" "" "" "(St|st)"
+"pf" "" "" "(pf|p|f)"
+"ph" "" "" "(ph|f)"
+"qu" "" "" "kv"
+
+"ewitz" "" "$" "(evits|evitS)"
+"ewiz" "" "$" "(evits|evitS)"
+"evitz" "" "$" "(evits|evitS)"
+"eviz" "" "$" "(evits|evitS)"
+"owitz" "" "$" "(ovits|ovitS)"
+"owiz" "" "$" "(ovits|ovitS)"
+"ovitz" "" "$" "(ovits|ovitS)"
+"oviz" "" "$" "(ovits|ovitS)"
+"witz" "" "$" "(vits|vitS)"
+"wiz" "" "$" "(vits|vitS)"
+"vitz" "" "$" "(vits|vitS)"
+"viz" "" "$" "(vits|vitS)"
+"tz" "" "" "ts"
+
+"thal" "" "$" "tal"
+"th" "^" "" "t"
+"th" "" "[äöüaeiou]" "(t|th)"
+"th" "" "" "t"
+"rh" "^" "" "r"
+"h" "[aeiouyäöü]" "" ""
+"h" "^" "" "H"
+
+"ss" "" "" "s"
+"s" "" "[äöüaeiouy]" "(z|s)"
+"s" "[aeiouyäöüj]" "[aeiouyäöü]" "z"
+"ß" "" "" "s"
+
+
+// VOWELS
+"ij" "" "$" "i"
+"aue" "" "" "aue"
+"ue" "" "" "Q"
+"ae" "" "" "Y"
+"oe" "" "" "Y"
+"ü" "" "" "Q"
+"ä" "" "" "Y"
+"ö" "" "" "Y"
+"ei" "" "" "(aj|ej)"
+"ey" "" "" "(aj|ej)"
+"eu" "" "" "(Yj|ej|aj|oj)"
+"i" "[aou]" "" "j"
+"y" "[aou]" "" "j"
+"ie" "" "" "I"
+"i" "" "[aou]" "j"
+"y" "" "[aoeu]" "j"
+
+// FOREIGN LETTERS
+"ñ" "" "" "n"
+"ã" "" "" "a"
+"ő" "" "" "o"
+"ű" "" "" "u"
+"ç" "" "" "s"
+
+// LATIN ALPHABET
+"a" "" "" "A"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "O"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "U"
+"v" "" "" "(f|v)"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "i"
+"z" "" "" "ts"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
new file mode 100644
index 0000000..f396a65
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greek.txt
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"αυ" "" "$" "af"  // "av" before vowels and voiced consonants, "af" elsewhere
+"αυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "af" 
+"αυ" "" "" "av" 
+"ευ" "" "$" "ef" // "ev" before vowels and voiced consonants, "ef" elsewhere
+"ευ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "ef" 
+"ευ" "" "" "ev" 
+"ηυ" "" "$" "if" // "iv" before vowels and voiced consonants, "if" elsewhere
+"ηυ" "" "(κ|π|σ|τ|φ|θ|χ|ψ)" "if" 
+"ηυ" "" "" "iv" 
+"ου" "" "" "u"  // [u:]
+
+"αι" "" "" "aj"  // modern [e]
+"ει" "" "" "ej" // modern [i]
+"οι" "" "" "oj" // modern [i]
+"ωι" "" "" "oj" 
+"ηι" "" "" "ej" 
+"υι" "" "" "i" // modern Greek "i"
+
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γγ" "" "(ε|ι|η)" "j"
+"γγ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γγ" "" "" "g" 
+"γκ" "^" "" "g"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "(ε|ι|η)" "(nj|j)"
+"γκ" "" "(ε|ι|η)" "j"
+"γκ" "(ε|ι|η|α|ο|ω|υ)" "" "(ng|g)"
+"γκ" "" "" "g" 
+"γι" "" "(α|ο|ω|υ)" "j"
+"γι" "" "" "(gi|i)"
+"γε" "" "(α|ο|ω|υ)" "j"
+"γε" "" "" "(ge|je)"
+
+"κζ" "" "" "gz"
+"τζ" "" "" "dz"
+"σ" "" "(β|γ|δ|μ|ν|ρ)" "z"
+
+"μβ" "" "" "(mb|b)"
+"μπ" "^" "" "b"
+"μπ" "(ε|ι|η|α|ο|ω|υ)" "" "mb"
+"μπ" "" "" "b" // after any consonant
+"ντ" "^" "" "d"
+"ντ" "(ε|ι|η|α|ο|ω|υ)" "" "(nd|nt)" // Greek is "nd" 
+"ντ" "" "" "(nt|d)" // Greek is "d" after any consonant
+
+"ά" "" "" "a"
+"έ" "" "" "e"
+"ή" "" "" "(i|e)" 
+"ί" "" "" "i"   
+"ό" "" "" "o"
+"ύ" "" "" "(Q|i|u)"
+"ώ" "" "" "o"
+"ΰ" "" "" "(Q|i|u)"
+"ϋ" "" "" "(Q|i|u)"
+"ϊ" "" "" "j"
+
+"α" "" "" "a"
+"β" "" "" "(v|b)" // modern "v", old "b"
+"γ" "" "" "g" 
+"δ" "" "" "d"    // modern like "th" in English "them", old "d"
+"ε" "" "" "e"
+"ζ" "" "" "z"
+"η" "" "" "(i|e)" // modern "i", old "e:"
+"ι" "" "" "i"
+"κ" "" "" "k"
+"λ" "" "" "l"
+"μ" "" "" "m"
+"ν" "" "" "n"
+"ξ" "" "" "ks"
+"ο" "" "" "o"
+"π" "" "" "p"
+"ρ" "" "" "r"
+"σ" "" "" "s"
+"ς" "" "" "s"
+"τ" "" "" "t" 
+"υ" "" "" "(Q|i|u)" // modern "i", old like German "ü"
+"φ" "" "" "f" 
+"θ" "" "" "t" // old greek like "th" in English "theme"
+"χ" "" "" "x"
+"ψ" "" "" "ps"
+"ω" "" "" "o"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
new file mode 100644
index 0000000..43ec3f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_greeklatin.txt
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"au" "" "$" "af"
+"au" "" "[kpstfh]" "af"
+"au" "" "" "av"
+"eu" "" "$" "ef"
+"eu" "" "[kpstfh]" "ef"
+"eu" "" "" "ev"
+"ou" "" "" "u"
+
+"gge" "[aeiouy]" "" "(nje|je)" // aggelopoulos
+"ggi" "[aeiouy]" "[aou]" "(nj|j)" 
+"ggi" "[aeiouy]" "" "(ni|i)" 
+"gge" "" "" "je"
+"ggi" "" "" "i"
+"gg" "[aeiouy]" "" "(ng|g)"
+"gg" "" "" "g" 
+"gk" "^" "" "g"
+"gke" "[aeiouy]" "" "(nje|je)"
+"gki" "[aeiouy]" "" "(ni|i)"
+"gke" "" "" "je"
+"gki" "" "" "i"
+"gk" "[aeiouy]" "" "(ng|g)"
+"gk" "" "" "g" 
+"nghi" "" "[aouy]" "Nj"
+"nghi" "" "" "(Ngi|Ni)" 
+"nghe" "" "[aouy]" "Nj"
+"nghe" "" "" "(Nje|Nge)" 
+"ghi" "" "[aouy]" "j"
+"ghi" "" "" "(gi|i)" 
+"ghe" "" "[aouy]" "j"
+"ghe" "" "" "(je|ge)" 
+"ngh" "" "" "Ng"
+"gh" "" "" "g"
+"ngi" "" "[aouy]" "Nj" 
+"ngi" "" "" "(Ngi|Ni)" 
+"nge" "" "[aouy]" "Nj" 
+"nge" "" "" "(Nje|Nge)" 
+"gi" "" "[aouy]" "j" 
+"gi" "" "" "(gi|i)" // what about Pantazis = Pantagis ???
+"ge" "" "[aouy]" "j" 
+"ge" "" "" "(je|ge)" 
+"ng" "" "" "Ng" // fragakis = fraggakis = frangakis; angel = agel = aggel 
+
+"i" "" "[aeou]" "j"
+"i" "[aeou]" "" "j"  
+"y" "" "[aeou]" "j"
+"y" "[aeou]" "" "j"  
+"yi" "" "[aeou]" "j"
+"yi" "" "" "i"
+
+"ch" "" "" "x"
+"kh" "" "" "x"
+"dh" "" "" "d"  // actually as "th" in English "that"
+"dj" "" "" "dZ" // Turkish words
+"ph" "" "" "f"
+"th" "" "" "t"
+"kz" "" "" "gz"
+"tz" "" "" "dz" 
+"s" "" "[bgdmnr]" "z"
+
+"mb" "" "" "(mb|b)" // Liberis = Limperis = Limberis
+"mp" "^" "" "b"
+"mp" "[aeiouy]" "" "mp"
+"mp" "" "" "b"
+"nt" "^" "" "d"
+"nt" "[aeiouy]" "" "(nd|nt)" // Greek "nd"
+"nt" "" "" "(nt|d)" // Greek "d" after any consonant
+
+"á" "" "" "a"  
+"é" "" "" "e"  
+"í" "" "" "i"  
+"ó" "" "" "o"  
+"óu" "" "" "u"  
+"ú" "" "" "u" 
+"ý" "" "" "(i|Q|u)" // [ü]
+
+"a" "" "" "a"
+"b" "" "" "(b|v)" // beta: modern "v", old "b"
+"c" "" "" "k"
+"d" "" "" "d"    // modern like "th" in English "them", old "d"
+"e" "" "" "e"
+"f" "" "" "f" 
+"g" "" "" "g" 
+"h" "" "" "x"
+"i" "" "" "i"
+"j" "" "" "(j|Z)" // Panajotti = Panaiotti; Louijos = Louizos; Pantajis = Pantazis = Pantagis
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"ο" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t" 
+"u" "" "" "u" 
+"v" "" "" "v" 
+"w" "" "" "v" // foreign
+"x" "" "" "ks"
+"y" "" "" "(i|Q|u)" // [ü] 
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
new file mode 100644
index 0000000..7e039d5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hebrew.txt
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// General = Ashkenazic
+
+"אי" "" "" "i"
+"עי" "" "" "i"
+"עו" "" "" "VV"
+"או" "" "" "VV"
+
+"ג׳" "" "" "Z"
+"ד׳" "" "" "dZ"
+
+"א" "" "" "L"
+"ב" "" "" "b"
+"ג" "" "" "g"
+"ד" "" "" "d"
+
+"ה" "^" "" "1"
+"ה" "" "$" "1"
+"ה" "" "" ""
+
+"וו" "" "" "V"
+"וי" "" "" "WW"
+"ו" "" "" "W"
+"ז" "" "" "z"
+"ח" "" "" "X"
+"ט" "" "" "T"
+"יי" "" "" "i"
+"י" "" "" "i"
+"ך" "" "" "X"
+"כ" "^" "" "K"
+"כ" "" "" "k"
+"ל" "" "" "l"
+"ם" "" "" "m"
+"מ" "" "" "m"
+"ן" "" "" "n"
+"נ" "" "" "n"
+"ס" "" "" "s"
+"ע" "" "" "L"
+"ף" "" "" "f"
+"פ" "" "" "f"
+"ץ" "" "" "C"
+"צ" "" "" "C"
+"ק" "" "" "K"
+"ר" "" "" "r"
+"ש" "" "" "s"
+"ת" "" "" "TB" // only Ashkenazic

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
new file mode 100644
index 0000000..615d26a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_hungarian.txt
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// CONSONANTS
+"sz" "" "" "s"
+"zs" "" "" "Z"
+"cs" "" "" "tS"
+
+"ay" "" "" "(oj|aj)"
+"ai" "" "" "(oj|aj)"
+"aj" "" "" "(oj|aj)"
+
+"ei" "" "" "(aj|ej)" // German element
+"ey" "" "" "(aj|ej)" // German element
+
+"y" "[áo]" "" "j"
+"i" "[áo]" "" "j"
+"ee" "" "" "(ej|e)" 
+"ely" "" "" "(ej|eli)"
+"ly" "" "" "(j|li)"
+"gy" "" "[aeouáéóúüöőű]" "dj"
+"gy" "" "" "(d|gi)"
+"ny" "" "[aeouáéóúüöőű]" "nj"
+"ny" "" "" "(n|ni)"
+"ty" "" "[aeouáéóúüöőű]" "tj"
+"ty" "" "" "(t|ti)"
+"qu" "" "" "(ku|kv)"
+"h" "" "$" ""
+
+// SPECIAL VOWELS
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"ö" "" "" "Y"
+"ő" "" "" "Y" 
+"ü" "" "" "Q"
+"ű" "" "" "Q"
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "(S|s)" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v" 
+"w" "" "" "v" 
+"x" "" "" "ks"
+"y" "" "" "i" 
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
new file mode 100644
index 0000000..8775edd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_italian.txt
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(l|gli)"
+"gn" "" "[aeou]" "(n|nj|gn)"
+"gni" "" "" "(ni|gni)"
+
+"gi" "" "[aeou]" "dZ"
+"gg" "" "[ei]" "dZ"
+"g" "" "[ei]" "dZ"
+"h" "[bdgt]" "" "g" // gh is It; others from Arabic translit
+"h" "" "$" "" // foreign
+
+"ci" "" "[aeou]" "tS"
+"ch" "" "[ei]" "k"
+"sc" "" "[ei]" "S" 
+"cc" "" "[ei]" "tS"
+"c" "" "[ei]" "tS"
+"s" "[aeiou]" "[aeiou]" "z"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aeou]" "" "j" // foreign
+"y" "" "[aeou]" "j" // foreign
+
+"qu" "" "" "k"    
+"uo" "" "" "(vo|o)"
+"u" "" "[aei]" "v" 
+
+"è" "" "" "e" 
+"é" "" "" "e" 
+"ò" "" "" "o"  
+"ó" "" "" "o" 
+
+// LATIN ALPHABET    
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(Z|dZ|j)" // foreign
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    // foreign
+"x" "" "" "ks"    // foreign
+"y" "" "" "i"    // foreign
+"z" "" "" "(ts|dz)"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
new file mode 100644
index 0000000..dd72f6a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_polish.txt
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERIC
+
+// CONVERTING FEMININE TO MASCULINE
+"ska" "" "$" "ski"   
+"cka" "" "$" "tski"   
+"lowa" "" "$" "(lova|lof|l|el)"   
+"kowa" "" "$" "(kova|kof|k|ek)"   
+"owa" "" "$" "(ova|of|)"  
+"lowna" "" "$" "(lovna|levna|l|el)" 
+"kowna" "" "$" "(kovna|k|ek)"  
+"owna" "" "$" "(ovna|)"   
+"lówna" "" "$" "(l|el)"   
+"kówna" "" "$" "(k|ek)"   
+"ówna" "" "$" ""   
+"a" "" "$" "(a|i)"   
+
+// CONSONANTS
+"czy" "" "" "tSi"
+"cze" "" "[bcdgkpstwzż]" "(tSe|tSF)"
+"ciewicz" "" "" "(tsevitS|tSevitS)"
+"siewicz" "" "" "(sevitS|SevitS)"
+"ziewicz" "" "" "(zevitS|ZevitS)"
+"riewicz" "" "" "rjevitS" 
+"diewicz" "" "" "djevitS" 
+"tiewicz" "" "" "tjevitS" 
+"iewicz" "" "" "evitS"
+"ewicz" "" "" "evitS"
+"owicz" "" "" "ovitS"
+"icz" "" "" "itS"
+"cz" "" "" "tS"
+"ch" "" "" "x"
+
+"cia" "" "[bcdgkpstwzż]" "(tSB|tsB)"
+"cia" "" "" "(tSa|tsa)" 
+"cią" "" "[bp]" "(tSom|tsom)"
+"cią" "" "" "(tSon|tson)"
+"cię" "" "[bp]" "(tSem|tsem)"
+"cię" "" "" "(tSen|tsen)"
+"cie" "" "[bcdgkpstwzż]" "(tSF|tsF)" 
+"cie" "" "" "(tSe|tse)" 
+"cio" "" "" "(tSo|tso)" 
+"ciu" "" "" "(tSu|tsu)" 
+"ci" "" "" "(tSi|tsI)"
+"ć" "" "" "(tS|ts)"
+
+"ssz" "" "" "S"
+"sz" "" "" "S"
+"sia" "" "[bcdgkpstwzż]" "(SB|sB|sja)" 
+"sia" "" "" "(Sa|sja)" 
+"sią" "" "[bp]" "(Som|som)"
+"sią" "" "" "(Son|son)"
+"się" "" "[bp]" "(Sem|sem)"
+"się" "" "" "(Sen|sen)"
+"sie" "" "[bcdgkpstwzż]" "(SF|sF|se)" 
+"sie" "" "" "(Se|se)" 
+"sio" "" "" "(So|so)" 
+"siu" "" "" "(Su|sju)" 
+"si" "" "" "(Si|sI)"
+"ś" "" "" "(S|s)"
+
+"zia" "" "[bcdgkpstwzż]" "(ZB|zB|zja)" 
+"zia" "" "" "(Za|zja)" 
+"zią" "" "[bp]" "(Zom|zom)"
+"zią" "" "" "(Zon|zon)"
+"zię" "" "[bp]" "(Zem|zem)"
+"zię" "" "" "(Zen|zen)"
+"zie" "" "[bcdgkpstwzż]" "(ZF|zF)" 
+"zie" "" "" "(Ze|ze)" 
+"zio" "" "" "(Zo|zo)" 
+"ziu" "" "" "(Zu|zju)" 
+"zi" "" "" "(Zi|zI)"
+
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF)"
+"że" "" "[bcdgkpstwzż]" "(Ze|ZF|ze|zF)"
+"że" "" "" "Ze"
+"źe" "" "" "(Ze|ze)"
+"ży" "" "" "Zi"
+"źi" "" "" "(Zi|zi)"
+"ż" "" "" "Z"
+"ź" "" "" "(Z|z)"
+
+"rze" "t" "" "(Se|re)"
+"rze" "" "" "(Ze|re|rZe)"
+"rzy" "t" "" "(Si|ri)"
+"rzy" "" "" "(Zi|ri|rZi)"
+"rz" "t" "" "(S|r)"
+"rz" "" "" "(Z|r|rZ)"
+
+"lio" "" "" "(lo|le)"
+"ł" "" "" "l"
+"ń" "" "" "n"
+"qu" "" "" "k"
+"s" "" "s" "" 
+
+// VOWELS   
+"ó" "" "" "(u|o)"
+"ą" "" "[bp]" "om"
+"ę" "" "[bp]" "em"
+"ą" "" "" "on"
+"ę" "" "" "en"
+
+"ije" "" "" "je"
+"yje" "" "" "je"
+"iie" "" "" "je"
+"yie" "" "" "je"
+"iye" "" "" "je"
+"yye" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+
+"rie" "" "" "rje" 
+"die" "" "" "dje" 
+"tie" "" "" "tje" 
+"ie" "" "[bcdgkpstwzż]" "F" 
+"ie" "" "" "e"
+
+"aue" "" "" "aue"
+"au" "" "" "au"
+
+"ei" "" "" "aj"
+"ey" "" "" "aj"
+"ej" "" "" "aj"
+
+"ai" "" "" "aj"
+"ay" "" "" "aj"
+"aj" "" "" "aj"
+
+"i" "[aeou]" "" "j" 
+"y" "[aeou]" "" "j" 
+"i" "" "[aou]" "j"
+"y" "" "[aeou]" "j"
+
+"a" "" "[bcdgkpstwzż]" "B" 
+"e" "" "[bcdgkpstwzż]" "(E|F)" 
+"o" "" "[bcćdgklłmnńrsśtwzźż]" "P" 
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "ts"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(h|x)"
+"i" "" "" "I"
+"j" "" "" "j"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"
+"x" "" "" "ks"
+"y" "" "" "I"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
new file mode 100644
index 0000000..74de1d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_portuguese.txt
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"kh" "" "" "x" // foreign
+"ch" "" "" "S"
+"ss" "" "" "s"
+"sc" "" "[ei]" "s"
+"sç" "" "[aou]" "s"
+"ç" "" "" "s"
+"c" "" "[ei]" "s"
+//  "c" "" "[aou]" "(k|C)"
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "z"
+"s" "" "[dglmnrv]" "(Z|S)" // Z is Brazil
+
+"z" "" "$" "(Z|s|S)" // s and S in Brazil
+"z" "" "[bdgv]" "(Z|z)" // Z in Brazil
+"z" "" "[ptckf]" "(s|S|z)" // s and S in Brazil
+
+"gu" "" "[eiu]" "g"    
+"gu" "" "[ao]" "gv"    
+"g" "" "[ei]" "Z"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "kv"    
+
+"uo" "" "" "(vo|o|u)"
+"u" "" "[aei]" "v" 
+
+"lh" "" "" "l"
+"nh" "" "" "nj"
+"h" "[bdgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+
+"ex" "" "[aáuiíoóeéêy]" "(ez|eS|eks)" // ez in Brazil
+"ex" "" "[cs]" "e" 
+
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"m" "" "[bcdfglnprstv]" "(m|n)" // maybe to add a rule for m/n before a consonant that disappears [preceding vowel becomes nasalized]
+"m" "" "$" "(m|n)" // maybe to add a rule for final m/n that disappears [preceding vowel becomes nasalized]
+
+"ão" "" "" "(au|an|on)"
+"ãe" "" "" "(aj|an)"
+"ãi" "" "" "(aj|an)"
+"õe" "" "" "(oj|on)"
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+
+"â" "" "" "a"
+"à" "" "" "a"
+"á" "" "" "a"
+"ã" "" "" "(a|an|on)"
+"é" "" "" "e"
+"ê" "" "" "e"
+"í" "" "" "i"
+"ô" "" "" "o"
+"ó" "" "" "o"
+"õ" "" "" "(o|on)"
+"ú" "" "" "u"
+"ü" "" "" "u"
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "(e|i)"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "(o|u)"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "S"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "(S|ks)"   
+"y" "" "" "i"   
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
new file mode 100644
index 0000000..a6d0aac
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_romanian.txt
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ce" "" "" "tSe"
+"ci" "" "" "(tSi|tS)"
+"ch" "" "[ei]" "k"
+"ch" "" "" "x" // foreign
+
+"gi" "" "" "(dZi|dZ)"
+"g" "" "[ei]" "dZ"
+"gh" "" "" "g"
+
+"i" "[aeou]" "" "j"
+"i" "" "[aeou]" "j"
+"ţ" "" "" "ts"
+"ş" "" "" "S"
+"qu" "" "" "k"    
+
+"î" "" "" "i"
+"ea" "" "" "ja"
+"ă" "" "" "(e|a)"
+"aue" "" "" "aue"
+
+// LATIN ALPHABET
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "(x|h)"
+"i" "" "" "I"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v"    
+"x" "" "" "ks"    
+"y" "" "" "i"    
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
new file mode 100644
index 0000000..310be84
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_russian.txt
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//GENERAL// CONVERTING FEMININE TO MASCULINE
+"yna" "" "$" "(in|ina)" 
+"ina" "" "$" "(in|ina)" 
+"liova" "" "$" "(lof|lef)" 
+"lova" "" "$" "(lof|lef|lova)" 
+"ova" "" "$" "(of|ova)" 
+"eva" "" "$" "(ef|ova)" 
+"aia" "" "$" "(aja|i)" 
+"aja" "" "$" "(aja|i)" 
+"aya" "" "$" "(aja|i)" 
+
+//SPECIAL CONSONANTS
+"tsya" "" "" "tsa" 
+"tsyu" "" "" "tsu" 
+"tsia" "" "" "tsa" 
+"tsie" "" "" "tse" 
+"tsio" "" "" "tso"   
+"tsye" "" "" "tse" 
+"tsyo" "" "" "tso" 
+"tsiu" "" "" "tsu" 
+"sie" "" "" "se" 
+"sio" "" "" "so"   
+"zie" "" "" "ze" 
+"zio" "" "" "zo"   
+"sye" "" "" "se" 
+"syo" "" "" "so"   
+"zye" "" "" "ze" 
+"zyo" "" "" "zo"   
+
+"ger" "" "$" "ger" 
+"gen" "" "$" "gen" 
+"gin" "" "$" "gin" 
+"gg" "" "" "g" 
+"g" "[jaeoiuy]" "[aeoiu]" "g" 
+"g" "" "[aeoiu]" "(g|h)" 
+
+"kh" "" "" "x"
+"ch" "" "" "(tS|x)" 
+"sch" "" "" "(StS|S)"
+"ssh" "" "" "S"
+"sh" "" "" "S"
+"zh" "" "" "Z" 
+"tz" "" "$" "ts" 
+"tz" "" "" "(ts|tz)" 
+"c" "" "[iey]" "s" 
+"qu" "" "" "(kv|k)" 
+"s" "" "s" ""
+
+//SPECIAL VOWELS
+"lya" "" "" "la" 
+"lyu" "" "" "lu"  
+"lia" "" "" "la" // not in DJSRE
+"liu" "" "" "lu"  // not in DJSRE
+"lja" "" "" "la" // not in DJSRE
+"lju" "" "" "lu"  // not in DJSRE
+"le" "" "" "(lo|lE)" //not in DJSRE
+"lyo" "" "" "(lo|le)" //not in DJSRE
+"lio" "" "" "(lo|le)" 
+
+"ije" "" "" "je"
+"ie" "" "" "je"
+"iye" "" "" "je"
+"iie" "" "" "je"
+"yje" "" "" "je"
+"ye" "" "" "je"
+"yye" "" "" "je"
+"yie" "" "" "je"
+
+"ij" "" "[aou]" "j"
+"iy" "" "[aou]" "j"
+"ii" "" "[aou]" "j"
+"yj" "" "[aou]" "j"
+"yy" "" "[aou]" "j"
+"yi" "" "[aou]" "j"
+
+"io" "" "" "(jo|e)" 
+"i" "" "[au]" "j" 
+"i" "[aeou]" "" "j" 
+"yo" "" "" "(jo|e)" 
+"y" "" "[au]" "j"
+"y" "[aeiou]" "" "j" 
+
+"ii" "" "$" "i" 
+"iy" "" "$" "i" 
+"yy" "" "$" "i" 
+"yi" "" "$" "i" 
+"yj" "" "$" "i"
+"ij" "" "$" "i"
+
+"e" "^" "" "(je|E)" 
+"ee" "" "" "(aje|i)" 
+"e" "[aou]" "" "je" 
+"oo" "" "" "(oo|u)" 
+"'" "" "" "" 
+"\"" "" "" ""
+
+"aue" "" "" "aue"
+
+// LATIN ALPHABET 
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "k" 
+"d" "" "" "d"
+"e" "" "" "E"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h" 
+"i" "" "" "I"
+"j" "" "" "j" 
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" 
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" 
+"x" "" "" "ks" 
+"y" "" "" "I"
+"z" "" "" "z"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
new file mode 100644
index 0000000..3ba2695
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_spanish.txt
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GENERAL
+
+// Includes both Spanish (Castilian) & Catalan
+
+// CONSONANTS
+"ñ" "" "" "(n|nj)"
+"ny" "" "" "nj" // Catalan
+"ç" "" "" "s" // Catalan
+
+"ig" "[aeiou]" "" "(tS|ig)" // tS is Catalan
+"ix" "[aeiou]" "" "S" // Catalan
+"tx" "" "" "tS" // Catalan
+"tj" "" "$" "tS" // Catalan
+"tj" "" "" "dZ" // Catalan
+"tg" "" "" "(tg|dZ)" // dZ is Catalan
+"ch" "" "" "(tS|dZ)" // dZ is typical for Argentina
+"bh" "" "" "b" // translit. from Arabic
+"h" "[dgt]" "" "" // translit. from Arabic
+"h" "" "$" "" // foreign
+//"ll" "" "" "(l|Z)" // Z is typical for Argentina, only Ashkenazic
+"m" "" "[bpvf]" "(m|n)"
+"c" "" "[ei]" "s" 
+//  "c" "" "[aou]" "(k|C)"
+"gu" "" "[ei]" "(g|gv)" // "gv" because "u" can actually be "ü"
+"g" "" "[ei]" "(x|g|dZ)"  // "g" only for foreign words; dZ is Catalan
+"qu" "" "" "k"
+
+"uo" "" "" "(vo|o)"    
+"u" "" "[aei]" "v"
+
+// SPECIAL VOWELS
+"ü" "" "" "v"
+"á" "" "" "a"
+"é" "" "" "e"
+"í" "" "" "i"
+"ó" "" "" "o"
+"ú" "" "" "u"
+"à" "" "" "a"  // Catalan
+"è" "" "" "e" // Catalan
+"ò" "" "" "o"  // Catalan
+
+// LATIN ALPHABET      
+"a" "" "" "a"
+"b" "" "" "B"
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g" 
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "(x|Z)" // Z is Catalan
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "V"
+"w" "" "" "v" // foreign words
+"x" "" "" "(ks|gz|S)" // ks is Spanish, all are Catalan
+"y" "" "" "(i|j)"
+"z" "" "" "(z|s)" // as "c" before "e" or "i", in Spain it is like unvoiced English "th"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
new file mode 100644
index 0000000..c639a13
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/gen_rules_turkish.txt
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"ç" "" "" "tS"
+"ğ" "" "" "" // to show that previous vowel is long
+"ş" "" "" "S"
+"ü" "" "" "Q"
+"ö" "" "" "Y"
+"ı" "" "" "(e|i|)" // as "e" in English "label"
+
+"a" "" "" "a"
+"b" "" "" "b"
+"c" "" "" "dZ"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h"
+"i" "" "" "i"
+"j" "" "" "Z"
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k" // foreign words
+"r" "" "" "r"
+"s" "" "" "s"
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "v"
+"w" "" "" "v" // foreign words
+"x" "" "" "ks" // foreign words
+"y" "" "" "j"
+"z" "" "" "z" 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
new file mode 100644
index 0000000..99742b1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/lang.txt
@@ -0,0 +1,293 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// 1. following are rules to accept the language
+// 1.1 Special letter combinations
+^o’ english true
+^o' english true
+^mc english true
+^fitz english true
+ceau french+romanian true
+eau$ french true // mp: I've added this
+eaux$ french true // mp: I've added this
+ault$ french true
+oult$ french true
+eux$ french true
+eix$ french true
+glou$ greeklatin true
+uu dutch true
+tx spanish true
+witz german true
+tz$ german+russian+english true
+^tz russian+english true
+poulos$ greeklatin true
+pulos$ greeklatin true
+iou greeklatin true
+sj$ dutch true
+^sj dutch true
+güe spanish true
+güi spanish true
+ghe romanian+greeklatin true
+ghi romanian+greeklatin true
+escu$ romanian true
+esco$ romanian true
+vici$ romanian true
+schi$ romanian true
+ii$ russian true
+iy$ russian true
+yy$ russian true
+yi$ russian true
+^rz polish true
+rz$ polish+german true
+[bcdfgklmnpstwz]rz polish true
+rz[bcdfghklmnpstw] polish true
+etti$ italian true
+eti$ italian true
+ati$ italian true
+ato$ italian true
+[aoei]no$ italian true
+[aoei]ni$ italian true
+esi$ italian true
+oli$ italian true
+field$ english true
+cki$ polish true
+ska$ polish true
+cka$ polish true
+ae german+russian+english true
+oe german+french+russian+english+dutch true
+th$ german+english true
+^th german+english+greeklatin true
+mann german true
+cz polish true
+cy polish+greeklatin true
+niew polish true
+stein german true
+heim$ german true
+heimer$ german true
+thal german true
+zweig german true
+[aeou]h german true
+äh german true
+öh german true
+üh german true
+[ln]h[ao]$ portuguese true
+[ln]h[aou] portuguese+french+german+dutch+czech+spanish+turkish true
+chsch german true
+tsch german true
+sch$ german+russian true
+^sch german+russian true
+ck$ german+english true
+c$ polish+romanian+hungarian+czech+turkish true
+sz polish+hungarian true
+cs$ hungarian true
+^cs hungarian true
+dzs hungarian true
+zs$ hungarian true
+^zs hungarian true
+^wl polish true
+^wr polish+english+german+dutch true
+
+gy$ hungarian true
+gy[aeou] hungarian true
+gy hungarian+russian+french+greeklatin true
+guy french true
+gu[ei] spanish+french+portuguese true
+gu[ao] spanish+portuguese true
+gi[aou] italian+greeklatin true
+
+ly hungarian+russian+polish+greeklatin true
+ny hungarian+russian+polish+spanish+greeklatin true
+ty hungarian+russian+polish+greeklatin true
+
+// 1.2 special characters
+ć polish true
+ç french+spanish+portuguese+turkish true
+č czech true
+ď czech true
+ğ turkish true
+ł polish true
+ń polish true
+ñ spanish true
+ň czech true
+ř czech true
+ś polish true
+ş romanian+turkish true
+š czech true
+ţ romanian true
+ť czech true
+ź polish true
+ż polish true
+
+ß german true
+
+ä german true
+á hungarian+spanish+portuguese+czech+greeklatin true
+â romanian+french+portuguese true
+ă romanian true
+ą polish true
+à portuguese true
+ã portuguese true
+ę polish true
+é french+hungarian+czech+greeklatin true
+è french+spanish+italian true
+ê french true
+ě czech true
+ê french+portuguese true
+í hungarian+spanish+portuguese+czech+greeklatin true
+î romanian+french true
+ı turkish true
+ó polish+hungarian+spanish+italian+portuguese+czech+greeklatin true
+ö german+hungarian+turkish true
+ô french+portuguese true
+õ portuguese+hungarian true
+ò italian+spanish true
+ű hungarian true
+ú hungarian+spanish+portuguese+czech+greeklatin true
+ü german+hungarian+spanish+portuguese+turkish true
+ù french true
+ů czech true
+ý czech+greeklatin true
+
+// Every Cyrillic word has at least one Cyrillic vowel (аёеоиуыэюя)
+а cyrillic true
+ё cyrillic true
+о cyrillic true
+е cyrillic true
+и cyrillic true
+у cyrillic true
+ы cyrillic true
+э cyrillic true
+ю cyrillic true
+я cyrillic true
+
+// Every Greek word has at least one Greek vowel
+α greek true
+ε greek true
+η greek true
+ι greek true
+ο greek true
+υ greek true
+ω greek true
+
+// Arabic (only initial)
+ا arabic true // alif (isol + init)   
+ب arabic true // ba' 
+ت arabic true // ta' 
+ث arabic true // tha'
+ج arabic true // jim
+ح arabic true // h.a' 
+خ arabic true // kha' 
+د arabic true // dal (isol + init)
+ذ arabic true // dhal (isol + init)
+ر arabic true // ra' (isol + init)
+ز arabic true // za' (isol + init)
+س arabic true // sin 
+ش arabic true // shin 
+ص arabic true // s.ad 
+ض arabic true // d.ad 
+ط arabic true // t.a' 
+ظ arabic true // z.a' 
+ع arabic true // 'ayn
+غ arabic true // ghayn 
+ف arabic true // fa' 
+ق arabic true // qaf 
+ك arabic true // kaf  
+ل arabic true // lam 
+م arabic true // mim 
+ن arabic true // nun 
+ه arabic true // ha' 
+و arabic true // waw (isol + init)
+ي arabic true // ya' 
+    
+آ arabic true // alif madda  
+إ arabic true // alif + diacritic  
+أ arabic true // alif + hamza
+ؤ arabic true //  waw + hamza
+ئ arabic true //  ya' + hamza
+
+
+// Hebrew
+א hebrew true
+ב hebrew true
+ג hebrew true
+ד hebrew true
+ה hebrew true
+ו hebrew true
+ז hebrew true
+ח hebrew true
+ט hebrew true
+י hebrew true
+כ hebrew true
+ל hebrew true
+מ hebrew true
+נ hebrew true
+ס hebrew true
+ע hebrew true
+פ hebrew true
+צ hebrew true
+ק hebrew true
+ר hebrew true
+ש hebrew true
+ת hebrew true
+
+// 2. following are rules to reject the language
+
+// Every Latin character word has at least one Latin vowel
+a cyrillic+hebrew+greek+arabic false
+o cyrillic+hebrew+greek+arabic false
+e cyrillic+hebrew+greek+arabic false
+i cyrillic+hebrew+greek+arabic false
+y cyrillic+hebrew+greek+arabic+romanian+dutch false
+u cyrillic+hebrew+greek+arabic false
+
+j italian false
+j[^aoeiuy] french+spanish+portuguese+greeklatin false
+g czech false
+k romanian+spanish+portuguese+french+italian false
+q hungarian+polish+russian+romanian+czech+dutch+turkish+greeklatin false
+v polish false
+w french+romanian+spanish+hungarian+russian+czech+turkish+greeklatin false
+x czech+hungarian+dutch+turkish false // polish excluded from the list
+
+dj spanish+turkish false
+v[^aoeiu] german false // in german, "v" can be found before a vowel only
+y[^aoeiu] german false  // in german, "y" usually appears only in the last position; sometimes before a vowel
+c[^aohk] german false
+dzi german+english+french+turkish false
+ou german false
+a[eiou] turkish false // no diphthongs in Turkish
+ö[eaio] turkish false
+ü[eaio] turkish false
+e[aiou] turkish false
+i[aeou] turkish false
+o[aieu] turkish false
+u[aieo] turkish false
+aj german+english+french+dutch false
+ej german+english+french+dutch false
+oj german+english+french+dutch false
+uj german+english+french+dutch false
+eu russian+polish false
+ky polish false
+kie french+spanish+greeklatin false
+gie portuguese+romanian+spanish+greeklatin false
+ch[aou] italian false
+ch turkish false
+son$ german false
+sc[ei] french false
+sch hungarian+polish+french+spanish false
+^h russian false
+etti$ greeklatin false

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
new file mode 100644
index 0000000..390419e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_any.txt
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SEPHARDIC
+
+"E" "" "" ""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
new file mode 100644
index 0000000..e744d32
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_common.txt
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"bens" "^" "" "(binz|s)" 
+"benS" "^" "" "(binz|s)" 
+"ben" "^" "" "(bin|)" 
+
+"abens" "^" "" "(abinz|binz|s)" 
+"abenS" "^" "" "(abinz|binz|s)" 
+"aben" "^" "" "(abin|bin|)"
+
+"els" "^" "" "(ilz|alz|s)" 
+"elS" "^" "" "(ilz|alz|s)" 
+"el" "^" "" "(il|al|)" 
+"als" "^" "" "(alz|s)" 
+"alS" "^" "" "(alz|s)" 
+"al" "^" "" "(al|)" 
+
+//"dels" "^" "" "(dilz|s)" 
+//"delS" "^" "" "(dilz|s)" 
+"del" "^" "" "(dil|)" 
+"dela" "^" "" "(dila|)" 
+//"delo" "^" "" "(dila|)" 
+"da" "^" "" "(da|)" 
+"de" "^" "" "(di|)" 
+//"des" "^" "" "(dis|dAs|)" 
+//"di" "^" "" "(di|)" 
+//"dos" "^" "" "(das|dus|)" 
+
+"oa" "" "" "(va|a|D)"
+"oe" "" "" "(vi|D)"
+"ae" "" "" "D"
+
+/// "s" "" "$" "(s|)" // Attia(s)
+/// "C" "" "" "s"  // "c" could actually be "ç"
+
+"n" "" "[bp]" "m"
+
+"h" "" "" "(|h|f)" // sound "h" (absent) can be expressed via /x/, Cojab in Spanish = Kohab ; Hakim = Fakim
+"x" "" "" "h"
+
+// DIPHTHONGS ARE APPROXIMATELY equivalent
+"aja" "^" "" "(Da|ia)"                         
+"aje" "^" "" "(Di|Da|i|ia)"                         
+"aji" "^" "" "(Di|i)"                         
+"ajo" "^" "" "(Du|Da|iu|ia)"                         
+"aju" "^" "" "(Du|iu)"                         
+
+"aj" "" "" "D"                         
+"ej" "" "" "D"                         
+"oj" "" "" "D"                         
+"uj" "" "" "D"                         
+"au" "" "" "D"                         
+"eu" "" "" "D"                         
+"ou" "" "" "D"                         
+
+"a" "^" "" "(a|)"  // Arabic
+
+"ja" "^" "" "ia"                         
+"je" "^" "" "i"                         
+"jo" "^" "" "(iu|ia)"                         
+"ju" "^" "" "iu"                         
+
+"ja" "" "" "a"                         
+"je" "" "" "i"                         
+"ji" "" "" "i"                         
+"jo" "" "" "u"                         
+"ju" "" "" "u"                         
+
+"j" "" "" "i"                         
+
+// CONSONANTS {z & Z & dZ; s & S} are approximately interchangeable
+"s" "" "[rmnl]" "z"
+"S" "" "[rmnl]" "z"
+"s" "[rmnl]" "" "z"
+"S" "[rmnl]" "" "z" 
+
+"dS" "" "$" "S"
+"dZ" "" "$" "S"
+"Z" "" "$" "S"
+"S" "" "$" "(S|s)"
+"z" "" "$" "(S|s)"
+
+"S" "" "" "s"
+"dZ" "" "" "z"
+"Z" "" "" "z"
+
+"i" "" "$" "(i|)" // often in Arabic
+"e" "" "" "i"
+
+"o" "" "$" "(a|u)"
+"o" "" "" "u"
+
+// special character to deal correctly in Hebrew match
+"B" "" "" "b" 
+"V" "" "" "v" 
+
+// Arabic
+"p" "^" "" "b"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
new file mode 100644
index 0000000..58fe459
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
new file mode 100644
index 0000000..4bca846
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
new file mode 100644
index 0000000..4bca846
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_approx_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_approx_french

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
new file mode 100644
index 0000000..d4bf51e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_any.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+"E" "" "" "e"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
new file mode 100644
index 0000000..1f4e864
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_approx_common.txt
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
+
+"h" "" "$" ""
+
+// VOICED - UNVOICED CONSONANTS
+"b" "" "[fktSs]" "p"
+"b" "" "p" ""
+"b" "" "$" "p"
+"p" "" "[vgdZz]" "b"
+"p" "" "b" ""
+
+"v" "" "[pktSs]" "f"
+"v" "" "f" ""
+"v" "" "$" "f"
+"f" "" "[vbgdZz]" "v"
+"f" "" "v" ""
+
+"g" "" "[pftSs]" "k"
+"g" "" "k" ""
+"g" "" "$" "k"
+"k" "" "[vbdZz]" "g"
+"k" "" "g" ""
+
+"d" "" "[pfkSs]" "t"
+"d" "" "t" ""
+"d" "" "$" "t"
+"t" "" "[vbgZz]" "d"
+"t" "" "d" ""
+
+"s" "" "dZ" ""
+"s" "" "tS" ""
+
+"z" "" "[pfkSt]" "s"
+"z" "" "[sSzZ]" ""
+"s" "" "[sSzZ]" ""
+"Z" "" "[sSzZ]" ""
+"S" "" "[sSzZ]" ""
+
+// SIMPLIFICATION OF CONSONANT CLUSTERS
+"nm" "" "" "m"
+
+// DOUBLE --> SINGLE
+"ji" "^" "" "i"
+
+"a" "" "a" ""
+"b" "" "b" ""
+"d" "" "d" ""
+"e" "" "e" ""
+"f" "" "f" ""
+"g" "" "g" ""
+"i" "" "i" ""
+"k" "" "k" ""
+"l" "" "l" ""
+"m" "" "m" ""
+"n" "" "n" ""
+"o" "" "o" ""
+"p" "" "p" ""
+"r" "" "r" ""
+"t" "" "t" ""
+"u" "" "u" ""
+"v" "" "v" ""
+"z" "" "z" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
new file mode 100644
index 0000000..b97c589
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_common.txt
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"h" "" "" ""
+//"C" "" "" "k"  // c that can actually be ç
+
+// VOICED - UNVOICED CONSONANTS
+"s" "[^t]" "[bgZd]" "z"
+"Z" "" "[pfkst]" "S"
+"Z" "" "$" "S"
+"S" "" "[bgzd]" "Z"
+"z" "" "$" "s"
+
+// special characters needed to match Hebrew correctly
+"B" "" "" "b"
+"V" "" "" "v"

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
new file mode 100644
index 0000000..ea75dc4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_french.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sephardic
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_hebrew.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_italian.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_portuguese.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
new file mode 100644
index 0000000..0990004
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_exact_spanish.txt
@@ -0,0 +1,18 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// empty
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
new file mode 100644
index 0000000..00357f9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_hebrew_common.txt
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include sep_exact_approx_common
+
+"E" "" "" ""  // final French "e": only in Sephardic
+
+"ts" "" "" "C" // to avoid confusing Gutes [=guts] with Guts [=guc]
+"tS" "" "" "C" // same reason
+"S" "" "" "s"
+"p" "" "" "f"   
+"b" "^" "" "b"    
+"b" "" "" "(b|v)"    
+
+"ja" "" "" "i"
+"je" "" "" "i"
+"aj" "" "" "i"
+"j" "" "" "i"
+
+"a" "^" "" "1"
+"e" "^" "" "1"
+"a" "" "$" "1"
+"e" "" "$" "1"
+
+"a" "" "" ""
+"e" "" "" ""
+
+"oj" "^" "" "(u|vi)"
+"uj" "^" "" "(u|vi)"
+
+"oj" "" "" "u"
+"uj" "" "" "u"
+
+"ou" "^" "" "(u|v|1)"
+"o" "^" "" "(u|v|1)"
+"u" "^" "" "(u|v|1)"
+
+"o" "" "$" "(u|1)"
+"u" "" "$" "(u|1)"
+
+"ou" "" "" "u"
+"o" "" "" "u"
+
+"VV" "" "" "u" // alef/ayin + vov from ruleshebrew
+"L" "^" "" "1" // alef/ayin from  ruleshebrew
+"L" "" "$" "1" // alef/ayin from  ruleshebrew
+"L" "" "" " " // alef/ayin from  ruleshebrew
+"WW" "^" "" "(vi|u)" // vav-yod from  ruleshebrew
+"WW" "" "" "u" // vav-yod from  ruleshebrew
+"W" "^" "" "(u|v)" // vav from  ruleshebrew
+"W" "" "" "u" // vav from  ruleshebrew
+
+// "g" "" "" "(g|Z)"
+// "z" "" "" "(z|Z)"
+// "d" "" "" "(d|dZ)"
+
+"T" "" "" "t"   // tet from  ruleshebrew
+
+// "k" "" "" "(k|x)"
+// "x" "" "" "(k|x)"
+"K" "" "" "k" // kof and initial kaf from ruleshebrew
+"X" "" "" "x" // khet and final kaf from ruleshebrew
+
+// special for Spanish initial B/V
+"B" "" "" "v"
+"V" "" "" "b"
+
+"H" "^" "" "(x|1)"
+"H" "" "$" "(x|1)"
+"H" "" "" "(x|)"
+"h" "^" "" "1"
+"h" "" "" ""

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
new file mode 100644
index 0000000..9a1935a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_languages.txt
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+any
+french
+hebrew
+italian
+portuguese
+spanish

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1ee3a9cc/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
new file mode 100644
index 0000000..fc08b5a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Phonetic/Language/Bm/sep_rules_any.txt
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// SEPHARDIC: INCORPORATES Portuguese + Italian + Spanish(+Catalan) + French
+
+// CONSONANTS
+"ph" "" "" "f" // foreign
+"sh" "" "" "S" // foreign
+"kh" "" "" "x" // foreign
+
+"gli" "" "" "(gli|l[italian])" 
+"gni" "" "" "(gni|ni[italian+french])"
+"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn)" 
+"gh" "" "" "g" // It + translit. from Arabic
+"dh" "" "" "d" // translit. from Arabic
+"bh" "" "" "b" // translit. from Arabic
+"th" "" "" "t" // translit. from Arabic
+"lh" "" "" "l" // Port
+"nh" "" "" "nj" // Port
+
+"ig" "[aeiou]" "" "(ig|tS[spanish])"
+"ix" "[aeiou]" "" "S" // Sp
+"tx" "" "" "tS" // Sp
+"tj" "" "$" "tS" // Sp
+"tj" "" "" "dZ" // Sp
+"tg" "" "" "(tg|dZ[spanish])"
+
+"gi" "" "[aeou]" "dZ" // italian
+"g" "" "y" "Z" // french
+"gg" "" "[ei]" "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])" 
+"g" "" "[ei]" "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])" 
+
+"guy" "" "" "gi"     
+"gue" "" "$" "(k[french]|ge)"
+"gu" "" "[ei]" "(g|gv)"     // not It
+"gu" "" "[ao]" "gv"  // not It  
+
+"ñ" "" "" "(n|nj)" 
+"ny" "" "" "nj" 
+
+"sc" "" "[ei]" "(s|S[italian])" 
+"sç" "" "[aeiou]" "s" // not It
+"ss" "" "" "s"
+"ç" "" "" "s"   // not It
+
+"ch" "" "[ei]" "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])" 
+"ch" "" "" "(S|tS[spanish]|dZ[spanish])" 
+
+"ci" "" "[aeou]" "(tS[italian]|si)" 
+"cc" "" "[eiyéèê]" "(tS[italian]|ks[portuguese+french+spanish])" 
+"c" "" "[eiyéèê]" "(tS[italian]|s[portuguese+french+spanish])" 
+//"c" "" "[aou]" "(k|C[portuguese+spanish])" // "C" means that the actual letter could be "ç" (cedille omitted)
+
+"s" "^" "" "s"
+"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "(s[spanish]|z[portuguese+french+italian])" 
+"s" "" "[dglmnrv]" "(z|Z[portuguese])" 
+
+"z" "" "$" "(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr
+"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr
+"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp
+"z" "" "" "(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp
+
+"que" "" "$" "(k[french]|ke)"
+"qu" "" "[eiu]" "k"    
+"qu" "" "[ao]" "(kv|k)" // k is It   
+
+"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" 
+"ex" "" "[cs]" "(e[portuguese]|ek)" 
+
+"m" "" "[cdglnrst]" "(m|n[portuguese])" 
+"m" "" "[bfpv]" "(m|n[portuguese+spanish])" 
+"m" "" "$" "(m|n[portuguese])" 
+
+"b" "^" "" "(b|V[spanish])" 
+"v" "^" "" "(v|B[spanish])" 
+
+// VOWELS   
+"eau" "" "" "o" // Fr
+
+"ouh" "" "[aioe]" "(v[french]|uh)"
+"uh" "" "[aioe]" "(v|uh)"
+"ou" "" "[aioe]" "v" // french
+"uo" "" "" "(vo|o)"
+"u" "" "[aie]" "v"
+
+"i" "[aáuoóeéê]" "" "j"
+"i" "" "[aeou]" "j"
+"y" "[aáuiíoóeéê]" "" "j"
+"y" "" "[aeiíou]" "j"
+"e" "" "$" "(e|E[french])"
+
+"ão" "" "" "(au|an)" // Port
+"ãe" "" "" "(aj|an)" // Port
+"ãi" "" "" "(aj|an)" // Port
+"õe" "" "" "(oj|on)" // Port
+"où" "" "" "u" // Fr
+"ou" "" "" "(ou|u[french])" 
+
+"â" "" "" "a" // Port & Fr
+"à" "" "" "a" // Port 
+"á" "" "" "a" // Port & Sp
+"ã" "" "" "(a|an)" // Port
+"é" "" "" "e" 
+"ê" "" "" "e" // Port & Fr
+"è" "" "" "e" // Sp & Fr & It
+"í" "" "" "i" // Port & Sp
+"î" "" "" "i" // Fr
+"ô" "" "" "o" // Port & Fr
+"ó" "" "" "o" // Port & Sp & It
+"õ" "" "" "(o|on)" // Port
+"ò" "" "" "o"  // Sp & It
+"ú" "" "" "u" // Port & Sp
+"ü" "" "" "u" // Port & Sp
+
+// LATIN ALPHABET     
+"a" "" "" "a"
+"b" "" "" "(b|v[spanish])" 
+"c" "" "" "k"
+"d" "" "" "d"
+"e" "" "" "e"
+"f" "" "" "f"
+"g" "" "" "g"
+"h" "" "" "h" 
+"i" "" "" "i"
+"j" "" "" "(x[spanish]|Z)" // not It
+"k" "" "" "k"
+"l" "" "" "l"
+"m" "" "" "m"
+"n" "" "" "n"
+"o" "" "" "o"
+"p" "" "" "p"
+"q" "" "" "k"    
+"r" "" "" "r"
+"s" "" "" "(s|S[portuguese])" 
+"t" "" "" "t"
+"u" "" "" "u"
+"v" "" "" "(v|b[spanish])" 
+"w" "" "" "v"    // foreign
+"x" "" "" "(ks|gz|S[portuguese+spanish])"   // S/ks Port & Sp, gz Sp, It only ks
+"y" "" "" "i"   
+"z" "" "" "z"