You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/07/23 17:36:28 UTC
[03/13] lucenenet git commit: Ported Lucene.Net.Analysis.Kuromoji +
tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
new file mode 100644
index 0000000..a4940f5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Tools/UnknownDictionaryTest.cs
@@ -0,0 +1,93 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Exercises <see cref="UnknownDictionaryWriter"/>: registering character
+ /// categories and adding unknown-word entries parsed from CSV lines.
+ /// </summary>
+ /// <remarks>
+ /// Expected failures are checked with <c>Assert.Catch</c> instead of
+ /// <c>try { ...; fail(); } catch (Exception) { }</c>. NOTE(review): fail() is
+ /// declared outside this file and is assumed to signal failure by throwing
+ /// (as NUnit's Assert.Fail does); the catch-all clause would swallow that
+ /// exception, so the test could never fail.
+ /// </remarks>
+ public class UnknownDictionaryTest : LuceneTestCase
+ {
+     /// <summary>
+     /// File name constant; not referenced by the tests visible in this class.
+     /// </summary>
+     public static readonly string FILENAME = "unk-tokeninfo-dict.obj";
+
+     /// <summary>
+     /// Checks that <c>PutCharacterCategory</c> throws for two argument pairs
+     /// that are expected to be rejected and accepts five further pairs.
+     /// </summary>
+     [Test]
+     public void TestPutCharacterCategory()
+     {
+         UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+
+         // Each of these calls must throw. Assert.Catch passes for any exception
+         // type, matching the breadth of the catch (Exception) it replaces.
+         Assert.Catch(() => unkDic.PutCharacterCategory(0, "DUMMY_NAME"));
+         Assert.Catch(() => unkDic.PutCharacterCategory(-1, "KATAKANA"));
+
+         // These calls must complete without throwing.
+         unkDic.PutCharacterCategory(0, "DEFAULT");
+         unkDic.PutCharacterCategory(1, "GREEK");
+         unkDic.PutCharacterCategory(2, "HIRAGANA");
+         unkDic.PutCharacterCategory(3, "KATAKANA");
+         unkDic.PutCharacterCategory(4, "KANJI");
+     }
+
+     /// <summary>
+     /// Checks that <c>Put</c> throws for a CSV entry with 12 fields and accepts
+     /// 13-field entries after their categories have been registered.
+     /// </summary>
+     [Test]
+     public void TestPut()
+     {
+         UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+
+         // One field fewer than the accepted entries below. Parsing and adding both
+         // run inside the delegate, so an exception from either step satisfies the check.
+         Assert.Catch(() => unkDic.Put(CSVUtil.Parse("KANJI,1285,11426,名詞,一般,*,*,*,*,*,*,*")));
+
+         // Entries that must be accepted, one per category registered below.
+         string entry1 = "ALPHA,1285,1285,13398,名詞,一般,*,*,*,*,*,*,*";
+         string entry2 = "HIRAGANA,1285,1285,13069,名詞,一般,*,*,*,*,*,*,*";
+         string entry3 = "KANJI,1285,1285,11426,名詞,一般,*,*,*,*,*,*,*";
+
+         // Register the categories named in the first field of each entry.
+         unkDic.PutCharacterCategory(0, "ALPHA");
+         unkDic.PutCharacterCategory(1, "HIRAGANA");
+         unkDic.PutCharacterCategory(2, "KANJI");
+
+         // With the categories registered, every entry must be added without throwing.
+         unkDic.Put(CSVUtil.Parse(entry1));
+         unkDic.Put(CSVUtil.Parse(entry2));
+         unkDic.Put(CSVUtil.Parse(entry3));
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f092010/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
new file mode 100644
index 0000000..2922b27
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Kuromoji/Util/TestToStringUtil.cs
@@ -0,0 +1,121 @@
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Ja.Util
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>Tests for the static helpers of <see cref="ToStringUtil"/>.</summary>
+ public class TestToStringUtil : LuceneTestCase
+ {
+     /// <summary>GetPOSTranslation must turn the tag below into "noun-suffix-verbal".</summary>
+     [Test]
+     public void TestPOS()
+     {
+         var translated = ToStringUtil.GetPOSTranslation("名詞-接尾-サ変接続");
+         assertEquals("noun-suffix-verbal", translated);
+     }
+
+     /// <summary>GetRomanization is checked against five multi-character katakana words.</summary>
+     [Test]
+     public void TestHepburn()
+     {
+         string[] words = { "マージャン", "ウーロンチャ", "チャーハン", "チャーシュー", "シューマイ" };
+         string[] romanized = { "majan", "uroncha", "chahan", "chashu", "shumai" };
+         for (int i = 0; i < words.Length; i++)
+         {
+             assertEquals(romanized[i], ToStringUtil.GetRomanization(words[i]));
+         }
+     }
+
+     // Table-driven check based on http://en.wikipedia.org/wiki/Hepburn_romanization;
+     // the original author notes it is neither thorough nor necessarily the desired behavior.
+     [Test]
+     public void TestHepburnTable()
+     {
+         var table = new Dictionary<string, string>() {
+             { "ア", "a" }, { "イ", "i" }, { "ウ", "u" }, { "エ", "e" }, { "オ", "o" },
+             { "カ", "ka" }, { "キ", "ki" }, { "ク", "ku" }, { "ケ", "ke" }, { "コ", "ko" },
+             { "サ", "sa" }, { "シ", "shi" }, { "ス", "su" }, { "セ", "se" }, { "ソ", "so" },
+             { "タ", "ta" }, { "チ", "chi" }, { "ツ", "tsu" }, { "テ", "te" }, { "ト", "to" },
+             { "ナ", "na" }, { "ニ", "ni" }, { "ヌ", "nu" }, { "ネ", "ne" }, { "ノ", "no" },
+             { "ハ", "ha" }, { "ヒ", "hi" }, { "フ", "fu" }, { "ヘ", "he" }, { "ホ", "ho" },
+             { "マ", "ma" }, { "ミ", "mi" }, { "ム", "mu" }, { "メ", "me" }, { "モ", "mo" },
+             { "ヤ", "ya" }, { "ユ", "yu" }, { "ヨ", "yo" },
+             { "ラ", "ra" }, { "リ", "ri" }, { "ル", "ru" }, { "レ", "re" }, { "ロ", "ro" },
+             { "ワ", "wa" }, { "ヰ", "i" }, { "ヱ", "e" }, { "ヲ", "o" },
+             { "ン", "n" },
+             { "ガ", "ga" }, { "ギ", "gi" }, { "グ", "gu" }, { "ゲ", "ge" }, { "ゴ", "go" },
+             { "ザ", "za" }, { "ジ", "ji" }, { "ズ", "zu" }, { "ゼ", "ze" }, { "ゾ", "zo" },
+             { "ダ", "da" }, { "ヂ", "ji" }, { "ヅ", "zu" }, { "デ", "de" }, { "ド", "do" },
+             { "バ", "ba" }, { "ビ", "bi" }, { "ブ", "bu" }, { "ベ", "be" }, { "ボ", "bo" },
+             { "パ", "pa" }, { "ピ", "pi" }, { "プ", "pu" }, { "ペ", "pe" }, { "ポ", "po" },
+
+             { "キャ", "kya" }, { "キュ", "kyu" }, { "キョ", "kyo" },
+             { "シャ", "sha" }, { "シュ", "shu" }, { "ショ", "sho" },
+             { "チャ", "cha" }, { "チュ", "chu" }, { "チョ", "cho" },
+             { "ニャ", "nya" }, { "ニュ", "nyu" }, { "ニョ", "nyo" },
+             { "ヒャ", "hya" }, { "ヒュ", "hyu" }, { "ヒョ", "hyo" },
+             { "ミャ", "mya" }, { "ミュ", "myu" }, { "ミョ", "myo" },
+             { "リャ", "rya" }, { "リュ", "ryu" }, { "リョ", "ryo" },
+             { "ギャ", "gya" }, { "ギュ", "gyu" }, { "ギョ", "gyo" },
+             { "ジャ", "ja" }, { "ジュ", "ju" }, { "ジョ", "jo" },
+             { "ヂャ", "ja" }, { "ヂュ", "ju" }, { "ヂョ", "jo" },
+             { "ビャ", "bya" }, { "ビュ", "byu" }, { "ビョ", "byo" },
+             { "ピャ", "pya" }, { "ピュ", "pyu" }, { "ピョ", "pyo" },
+
+             { "イィ", "yi" }, { "イェ", "ye" },
+             { "ウァ", "wa" }, { "ウィ", "wi" }, { "ウゥ", "wu" }, { "ウェ", "we" }, { "ウォ", "wo" },
+             { "ウュ", "wyu" },
+             // TODO: really should be vu
+             { "ヴァ", "va" }, { "ヴィ", "vi" }, { "ヴ", "v" }, { "ヴェ", "ve" }, { "ヴォ", "vo" },
+             { "ヴャ", "vya" }, { "ヴュ", "vyu" }, { "ヴィェ", "vye" }, { "ヴョ", "vyo" },
+             { "キェ", "kye" }, { "ギェ", "gye" },
+             { "クァ", "kwa" }, { "クィ", "kwi" }, { "クェ", "kwe" }, { "クォ", "kwo" },
+             { "クヮ", "kwa" },
+             { "グァ", "gwa" }, { "グィ", "gwi" }, { "グェ", "gwe" }, { "グォ", "gwo" },
+             { "グヮ", "gwa" },
+             { "シェ", "she" }, { "ジェ", "je" },
+             { "スィ", "si" }, { "ズィ", "zi" },
+             { "チェ", "che" },
+             { "ツァ", "tsa" }, { "ツィ", "tsi" }, { "ツェ", "tse" }, { "ツォ", "tso" },
+             { "ツュ", "tsyu" },
+             { "ティ", "ti" }, { "トゥ", "tu" },
+             { "テュ", "tyu" },
+             { "ディ", "di" }, { "ドゥ", "du" },
+             { "デュ", "dyu" },
+             { "ニェ", "nye" }, { "ヒェ", "hye" },
+             { "ビェ", "bye" }, { "ピェ", "pye" },
+             { "ファ", "fa" }, { "フィ", "fi" }, { "フェ", "fe" }, { "フォ", "fo" },
+             { "フャ", "fya" }, { "フュ", "fyu" }, { "フィェ", "fye" }, { "フョ", "fyo" },
+             { "ホゥ", "hu" },
+             { "ミェ", "mye" },
+             { "リェ", "rye" },
+             { "ラ゜", "la" }, { "リ゜", "li" }, { "ル゜", "lu" }, { "レ゜", "le" }, { "ロ゜", "lo" },
+             { "ヷ", "va" }, { "ヸ", "vi" }, { "ヹ", "ve" }, { "ヺ", "vo" },
+         };
+
+         foreach (KeyValuePair<string, string> pair in table)
+         {
+             assertEquals(pair.Key, pair.Value, ToStringUtil.GetRomanization(pair.Key));
+         }
+     }
+ }
+}