You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:13 UTC
[24/27] lucenenet git commit: adding converted analysis common tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
new file mode 100644
index 0000000..12e0b5b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
@@ -0,0 +1,171 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+
+ public class TestCJKBigramFilter : BaseTokenStreamTestCase
+ {
+ internal Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t));
+ }
+ }
+
+ internal Analyzer unibiAnalyzer = new AnalyzerAnonymousInnerClassHelper2();
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper2()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t, 0xff, true));
+ }
+ }
+
+ /// <summary>
+ /// Feeds 11 back-to-back copies of the same sentence and expects overlapping bigrams, including the "た多" bigram that spans each copy boundary. </summary>
+ public virtual void testHuge()
+ {
+ assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた", new string[] {"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多", "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"});
+ }
+
+ /// <summary>
+ /// With the filter restricted to CJKBigramFilter.HAN, adjacent Han characters are expected as DOUBLE bigrams and every hiragana character as its own HIRAGANA token. </summary>
+ public virtual void testHanOnly()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+ assertAnalyzesTo(a, "多くの学生が試験に落ちた。", new string[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"}, new int[] {0, 1, 2, 3, 5, 6, 8, 9, 10, 11}, new int[] {1, 2, 3, 5, 6, 8, 9, 10, 11, 12}, new string[] {"<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly TestCJKBigramFilter outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper3(TestCJKBigramFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN));
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAllScripts() throws Exception
+ public virtual void testAllScripts()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this);
+ assertAnalyzesTo(a, "多くの学生が試験に落ちた。", new string[] {"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"});
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+ {
+ private readonly TestCJKBigramFilter outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper4(TestCJKBigramFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t, 0xff, false));
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUnigramsAndBigramsAllScripts() throws Exception
+ public virtual void testUnigramsAndBigramsAllScripts()
+ {
+ assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。", new string[] {"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"}, new int[] {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11}, new int[] {1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12}, new string[] {"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>"}, new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, new int[] {1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1});
+ }
+
+ /// <summary>
+ /// HAN-only bigramming with unigram output enabled: each Han bigram is expected stacked on its first unigram (position increment 0, position length 2). </summary>
+ public virtual void testUnigramsAndBigramsHanOnly()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper5(this);
+ assertAnalyzesTo(a, "多くの学生が試験に落ちた。", new string[] {"多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た"}, new int[] {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11}, new int[] {1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12}, new string[] {"<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>"}, new int[] {1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1}, new int[] {1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1});
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+ {
+ private readonly TestCJKBigramFilter outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper5(TestCJKBigramFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
+ }
+ }
+
+ /// <summary>
+ /// Feeds 11 back-to-back copies of the same sentence through the unigram+bigram analyzer and expects every unigram followed by the bigram that starts on it. </summary>
+ public virtual void testUnigramsAndBigramsHuge()
+ {
+ assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた", new string[] {"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多", "多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"});
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomUnibiStrings() throws Exception
+ public virtual void testRandomUnibiStrings()
+ {
+ checkRandomData(random(), unibiAnalyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ // The local is deliberately not named "random": in C# such a local would hide the random()
+ // method for the whole block, so the initializer "random()" would fail to compile.
+ public virtual void testRandomUnibiHugeStrings()
+ {
+ Random rnd = random();
+ checkRandomData(rnd, unibiAnalyzer, 100 * RANDOM_MULTIPLIER, 8192);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
new file mode 100644
index 0000000..fd0d186
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
@@ -0,0 +1,77 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the CJK bigram factory is working.
+ /// </summary>
+ public class TestCJKBigramFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefaults() throws Exception
+ public virtual void testDefaults()
+ {
+ Reader reader = new StringReader("多くの学生が試験に落ちた。");
+ TokenStream stream = tokenizerFactory("standard").create(reader);
+ stream = tokenFilterFactory("CJKBigram").create(stream);
+ assertTokenStreamContents(stream, new string[] {"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHanOnly() throws Exception
+ public virtual void testHanOnly()
+ {
+ Reader reader = new StringReader("多くの学生が試験に落ちた。");
+ TokenStream stream = tokenizerFactory("standard").create(reader);
+ stream = tokenFilterFactory("CJKBigram", "hiragana", "false").create(stream);
+ assertTokenStreamContents(stream, new string[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHanOnlyUnigrams() throws Exception
+ public virtual void testHanOnlyUnigrams()
+ {
+ Reader reader = new StringReader("多くの学生が試験に落ちた。");
+ TokenStream stream = tokenizerFactory("standard").create(reader);
+ stream = tokenFilterFactory("CJKBigram", "hiragana", "false", "outputUnigrams", "true").create(stream);
+ assertTokenStreamContents(stream, new string[] {"多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ // Building the filter factory with an unrecognized argument must throw ArgumentException whose
+ // message mentions "Unknown parameters"; reaching fail() means nothing was thrown.
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
new file mode 100644
index 0000000..e1639d5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
@@ -0,0 +1,240 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Version = org.apache.lucene.util.Version;
+
+ /// @deprecated Remove when CJKTokenizer is removed (5.0)
+ [Obsolete("Remove when CJKTokenizer is removed (5.0)")]
+ public class TestCJKTokenizer : BaseTokenStreamTestCase
+ {
+
+ public class TestToken // public: it appears in the public signatures of newToken, checkCJKToken and checkCJKTokenReusable
+ {
+ private readonly TestCJKTokenizer outerInstance; // enclosing test instance; stored by the constructor, not read inside this class
+
+ public TestToken(TestCJKTokenizer outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ internal string termText; // expected term text
+ internal int start; // expected start offset
+ internal int end; // expected end offset
+ internal string type; // expected token type name
+ }
+
+ public virtual TestToken newToken(string termText, int start, int end, int type)
+ {
+ TestToken token = new TestToken(this);
+ token.termText = termText;
+ token.type = CJKTokenizer.TOKEN_TYPE_NAMES[type];
+ token.start = start;
+ token.end = end;
+ return token;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void checkCJKToken(final String str, final TestToken[] out_tokens) throws java.io.IOException
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+ public virtual void checkCJKToken(string str, TestToken[] out_tokens)
+ {
+ Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
+ string[] terms = new string[out_tokens.Length];
+ int[] startOffsets = new int[out_tokens.Length];
+ int[] endOffsets = new int[out_tokens.Length];
+ string[] types = new string[out_tokens.Length];
+ for (int i = 0; i < out_tokens.Length; i++)
+ {
+ terms[i] = out_tokens[i].termText;
+ startOffsets[i] = out_tokens[i].start;
+ endOffsets[i] = out_tokens[i].end;
+ types[i] = out_tokens[i].type;
+ }
+ assertAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, null);
+ }
+
+ /// <summary>
+ /// Asserts that analyzer <paramref name="a"/> turns <paramref name="str"/> into the terms, offsets and types listed in <paramref name="out_tokens"/>.
+ /// </summary>
+ public virtual void checkCJKTokenReusable(Analyzer a, string str, TestToken[] out_tokens)
+ {
+ // Use the caller-supplied analyzer instead of building a fresh one, so repeated calls exercise reuse of the same instance.
+ string[] terms = new string[out_tokens.Length];
+ int[] startOffsets = new int[out_tokens.Length];
+ int[] endOffsets = new int[out_tokens.Length];
+ string[] types = new string[out_tokens.Length];
+ for (int i = 0; i < out_tokens.Length; i++)
+ {
+ terms[i] = out_tokens[i].termText;
+ startOffsets[i] = out_tokens[i].start;
+ endOffsets[i] = out_tokens[i].end;
+ types[i] = out_tokens[i].type;
+ }
+ assertAnalyzesTo(a, str, terms, startOffsets, endOffsets, types, null);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testJa1() throws java.io.IOException
+ public virtual void testJa1()
+ {
+ string str = "\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u4e00\u4e8c", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e8c\u4e09", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e09\u56db", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u56db\u4e94", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e94\u516d", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u516d\u4e03", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e03\u516b", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u516b\u4e5d", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e5d\u5341", 8,10, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testJa2() throws java.io.IOException
+ public virtual void testJa2()
+ {
+ string str = "\u4e00 \u4e8c\u4e09\u56db \u4e94\u516d\u4e03\u516b\u4e5d \u5341";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e8c\u4e09", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e09\u56db", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e94\u516d", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u516d\u4e03", 7, 9, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u4e03\u516b", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u516b\u4e5d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u5341", 12,13, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testC() throws java.io.IOException
+ public virtual void testC()
+ {
+ string str = "abc defgh ijklmn opqrstu vwxy z";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("abc", 0, 3, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("defgh", 4, 9, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("ijklmn", 10, 16, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("opqrstu", 17, 24, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("vwxy", 25, 29, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("z", 30, 31, CJKTokenizer.SINGLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMix() throws java.io.IOException
+ public virtual void testMix()
+ {
+ string str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 10,12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3051\u3053", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMix2() throws java.io.IOException
+ public virtual void testMix2()
+ {
+ string str = "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u3093", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3053", 14,15, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSingleChar() throws java.io.IOException
+ public virtual void testSingleChar()
+ {
+ string str = "\u4e00";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+ /*
+ * Full-width text is normalized to half-width
+ */
+ // The input is spelled with \uFFxx escapes (full-width "Test 1234") so that tooling cannot silently
+ // fold it to ASCII; each character is one UTF-16 unit, so the expected offsets are unchanged.
+ public virtual void testFullWidth()
+ {
+ string str = "\uff34\uff45\uff53\uff54 \uff11\uff12\uff13\uff14";
+ TestToken[] out_tokens = new TestToken[] {newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("1234", 5, 9, CJKTokenizer.SINGLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+ /*
+ * Non-english text (not just CJK) is treated the same as CJK: C1C2 C2C3
+ */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNonIdeographic() throws Exception
+ public virtual void testNonIdeographic()
+ {
+ string str = "\u4e00 روبرت موير";
+ TestToken[] out_tokens = new TestToken[] {newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("رو", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("وب", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("بر", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("رت", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("مو", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("وي", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ير", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+ /*
+ * Non-english text with nonletters (non-spacing marks,etc) is treated as C1C2 C2C3,
+ * except for words are split around non-letters.
+ */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNonIdeographicNonLetter() throws Exception
+ public virtual void testNonIdeographicNonLetter()
+ {
+ string str = "\u4e00 رُوبرت موير";
+ TestToken[] out_tokens = new TestToken[] {newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ر", 2, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("وب", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("بر", 5, 7, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("رت", 6, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("مو", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("وي", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ير", 11, 13, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKToken(str, out_tokens);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTokenStream() throws Exception
+ public virtual void testTokenStream()
+ {
+ Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02", new string[] {"\u4e00\u4e01", "\u4e01\u4e02"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReusableTokenStream() throws Exception
+ public virtual void testReusableTokenStream()
+ {
+ Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
+ string str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
+
+ TestToken[] out_tokens = new TestToken[] {newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("abc", 5, 8, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 8, 10, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 10,12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3051\u3053", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKTokenReusable(analyzer, str, out_tokens);
+
+ str = "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053";
+ TestToken[] out_tokens2 = new TestToken[] {newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3046\u3048", 2, 4, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3048\u304a", 3, 5, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("ab", 5, 7, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u3093", 7, 8, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("c", 8, 9, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("\u304b\u304d", 9, 11, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304d\u304f", 10, 12, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u304f\u3051", 11,13, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("\u3053", 14,15, CJKTokenizer.DOUBLE_TOKEN_TYPE)};
+ checkCJKTokenReusable(analyzer, str, out_tokens2);
+ }
+
+ /// <summary>
+ /// LUCENE-2207: wrong offset calculated by end()
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFinalOffset() throws java.io.IOException
+ public virtual void testFinalOffset()
+ {
+ checkCJKToken("あい", new TestToken[] {newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE)});
+ checkCJKToken("あい ", new TestToken[] {newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE)});
+ checkCJKToken("test", new TestToken[] {newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE)});
+ checkCJKToken("test ", new TestToken[] {newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE)});
+ checkCJKToken("あいtest", new TestToken[] {newToken("あい", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE), newToken("test", 2, 6, CJKTokenizer.SINGLE_TOKEN_TYPE)});
+ checkCJKToken("testあい ", new TestToken[] {newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE)});
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), new CJKAnalyzer(Version.LUCENE_30), 10000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
new file mode 100644
index 0000000..ab58c6f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
@@ -0,0 +1,62 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the CJK tokenizer factory is working. </summary>
+ /// @deprecated remove this test in 5.0
+ [Obsolete("remove this test in 5.0")]
+ public class TestCJKTokenizerFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Ensure the tokenizer actually tokenizes CJK text correctly
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTokenizer() throws Exception
+ public virtual void testTokenizer()
+ {
+ Reader reader = new StringReader("我是中国人");
+ TokenStream stream = tokenizerFactory("CJK").create(reader);
+ assertTokenStreamContents(stream, new string[] {"我是", "是中", "中国", "国人"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ // Building the tokenizer factory with an unrecognized argument must throw ArgumentException whose
+ // message mentions "Unknown parameters"; reaching fail() means nothing was thrown.
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenizerFactory("CJK", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
new file mode 100644
index 0000000..85f195b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
@@ -0,0 +1,100 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Tests for <see cref="CJKWidthFilter"/>
+ /// </summary>
+ public class TestCJKWidthFilter : BaseTokenStreamTestCase
+ {
+ private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(); // whitespace MockTokenizer + CJKWidthFilter
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(source, new CJKWidthFilter(source));
+ }
+ }
+
+ /// <summary>
+ /// Full-width ASCII forms (U+FF21.., U+FF10..) normalized to half-width (basic latin)
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFullWidthASCII() throws java.io.IOException
+ public virtual void testFullWidthASCII()
+ {
+ assertAnalyzesTo(analyzer, "\uFF34\uFF45\uFF53\uFF54 \uFF11\uFF12\uFF13\uFF14", new string[] {"Test", "1234"}, new int[] {0, 5}, new int[] {4, 9}); // escaped so the full-width input cannot be folded to ASCII in transit
+ }
+
+ /// <summary>
+ /// Half-width katakana forms normalized to standard katakana.
+ /// A bit trickier in some cases, since half-width forms are decomposed
+ /// and voice marks need to be recombined with a preceding base form.
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHalfWidthKana() throws java.io.IOException
+ public virtual void testHalfWidthKana()
+ {
+ assertAnalyzesTo(analyzer, "\uFF76\uFF80\uFF76\uFF85", new string[] {"カタカナ"}); // plain half-width letters
+ assertAnalyzesTo(analyzer, "\uFF73\uFF9E\uFF68\uFF6F\uFF82", new string[] {"ヴィッツ"}); // U+FF9E voiced mark follows its base
+ assertAnalyzesTo(analyzer, "\uFF8A\uFF9F\uFF85\uFF7F\uFF86\uFF6F\uFF78", new string[] {"パナソニック"}); // U+FF9F semi-voiced mark follows its base
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomData() throws java.io.IOException
+ public virtual void testRandomData()
+ {
+ checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+ checkOneTerm(a, "", ""); // the empty string is expected back as a single empty term
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly TestCJKWidthFilter outerInstance; // stored by the converter; not read anywhere in this class
+
+ public AnalyzerAnonymousInnerClassHelper2(TestCJKWidthFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
new file mode 100644
index 0000000..b139d10
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the CJKWidthFilterFactory is working
+ /// </summary>
+ public class TestCJKWidthFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws Exception
+ public virtual void test()
+ {
+ Reader reader = new StringReader("\uFF34\uFF45\uFF53\uFF54 \uFF11\uFF12\uFF13\uFF14"); // full-width "Test 1234", escaped so it cannot be folded to ASCII in transit
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("CJKWidth").create(stream);
+ assertTokenStreamContents(stream, new string[] {"Test", "1234"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("CJKWidth", "bogusArg", "bogusValue");
+ fail(); // only reached when the factory accepted the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters")); // System.String exposes Contains, not Java's contains
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
new file mode 100644
index 0000000..3a9a555
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+ /// <summary>
+ /// Test the Sorani analyzer
+ /// </summary>
+ public class TestSoraniAnalyzer : BaseTokenStreamTestCase
+ {
+
+ /// <summary>
+ /// Only constructs the analyzer with its defaults. Per the original Java note, this fails with an NPE when the stopwords file is missing from the classpath.
+ /// </summary>
+ public virtual void testResourcesAvailable()
+ {
+ new SoraniAnalyzer(TEST_VERSION_CURRENT);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopwords() throws java.io.IOException
+ public virtual void testStopwords()
+ {
+ Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "ئەم پیاوە", new string[] {"پیاو"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCustomStopwords() throws java.io.IOException
+ public virtual void testCustomStopwords()
+ {
+ Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+ assertAnalyzesTo(a, "ئەم پیاوە", new string[] {"ئەم", "پیاو"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReusableTokenStream() throws java.io.IOException
+ public virtual void testReusableTokenStream()
+ {
+ Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(a, "پیاوە", new string[] {"پیاو"});
+ assertAnalyzesTo(a, "پیاو", new string[] {"پیاو"});
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testWithStemExclusionSet() throws java.io.IOException
+ public virtual void testWithStemExclusionSet()
+ {
+ CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+ set.add("پیاوە");
+ Analyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set);
+ assertAnalyzesTo(a, "پیاوە", new string[] {"پیاوە"});
+ }
+
+ /// <summary>
+ /// Runs checkRandomData against a default-constructed analyzer, scaled by RANDOM_MULTIPLIER </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+ public virtual void testRandomStrings()
+ {
+ checkRandomData(random(), new SoraniAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
new file mode 100644
index 0000000..90738f0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
@@ -0,0 +1,122 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Tests normalization for Sorani (this is more critical than stemming...)
+ /// </summary>
+ public class TestSoraniNormalizationFilter : BaseTokenStreamTestCase
+ {
+ internal Analyzer a = new AnalyzerAnonymousInnerClassHelper(); // KeywordTokenizer + SoraniNormalizationFilter, used by every test below
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ public AnalyzerAnonymousInnerClassHelper()
+ {
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SoraniNormalizationFilter(tokenizer));
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testY() throws Exception
+ public virtual void testY()
+ {
+ checkOneTerm(a, "\u064A", "\u06CC"); // ARABIC LETTER YEH -> ARABIC LETTER FARSI YEH
+ checkOneTerm(a, "\u0649", "\u06CC"); // ARABIC LETTER ALEF MAKSURA -> FARSI YEH
+ checkOneTerm(a, "\u06CC", "\u06CC"); // FARSI YEH is expected back unchanged
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testK() throws Exception
+ public virtual void testK()
+ {
+ checkOneTerm(a, "\u0643", "\u06A9"); // ARABIC LETTER KAF -> ARABIC LETTER KEHEH
+ checkOneTerm(a, "\u06A9", "\u06A9"); // KEHEH is expected back unchanged
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testH() throws Exception
+ public virtual void testH()
+ {
+ // initial
+ checkOneTerm(a, "\u0647\u200C", "\u06D5"); // ARABIC LETTER HEH + ZERO WIDTH NON-JOINER -> ARABIC LETTER AE
+ // medial
+ checkOneTerm(a, "\u0647\u200C\u06A9", "\u06D5\u06A9"); // same pair followed by KEHEH, which is kept
+
+ checkOneTerm(a, "\u06BE", "\u0647"); // ARABIC LETTER HEH DOACHASHMEE -> HEH
+ checkOneTerm(a, "\u0629", "\u06D5"); // ARABIC LETTER TEH MARBUTA -> AE
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFinalH() throws Exception
+ public virtual void testFinalH()
+ {
+ // always (and in final form by def), so frequently omitted
+ checkOneTerm(a, "\u0647\u0647\u0647", "\u0647\u0647\u06D5"); // only the last of three HEH (U+0647) is expected to become AE (U+06D5)
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRR() throws Exception
+ public virtual void testRR()
+ {
+ checkOneTerm(a, "\u0692", "\u0695"); // ARABIC LETTER REH WITH SMALL V -> REH WITH SMALL V BELOW
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInitialRR() throws Exception
+ public virtual void testInitialRR()
+ {
+ // always, so frequently omitted
+ checkOneTerm(a, "\u0631\u0631\u0631", "\u0695\u0631\u0631"); // only the first of three REH (U+0631) is expected to become U+0695
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRemove() throws Exception
+ public virtual void testRemove()
+ {
+ checkOneTerm(a, "\u0640", ""); // ARABIC TATWEEL
+ checkOneTerm(a, "\u064B", ""); // ARABIC FATHATAN
+ checkOneTerm(a, "\u064C", ""); // ARABIC DAMMATAN
+ checkOneTerm(a, "\u064D", ""); // ARABIC KASRATAN
+ checkOneTerm(a, "\u064E", ""); // ARABIC FATHA
+ checkOneTerm(a, "\u064F", ""); // ARABIC DAMMA
+ checkOneTerm(a, "\u0650", ""); // ARABIC KASRA
+ checkOneTerm(a, "\u0651", ""); // ARABIC SHADDA
+ checkOneTerm(a, "\u0652", ""); // ARABIC SUKUN
+ // we peek backwards in this case to look for h+200C, ensure this works
+ checkOneTerm(a, "\u200C", ""); // a lone ZERO WIDTH NON-JOINER, with nothing before it
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ checkOneTerm(a, "", ""); // the empty string is expected back as a single empty term
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
new file mode 100644
index 0000000..30e5d0b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
@@ -0,0 +1,58 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Sorani normalization factory is working.
+ /// </summary>
+ public class TestSoraniNormalizationFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNormalization() throws Exception
+ public virtual void testNormalization()
+ {
+ Reader reader = new StringReader("پیــــاوەکان");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("SoraniNormalization").create(stream);
+ assertTokenStreamContents(stream, new string[] {"پیاوەکان"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("SoraniNormalization", "bogusArg", "bogusValue");
+ fail(); // only reached when the factory accepted the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters")); // System.String exposes Contains, not Java's contains
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
new file mode 100644
index 0000000..4aebe0e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
@@ -0,0 +1,144 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+// import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
+
+
+ using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+ /// <summary>
+ /// Test the Sorani Stemmer.
+ /// </summary>
+ public class TestSoraniStemFilter : BaseTokenStreamTestCase
+ {
+ internal SoraniAnalyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT); // default analyzer used by every test except testEmptyTerm
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testIndefiniteSingular() throws Exception
+ public virtual void testIndefiniteSingular()
+ {
+ checkOneTerm(a, "پیاوێک", "پیاو"); // -ek
+ checkOneTerm(a, "دەرگایەک", "دەرگا"); // -yek
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefiniteSingular() throws Exception
+ public virtual void testDefiniteSingular()
+ {
+ checkOneTerm(a, "پیاوەكە", "پیاو"); // -aka
+ checkOneTerm(a, "دەرگاكە", "دەرگا"); // -ka
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDemonstrativeSingular() throws Exception
+ public virtual void testDemonstrativeSingular()
+ {
+ checkOneTerm(a, "کتاویە", "کتاوی"); // -a
+ checkOneTerm(a, "دەرگایە", "دەرگا"); // -ya
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testIndefinitePlural() throws Exception
+ public virtual void testIndefinitePlural()
+ {
+ checkOneTerm(a, "پیاوان", "پیاو"); // -An
+ checkOneTerm(a, "دەرگایان", "دەرگا"); // -yAn
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDefinitePlural() throws Exception
+ public virtual void testDefinitePlural()
+ {
+ checkOneTerm(a, "پیاوەکان", "پیاو"); // -akAn
+ checkOneTerm(a, "دەرگاکان", "دەرگا"); // -kAn
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDemonstrativePlural() throws Exception
+ public virtual void testDemonstrativePlural()
+ {
+ checkOneTerm(a, "پیاوانە", "پیاو"); // -Ana
+ checkOneTerm(a, "دەرگایانە", "دەرگا"); // -yAna
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEzafe() throws Exception
+ public virtual void testEzafe()
+ {
+ checkOneTerm(a, "هۆتیلی", "هۆتیل"); // singular
+ checkOneTerm(a, "هۆتیلێکی", "هۆتیل"); // indefinite
+ checkOneTerm(a, "هۆتیلانی", "هۆتیل"); // plural
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testPostpositions() throws Exception
+ public virtual void testPostpositions()
+ {
+ checkOneTerm(a, "دوورەوە", "دوور"); // -awa
+ checkOneTerm(a, "نیوەشەودا", "نیوەشەو"); // -dA
+ checkOneTerm(a, "سۆرانا", "سۆران"); // -A
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testPossessives() throws Exception
+ public virtual void testPossessives()
+ {
+ checkOneTerm(a, "پارەمان", "پارە"); // -mAn
+ checkOneTerm(a, "پارەتان", "پارە"); // -tAn
+ checkOneTerm(a, "پارەیان", "پارە"); // -yAn
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
+ public virtual void testEmptyTerm()
+ {
+ Analyzer emptyTermAnalyzer = new AnalyzerAnonymousInnerClassHelper(this); // distinct name: no longer hides the field 'a'
+ checkOneTerm(emptyTermAnalyzer, "", "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly TestSoraniStemFilter outerInstance; // stored by the converter; not read anywhere in this class
+
+ public AnalyzerAnonymousInnerClassHelper(TestSoraniStemFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new SoraniStemFilter(tokenizer));
+ }
+ }
+
+ /// <summary>
+ /// test against a basic vocabulary file </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testVocabulary() throws Exception
+ public virtual void testVocabulary()
+ {
+ // top 8k words or so: freq > 1000
+ VocabularyAssert.assertVocabulary(a, getDataFile("ckbtestdata.zip"), "testdata.txt"); // qualified: the Java 'import static' for assertVocabulary was not converted
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
new file mode 100644
index 0000000..ae5bdd8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
@@ -0,0 +1,58 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Sorani stem factory is working.
+ /// </summary>
+ public class TestSoraniStemFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStemming() throws Exception
+ public virtual void testStemming()
+ {
+ Reader reader = new StringReader("پیاوەکان");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("SoraniStem").create(stream);
+ assertTokenStreamContents(stream, new string[] {"پیاو"});
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("SoraniStem", "bogusArg", "bogusValue");
+ fail(); // only reached when the factory accepted the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters")); // System.String exposes Contains, not Java's contains
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
new file mode 100644
index 0000000..5b8f9b1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.cn
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Chinese filter factory is working.
+ /// </summary>
+ public class TestChineseFilterFactory : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Ensure the filter actually normalizes text (numerics, stopwords)
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFiltering() throws Exception
+ public virtual void testFiltering()
+ {
+ Reader reader = new StringReader("this 1234 Is such a silly filter");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = tokenFilterFactory("Chinese").create(stream);
+ assertTokenStreamContents(stream, new string[] {"Is", "silly", "filter"}); // "this", "1234", "such" and "a" are expected to be dropped
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+ public virtual void testBogusArguments()
+ {
+ try
+ {
+ tokenFilterFactory("Chinese", "bogusArg", "bogusValue");
+ fail(); // only reached when the factory accepted the unknown argument
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters")); // System.String exposes Contains, not Java's contains
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
new file mode 100644
index 0000000..0cc6a8d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
@@ -0,0 +1,151 @@
+using System;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using org.apache.lucene.analysis;
+ using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
+ using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+ using Version = org.apache.lucene.util.Version;
+
+
+ /// <remarks>Deprecated: remove this test when ChineseAnalyzer is removed.</remarks>
+ [Obsolete("Remove this test when ChineseAnalyzer is removed.")]
+ public class TestChineseTokenizer : BaseTokenStreamTestCase
+ {
+     /// <summary>
+     /// Each token of "a天b" is expected to span one char: offsets (0,1), then (1,2), and so on. </summary>
+     public virtual void testOtherLetterOffset()
+     {
+         string s = "a天b";
+         ChineseTokenizer tokenizer = new ChineseTokenizer(new StringReader(s));
+
+         int correctStartOffset = 0;
+         int correctEndOffset = 1;
+         OffsetAttribute offsetAtt = tokenizer.getAttribute(typeof(OffsetAttribute));
+         tokenizer.reset();
+         while (tokenizer.incrementToken())
+         {
+             assertEquals(correctStartOffset, offsetAtt.startOffset());
+             assertEquals(correctEndOffset, offsetAtt.endOffset());
+             correctStartOffset++;
+             correctEndOffset++;
+         }
+         tokenizer.end();
+         tokenizer.close();
+     }
+
+     /// <summary>
+     /// Runs two inputs through one ChineseAnalyzer instance, checking terms and (presumably) start/end offsets. </summary>
+     public virtual void testReusableTokenStream()
+     {
+         Analyzer a = new ChineseAnalyzer();
+         assertAnalyzesTo(a, "中华人民共和国", new string[] {"中", "华", "人", "民", "共", "和", "国"}, new int[] {0, 1, 2, 3, 4, 5, 6}, new int[] {1, 2, 3, 4, 5, 6, 7});
+         assertAnalyzesTo(a, "北京市", new string[] {"北", "京", "市"}, new int[] {0, 1, 2}, new int[] {1, 2, 3});
+     }
+
+     /// <summary>
+     /// Analyzer that just uses ChineseTokenizer, not ChineseFilter.
+     /// Convenience to show the behavior of the tokenizer.
+     /// </summary>
+     private class JustChineseTokenizerAnalyzer : Analyzer
+     {
+         private readonly TestChineseTokenizer outerInstance;
+
+         public JustChineseTokenizerAnalyzer(TestChineseTokenizer outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         // C# overrides cannot widen accessibility (CS0507); use the same modifiers as the other createComponents overrides in these tests.
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             return new TokenStreamComponents(new ChineseTokenizer(reader));
+         }
+     }
+
+     /// <summary>
+     /// Analyzer that just uses ChineseFilter, not ChineseTokenizer.
+     /// Convenience to show the behavior of the filter.
+     /// </summary>
+     private class JustChineseFilterAnalyzer : Analyzer
+     {
+         private readonly TestChineseTokenizer outerInstance;
+
+         public JustChineseFilterAnalyzer(TestChineseTokenizer outerInstance)
+         {
+             this.outerInstance = outerInstance;
+         }
+
+         // C# overrides cannot widen accessibility (CS0507); use the same modifiers as the other createComponents overrides in these tests.
+         protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+         {
+             Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
+             return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
+         }
+     }
+
+     /// <summary>
+     /// ChineseTokenizer tokenizes numbers as one token,
+     /// but they are filtered by ChineseFilter.
+     /// </summary>
+     public virtual void testNumerics()
+     {
+         Analyzer justTokenizer = new JustChineseTokenizerAnalyzer(this);
+         assertAnalyzesTo(justTokenizer, "中1234", new string[] {"中", "1234"});
+
+         // in this case the ChineseAnalyzer (which applies ChineseFilter) will remove the numeric token.
+         Analyzer a = new ChineseAnalyzer();
+         assertAnalyzesTo(a, "中1234", new string[] {"中"});
+     }
+
+     /// <summary>
+     /// ChineseTokenizer tokenizes English similar to SimpleAnalyzer;
+     /// it will lowercase terms automatically.
+     /// </summary>
+     /// <remarks>
+     /// ChineseFilter has an English stopword list; it also removes any single character tokens.
+     /// The stopword list is case-sensitive, which is why the filter-only analyzer below
+     /// is expected to keep "This" and "Test.".
+     /// </remarks>
+     public virtual void testEnglish()
+     {
+         Analyzer chinese = new ChineseAnalyzer();
+         assertAnalyzesTo(chinese, "This is a Test. b c d", new string[] {"test"});
+
+         Analyzer justTokenizer = new JustChineseTokenizerAnalyzer(this);
+         assertAnalyzesTo(justTokenizer, "This is a Test. b c d", new string[] {"this", "is", "a", "test", "b", "c", "d"});
+
+         Analyzer justFilter = new JustChineseFilterAnalyzer(this);
+         assertAnalyzesTo(justFilter, "This is a Test. b c d", new string[] {"This", "Test."});
+     }
+
+     /// <summary>
+     /// Blast some random strings through the analyzer.
+     /// </summary>
+     public virtual void testRandomStrings()
+     {
+         checkRandomData(random(), new ChineseAnalyzer(), 10000 * RANDOM_MULTIPLIER);
+     }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
new file mode 100644
index 0000000..03f8055
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
@@ -0,0 +1,59 @@
+namespace org.apache.lucene.analysis.cn
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+ /// <summary>
+ /// Simple tests to ensure the Chinese tokenizer factory is working.
+ /// </summary>
+ public class TestChineseTokenizerFactory : BaseTokenStreamFactoryTestCase
+ {
+     /// <summary>
+     /// Ensure the tokenizer actually tokenizes Chinese text correctly:
+     /// each of the five input characters is expected as its own token.
+     /// </summary>
+     public virtual void testTokenizer()
+     {
+         Reader reader = new StringReader("我是中国人");
+         TokenStream stream = tokenizerFactory("Chinese").create(reader);
+         assertTokenStreamContents(stream, new string[] {"我", "是", "中", "国", "人"});
+     }
+
+     /// <summary>
+     /// Test that bogus arguments result in a <see cref="System.ArgumentException"/>
+     /// whose message mentions "Unknown parameters"; any other outcome fails the test.
+     /// </summary>
+     public virtual void testBogusArguments()
+     {
+         try
+         {
+             tokenizerFactory("Chinese", "bogusArg", "bogusValue");
+             fail();
+         }
+         catch (System.ArgumentException expected)
+         {
+             // System.String exposes Contains (PascalCase); the Java-style "contains" does not exist in .NET.
+             assertTrue(expected.Message.Contains("Unknown parameters"));
+         }
+     }
+ }
+
+}
\ No newline at end of file