You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/12/10 19:39:13 UTC

[24/27] lucenenet git commit: adding converted analysis common tests

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
new file mode 100644
index 0000000..12e0b5b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilter.cs
@@ -0,0 +1,171 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+
+	public class TestCJKBigramFilter : BaseTokenStreamTestCase
+	{
+	  /// <summary>
+	  /// Analyzer used by the bigram-only tests: StandardTokenizer followed by a
+	  /// CJKBigramFilter created with its one-argument constructor.
+	  /// </summary>
+	  internal Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Named replacement for the anonymous Analyzer subclass of the Java original.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  /// <summary>
+		  /// Builds the chain StandardTokenizer -> CJKBigramFilter over <paramref name="reader"/>.
+		  /// <paramref name="fieldName"/> is not consulted.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			CJKBigramFilter bigrams = new CJKBigramFilter(source);
+			return new TokenStreamComponents(source, bigrams);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Analyzer used by the unigram+bigram tests: StandardTokenizer followed by
+	  /// CJKBigramFilter(tokenizer, 0xff, true).
+	  /// </summary>
+	  internal Analyzer unibiAnalyzer = new AnalyzerAnonymousInnerClassHelper2();
+
+	  /// <summary>
+	  /// Named replacement for the anonymous Analyzer subclass of the Java original.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  /// <summary>
+		  /// Builds the chain StandardTokenizer -> CJKBigramFilter over <paramref name="reader"/>.
+		  /// <paramref name="fieldName"/> is not consulted.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			// NOTE(review): 0xff presumably selects every script flag and 'true' presumably turns on
+			// unigram output (the tests using this analyzer expect both) - confirm against CJKBigramFilter.
+			CJKBigramFilter bigrams = new CJKBigramFilter(source, 0xff, true);
+			return new TokenStreamComponents(source, bigrams);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Runs eleven back-to-back copies of the sample sentence through <c>analyzer</c> and expects one
+	  /// unbroken run of overlapping bigrams, including the "た多" bigram at every seam between copies.
+	  /// </summary>
+	  public virtual void testHuge()
+	  {
+		// The expected terms are laid out one sentence cycle per line. The former single long line had
+		// been hard-wrapped in the middle of the literal "の学", which neither compiles nor matches the
+		// token the filter is expected to produce.
+		assertAnalyzesTo(analyzer,
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" +
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" +
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
+		  new string[] {
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた", "た多",
+			"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"
+		  });
+	  }
+
+	  /// <summary>
+	  /// Uses a CJKBigramFilter restricted to CJKBigramFilter.HAN. The expected output pairs only the
+	  /// adjacent Han characters ("学生", "試験"); every other character is expected as its own token.
+	  /// Offsets, types, position increments and position lengths are checked as well.
+	  /// </summary>
+	  public virtual void testHanOnly()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+		assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+		  new string[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"},
+		  new int[] {0, 1, 2, 3, 5, 6, 8, 9, 10, 11},
+		  new int[] {1, 2, 3, 5, 6, 8, 9, 10, 11, 12},
+		  // One type per expected term. The list used to carry an eleventh, unmatched "<SINGLE>"
+		  // entry that corresponded to no token.
+		  new string[] {"<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>"},
+		  new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+		  new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Named replacement for the anonymous Analyzer of testHanOnly:
+	  /// StandardTokenizer followed by CJKBigramFilter(tokenizer, CJKBigramFilter.HAN).
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored by the constructor and not read in this class.
+		  private readonly TestCJKBigramFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestCJKBigramFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Builds the tokenizer/filter chain over <paramref name="reader"/>; <paramref name="fieldName"/> is not consulted.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			CJKBigramFilter hanBigrams = new CJKBigramFilter(source, CJKBigramFilter.HAN);
+			return new TokenStreamComponents(source, hanBigrams);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Uses CJKBigramFilter(tokenizer, 0xff, false) and expects the sample sentence to come out
+	  /// as overlapping bigrams only.
+	  /// </summary>
+	  public virtual void testAllScripts()
+	  {
+		string[] expectedBigrams = new string[] {"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"};
+		Analyzer allScripts = new AnalyzerAnonymousInnerClassHelper4(this);
+		assertAnalyzesTo(allScripts, "多くの学生が試験に落ちた。", expectedBigrams);
+	  }
+
+	  /// <summary>
+	  /// Named replacement for the anonymous Analyzer of testAllScripts:
+	  /// StandardTokenizer followed by CJKBigramFilter(tokenizer, 0xff, false).
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored by the constructor and not read in this class.
+		  private readonly TestCJKBigramFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper4(TestCJKBigramFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Builds the tokenizer/filter chain over <paramref name="reader"/>; <paramref name="fieldName"/> is not consulted.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			CJKBigramFilter bigrams = new CJKBigramFilter(source, 0xff, false);
+			return new TokenStreamComponents(source, bigrams);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Runs the sample sentence through <c>unibiAnalyzer</c> and expects every character as a unigram,
+	  /// each (except the last) followed by the bigram starting at the same offset. Offsets, types,
+	  /// position increments and position lengths are checked too.
+	  /// </summary>
+	  public virtual void testUnigramsAndBigramsAllScripts()
+	  {
+		string[] terms = new string[] {"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"};
+		int[] startOffsets = new int[] {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11};
+		int[] endOffsets = new int[] {1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12};
+		string[] types = new string[] {"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>"};
+		int[] positionIncrements = new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
+		int[] positionLengths = new int[] {1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1};
+
+		assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。", terms, startOffsets, endOffsets, types, positionIncrements, positionLengths);
+	  }
+
+	  /// <summary>
+	  /// Uses CJKBigramFilter(tokenizer, CJKBigramFilter.HAN, true). The expected output contains every
+	  /// character as its own token plus the two Han bigrams "学生" and "試験", which are expected at
+	  /// position increment 0 with position length 2.
+	  /// </summary>
+	  public virtual void testUnigramsAndBigramsHanOnly()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper5(this);
+		assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
+		  new string[] {"多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た"},
+		  new int[] {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11},
+		  new int[] {1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12},
+		  // One type per expected term. The list used to carry a fifteenth, unmatched "<SINGLE>"
+		  // entry that corresponded to no token.
+		  new string[] {"<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>"},
+		  new int[] {1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1},
+		  new int[] {1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Named replacement for the anonymous Analyzer of testUnigramsAndBigramsHanOnly:
+	  /// StandardTokenizer followed by CJKBigramFilter(tokenizer, CJKBigramFilter.HAN, true).
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored by the constructor and not read in this class.
+		  private readonly TestCJKBigramFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper5(TestCJKBigramFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Builds the tokenizer/filter chain over <paramref name="reader"/>; <paramref name="fieldName"/> is not consulted.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			CJKBigramFilter hanUniBigrams = new CJKBigramFilter(source, CJKBigramFilter.HAN, true);
+			return new TokenStreamComponents(source, hanUniBigrams);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Runs eleven back-to-back copies of the sample sentence through <c>unibiAnalyzer</c> and expects
+	  /// each character as a unigram followed by the bigram that starts at it, including the "た多"
+	  /// bigram at every seam between copies. The final "た" has no bigram after it.
+	  /// </summary>
+	  public virtual void testUnigramsAndBigramsHuge()
+	  {
+		// The expected terms are laid out one sentence cycle per line. The former single long line had
+		// been hard-wrapped into three physical lines, which broke the statement apart.
+		assertAnalyzesTo(unibiAnalyzer,
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" +
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" +
+		  "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
+		  new string[] {
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た", "た多",
+			"多", "多く", "く", "くの", "の", "の学", "学", "学生", "生", "生が", "が", "が試", "試", "試験", "験", "験に", "に", "に落", "落", "落ち", "ち", "ちた", "た"
+		  });
+	  }
+
+	  /// <summary>
+	  /// Pushes random data through <c>unibiAnalyzer</c>, passing 1000 * RANDOM_MULTIPLIER to checkRandomData.
+	  /// </summary>
+	  public virtual void testRandomUnibiStrings()
+	  {
+		Analyzer target = unibiAnalyzer;
+		checkRandomData(random(), target, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// Pushes random data through <c>unibiAnalyzer</c>, passing 100 * RANDOM_MULTIPLIER and 8192 to
+	  /// checkRandomData (8192 is presumably the maximum generated string length - confirm against the
+	  /// base class).
+	  /// </summary>
+	  public virtual void testRandomUnibiHugeStrings()
+	  {
+		// The result of random() is passed straight through. Storing it in a local that is itself
+		// called 'random' hides the method inside this body, so 'random()' would not compile in C#.
+		checkRandomData(random(), unibiAnalyzer, 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
new file mode 100644
index 0000000..fd0d186
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKBigramFilterFactory.cs
@@ -0,0 +1,77 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the CJK bigram factory is working.
+	/// </summary>
+	public class TestCJKBigramFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// A "CJKBigram" filter created without arguments over the "standard" tokenizer is expected
+	  /// to turn the sample sentence into overlapping bigrams only.
+	  /// </summary>
+	  public virtual void testDefaults()
+	  {
+		string[] expectedTerms = new string[] {"多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた"};
+		Reader input = new StringReader("多くの学生が試験に落ちた。");
+		TokenStream tokens = tokenizerFactory("standard").create(input);
+		TokenStream bigrams = tokenFilterFactory("CJKBigram").create(tokens);
+		assertTokenStreamContents(bigrams, expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// With "hiragana" set to "false" the expected output keeps hiragana characters as single
+	  /// tokens and still pairs "学生" and "試験".
+	  /// </summary>
+	  public virtual void testHanOnly()
+	  {
+		string[] expectedTerms = new string[] {"多", "く", "の", "学生", "が", "試験", "に", "落", "ち", "た"};
+		Reader input = new StringReader("多くの学生が試験に落ちた。");
+		TokenStream tokens = tokenizerFactory("standard").create(input);
+		TokenStream bigrams = tokenFilterFactory("CJKBigram", "hiragana", "false").create(tokens);
+		assertTokenStreamContents(bigrams, expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// With "hiragana" set to "false" and "outputUnigrams" set to "true" the expected output holds
+	  /// every character as a single token plus the bigrams "学生" and "試験".
+	  /// </summary>
+	  public virtual void testHanOnlyUnigrams()
+	  {
+		string[] expectedTerms = new string[] {"多", "く", "の", "学", "学生", "生", "が", "試", "試験", "験", "に", "落", "ち", "た"};
+		Reader input = new StringReader("多くの学生が試験に落ちた。");
+		TokenStream tokens = tokenizerFactory("standard").create(input);
+		TokenStream bigrams = tokenFilterFactory("CJKBigram", "hiragana", "false", "outputUnigrams", "true").create(tokens);
+		assertTokenStreamContents(bigrams, expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// Creating the "CJKBigram" filter factory with an unrecognized argument must throw a
+	  /// System.ArgumentException whose message contains "Unknown parameters"; the test fails
+	  /// if no exception is thrown.
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("CJKBigram", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String has no Java-style 'contains' method; the .NET member is 'Contains'.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
new file mode 100644
index 0000000..e1639d5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizer.cs
@@ -0,0 +1,240 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using Version = org.apache.lucene.util.Version;
+
+	/// <remarks>Deprecated: remove when CJKTokenizer is removed (5.0).</remarks>
+	[Obsolete("Remove when CJKTokenizer is removed (5.0)")]
+	public class TestCJKTokenizer : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// Expected-token record used by the helpers of this test class: term text, start and end
+	  /// offset, and token type name.
+	  /// </summary>
+	  /// <remarks>
+	  /// Declared public because public members of the enclosing class (newToken, checkCJKToken,
+	  /// checkCJKTokenReusable) use it in their signatures; a less accessible nested type there is
+	  /// rejected by the C# compiler as inconsistent accessibility.
+	  /// </remarks>
+	  public class TestToken
+	  {
+		  // Reference to the enclosing test instance; stored by the constructor and not read in this class.
+		  private readonly TestCJKTokenizer outerInstance;
+
+		  public TestToken(TestCJKTokenizer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		// Expected term text.
+		internal string termText;
+		// Expected start offset.
+		internal int start;
+		// Expected end offset.
+		internal int end;
+		// Expected token type name (newToken fills it from CJKTokenizer.TOKEN_TYPE_NAMES).
+		internal string type;
+	  }
+
+	  /// <summary>
+	  /// Creates an expected-token record.
+	  /// </summary>
+	  /// <param name="termText"> expected term text </param>
+	  /// <param name="start"> expected start offset </param>
+	  /// <param name="end"> expected end offset </param>
+	  /// <param name="type"> index into CJKTokenizer.TOKEN_TYPE_NAMES; the name found there is stored as the token type </param>
+	  /// <returns> the populated TestToken </returns>
+	  public virtual TestToken newToken(string termText, int start, int end, int type)
+	  {
+		return new TestToken(this)
+		{
+		  termText = termText,
+		  type = CJKTokenizer.TOKEN_TYPE_NAMES[type],
+		  start = start,
+		  end = end
+		};
+	  }
+
+	  /// <summary>
+	  /// Analyzes <paramref name="str"/> with a new CJKAnalyzer(Version.LUCENE_30) and asserts that the
+	  /// terms, offsets and types match <paramref name="out_tokens"/>; null is passed for the last
+	  /// argument of assertAnalyzesTo.
+	  /// </summary>
+	  /// <param name="str"> text to analyze </param>
+	  /// <param name="out_tokens"> expected tokens, in order </param>
+	  public virtual void checkCJKToken(string str, TestToken[] out_tokens)
+	  {
+		Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
+
+		// Unzip the expected tokens into the parallel arrays assertAnalyzesTo takes.
+		int count = out_tokens.Length;
+		string[] terms = new string[count];
+		int[] startOffsets = new int[count];
+		int[] endOffsets = new int[count];
+		string[] types = new string[count];
+		int index = 0;
+		while (index < count)
+		{
+		  TestToken expected = out_tokens[index];
+		  terms[index] = expected.termText;
+		  startOffsets[index] = expected.start;
+		  endOffsets[index] = expected.end;
+		  types[index] = expected.type;
+		  index++;
+		}
+
+		assertAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, null);
+	  }
+
+	  /// <summary>
+	  /// Analyzes <paramref name="str"/> with the caller-supplied analyzer and asserts that the terms,
+	  /// offsets and types match <paramref name="out_tokens"/>. Passing the same analyzer to several
+	  /// calls runs that one instance repeatedly.
+	  /// </summary>
+	  /// <param name="a"> analyzer to run; when null, a new CJKAnalyzer(Version.LUCENE_30) is used </param>
+	  /// <param name="str"> text to analyze </param>
+	  /// <param name="out_tokens"> expected tokens, in order </param>
+	  public virtual void checkCJKTokenReusable(Analyzer a, string str, TestToken[] out_tokens)
+	  {
+		// 'a' used to be ignored in favour of a fresh analyzer on every call, so no instance was
+		// ever reused. The null fallback keeps the old behaviour for callers that pass nothing useful.
+		Analyzer analyzer = a ?? new CJKAnalyzer(Version.LUCENE_30);
+		string[] terms = new string[out_tokens.Length];
+		int[] startOffsets = new int[out_tokens.Length];
+		int[] endOffsets = new int[out_tokens.Length];
+		string[] types = new string[out_tokens.Length];
+		for (int i = 0; i < out_tokens.Length; i++)
+		{
+		  terms[i] = out_tokens[i].termText;
+		  startOffsets[i] = out_tokens[i].start;
+		  endOffsets[i] = out_tokens[i].end;
+		  types[i] = out_tokens[i].type;
+		}
+		assertAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, null);
+	  }
+
+	  /// <summary>
+	  /// Ten consecutive ideographs are expected as nine overlapping two-character tokens.
+	  /// </summary>
+	  public virtual void testJa1()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+
+		checkCJKToken("\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341", new TestToken[]
+		{
+		  newToken("\u4e00\u4e8c", 0, 2, dbl),
+		  newToken("\u4e8c\u4e09", 1, 3, dbl),
+		  newToken("\u4e09\u56db", 2, 4, dbl),
+		  newToken("\u56db\u4e94", 3, 5, dbl),
+		  newToken("\u4e94\u516d", 4, 6, dbl),
+		  newToken("\u516d\u4e03", 5, 7, dbl),
+		  newToken("\u4e03\u516b", 6, 8, dbl),
+		  newToken("\u516b\u4e5d", 7, 9, dbl),
+		  newToken("\u4e5d\u5341", 8, 10, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Ideograph runs separated by spaces: bigrams are expected within each run only, and a run of
+	  /// one character is expected as a one-character token that still carries the double type.
+	  /// </summary>
+	  public virtual void testJa2()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+
+		checkCJKToken("\u4e00 \u4e8c\u4e09\u56db \u4e94\u516d\u4e03\u516b\u4e5d \u5341", new TestToken[]
+		{
+		  newToken("\u4e00", 0, 1, dbl),
+		  newToken("\u4e8c\u4e09", 2, 4, dbl),
+		  newToken("\u4e09\u56db", 3, 5, dbl),
+		  newToken("\u4e94\u516d", 6, 8, dbl),
+		  newToken("\u516d\u4e03", 7, 9, dbl),
+		  newToken("\u4e03\u516b", 8, 10, dbl),
+		  newToken("\u516b\u4e5d", 9, 11, dbl),
+		  newToken("\u5341", 12, 13, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Space-separated ASCII words are expected as whole-word tokens of the single type.
+	  /// </summary>
+	  public virtual void testC()
+	  {
+		int single = CJKTokenizer.SINGLE_TOKEN_TYPE;
+
+		checkCJKToken("abc defgh ijklmn opqrstu vwxy z", new TestToken[]
+		{
+		  newToken("abc", 0, 3, single),
+		  newToken("defgh", 4, 9, single),
+		  newToken("ijklmn", 10, 16, single),
+		  newToken("opqrstu", 17, 24, single),
+		  newToken("vwxy", 25, 29, single),
+		  newToken("z", 30, 31, single)
+		});
+	  }
+
+	  /// <summary>
+	  /// Hiragana with an embedded ASCII word: bigrams are expected on both sides and "abc" as one
+	  /// single-type token in between.
+	  /// </summary>
+	  public virtual void testMix()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+		int single = CJKTokenizer.SINGLE_TOKEN_TYPE;
+
+		checkCJKToken("\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053", new TestToken[]
+		{
+		  newToken("\u3042\u3044", 0, 2, dbl),
+		  newToken("\u3044\u3046", 1, 3, dbl),
+		  newToken("\u3046\u3048", 2, 4, dbl),
+		  newToken("\u3048\u304a", 3, 5, dbl),
+		  newToken("abc", 5, 8, single),
+		  newToken("\u304b\u304d", 8, 10, dbl),
+		  newToken("\u304d\u304f", 9, 11, dbl),
+		  newToken("\u304f\u3051", 10, 12, dbl),
+		  newToken("\u3051\u3053", 11, 13, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Hiragana and ASCII interleaved more tightly, with a trailing character after a space:
+	  /// isolated hiragana characters are expected as one-character tokens of the double type.
+	  /// </summary>
+	  public virtual void testMix2()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+		int single = CJKTokenizer.SINGLE_TOKEN_TYPE;
+
+		checkCJKToken("\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053", new TestToken[]
+		{
+		  newToken("\u3042\u3044", 0, 2, dbl),
+		  newToken("\u3044\u3046", 1, 3, dbl),
+		  newToken("\u3046\u3048", 2, 4, dbl),
+		  newToken("\u3048\u304a", 3, 5, dbl),
+		  newToken("ab", 5, 7, single),
+		  newToken("\u3093", 7, 8, dbl),
+		  newToken("c", 8, 9, single),
+		  newToken("\u304b\u304d", 9, 11, dbl),
+		  newToken("\u304d\u304f", 10, 12, dbl),
+		  newToken("\u304f\u3051", 11, 13, dbl),
+		  newToken("\u3053", 14, 15, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// A lone ideograph is expected as one token of length one with the double type.
+	  /// </summary>
+	  public virtual void testSingleChar()
+	  {
+		TestToken only = newToken("\u4e00", 0, 1, CJKTokenizer.DOUBLE_TOKEN_TYPE);
+		checkCJKToken("\u4e00", new TestToken[] {only});
+	  }
+
+	  /// <summary>
+	  /// Full-width text is normalized to half-width: the input is written with full-width Latin
+	  /// letters and digits, and the expected tokens are their lower-case half-width forms.
+	  /// </summary>
+	  public virtual void testFullWidth()
+	  {
+		// Full-width "Test", an ordinary space, then full-width "1234", spelled with \u escapes so the
+		// characters cannot be folded to ASCII by an editor or transport. With plain ASCII input the
+		// test would pass without exercising the width normalization at all.
+		string str = "\uff34\uff45\uff53\uff54 \uff11\uff12\uff13\uff14";
+		TestToken[] out_tokens = new TestToken[] {newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("1234", 5, 9, CJKTokenizer.SINGLE_TOKEN_TYPE)};
+		checkCJKToken(str, out_tokens);
+	  }
+
+	  /// <summary>
+	  /// Non-English text (not just CJK) is treated the same as CJK: C1C2 C2C3.
+	  /// The Arabic words are expected as overlapping bigrams of the double type.
+	  /// </summary>
+	  public virtual void testNonIdeographic()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+
+		checkCJKToken("\u4e00 روبرت موير", new TestToken[]
+		{
+		  newToken("\u4e00", 0, 1, dbl),
+		  newToken("رو", 2, 4, dbl),
+		  newToken("وب", 3, 5, dbl),
+		  newToken("بر", 4, 6, dbl),
+		  newToken("رت", 5, 7, dbl),
+		  newToken("مو", 8, 10, dbl),
+		  newToken("وي", 9, 11, dbl),
+		  newToken("ير", 10, 12, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Non-English text with non-letters (non-spacing marks, etc.) is treated as C1C2 C2C3,
+	  /// except that words are split around the non-letters: the mark after the first Arabic
+	  /// letter leaves that letter as a one-character token.
+	  /// </summary>
+	  public virtual void testNonIdeographicNonLetter()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+
+		checkCJKToken("\u4e00 رُوبرت موير", new TestToken[]
+		{
+		  newToken("\u4e00", 0, 1, dbl),
+		  newToken("ر", 2, 3, dbl),
+		  newToken("وب", 4, 6, dbl),
+		  newToken("بر", 5, 7, dbl),
+		  newToken("رت", 6, 8, dbl),
+		  newToken("مو", 9, 11, dbl),
+		  newToken("وي", 10, 12, dbl),
+		  newToken("ير", 11, 13, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Three ideographs run through CJKAnalyzer(Version.LUCENE_30) are expected as two overlapping bigrams.
+	  /// </summary>
+	  public virtual void testTokenStream()
+	  {
+		string[] expectedBigrams = new string[] {"\u4e00\u4e01", "\u4e01\u4e02"};
+		Analyzer cjk = new CJKAnalyzer(Version.LUCENE_30);
+		assertAnalyzesTo(cjk, "\u4e00\u4e01\u4e02", expectedBigrams);
+	  }
+
+	  /// <summary>
+	  /// Hands one CJKAnalyzer(Version.LUCENE_30) to checkCJKTokenReusable twice, with the inputs and
+	  /// expectations of testMix and testMix2 respectively.
+	  /// </summary>
+	  public virtual void testReusableTokenStream()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+		int single = CJKTokenizer.SINGLE_TOKEN_TYPE;
+		Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_30);
+
+		// First pass.
+		TestToken[] firstExpected = new TestToken[]
+		{
+		  newToken("\u3042\u3044", 0, 2, dbl),
+		  newToken("\u3044\u3046", 1, 3, dbl),
+		  newToken("\u3046\u3048", 2, 4, dbl),
+		  newToken("\u3048\u304a", 3, 5, dbl),
+		  newToken("abc", 5, 8, single),
+		  newToken("\u304b\u304d", 8, 10, dbl),
+		  newToken("\u304d\u304f", 9, 11, dbl),
+		  newToken("\u304f\u3051", 10, 12, dbl),
+		  newToken("\u3051\u3053", 11, 13, dbl)
+		};
+		checkCJKTokenReusable(analyzer, "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053", firstExpected);
+
+		// Second pass with the same analyzer variable.
+		TestToken[] secondExpected = new TestToken[]
+		{
+		  newToken("\u3042\u3044", 0, 2, dbl),
+		  newToken("\u3044\u3046", 1, 3, dbl),
+		  newToken("\u3046\u3048", 2, 4, dbl),
+		  newToken("\u3048\u304a", 3, 5, dbl),
+		  newToken("ab", 5, 7, single),
+		  newToken("\u3093", 7, 8, dbl),
+		  newToken("c", 8, 9, single),
+		  newToken("\u304b\u304d", 9, 11, dbl),
+		  newToken("\u304d\u304f", 10, 12, dbl),
+		  newToken("\u304f\u3051", 11, 13, dbl),
+		  newToken("\u3053", 14, 15, dbl)
+		};
+		checkCJKTokenReusable(analyzer, "\u3042\u3044\u3046\u3048\u304aab\u3093c\u304b\u304d\u304f\u3051 \u3053", secondExpected);
+	  }
+
+	  /// <summary>
+	  /// LUCENE-2207: wrong offset calculated by end().
+	  /// Checks hiragana, ASCII and mixed inputs, with and without trailing spaces.
+	  /// </summary>
+	  public virtual void testFinalOffset()
+	  {
+		int dbl = CJKTokenizer.DOUBLE_TOKEN_TYPE;
+		int single = CJKTokenizer.SINGLE_TOKEN_TYPE;
+
+		// Hiragana only, without and with trailing spaces.
+		checkCJKToken("あい", new TestToken[] {newToken("あい", 0, 2, dbl)});
+		checkCJKToken("あい   ", new TestToken[] {newToken("あい", 0, 2, dbl)});
+
+		// ASCII only, without and with trailing spaces.
+		checkCJKToken("test", new TestToken[] {newToken("test", 0, 4, single)});
+		checkCJKToken("test   ", new TestToken[] {newToken("test", 0, 4, single)});
+
+		// Mixed scripts in both orders.
+		checkCJKToken("あいtest", new TestToken[]
+		{
+		  newToken("あい", 0, 2, dbl),
+		  newToken("test", 2, 6, single)
+		});
+		checkCJKToken("testあい    ", new TestToken[]
+		{
+		  newToken("test", 0, 4, single),
+		  newToken("あい", 4, 6, dbl)
+		});
+	  }
+
+	  /// <summary>
+	  /// Pushes random data through a new CJKAnalyzer(Version.LUCENE_30), passing
+	  /// 10000 * RANDOM_MULTIPLIER to checkRandomData.
+	  /// </summary>
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(
+		  random(),
+		  new CJKAnalyzer(Version.LUCENE_30),
+		  10000 * RANDOM_MULTIPLIER);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
new file mode 100644
index 0000000..ab58c6f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKTokenizerFactory.cs
@@ -0,0 +1,62 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the CJK tokenizer factory is working.
+	/// </summary>
+	[Obsolete("remove this test in 5.0")]
+	public class TestCJKTokenizerFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the tokenizer actually tokenizes CJK text correctly: the tokenizer
+	  /// obtained from tokenizerFactory("CJK") must split five Han characters into
+	  /// four overlapping two-character tokens.
+	  /// </summary>
+	  public virtual void testTokenizer()
+	  {
+		Reader reader = new StringReader("我是中国人");
+		TokenStream stream = tokenizerFactory("CJK").create(reader);
+		assertTokenStreamContents(stream, new string[] {"我是", "是中", "中国", "国人"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in an ArgumentException whose message
+	  /// contains "Unknown parameters" (checked with System.String.Contains).
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenizerFactory("CJK", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
new file mode 100644
index 0000000..85f195b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilter.cs
@@ -0,0 +1,100 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+	/// <summary>
+	/// Tests for <see cref="CJKWidthFilter"/>
+	/// </summary>
+	public class TestCJKWidthFilter : BaseTokenStreamTestCase
+	{
+	  private Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  public AnalyzerAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(source, new CJKWidthFilter(source));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Full-width ASCII forms normalized to half-width (basic latin).
+	  /// The input is spelled with \u escapes (U+FF34.. letters, U+FF11.. digits)
+	  /// so the full-width forms cannot be folded away by a re-encoding step.
+	  /// </summary>
+	  public virtual void testFullWidthASCII()
+	  {
+		assertAnalyzesTo(analyzer, "\uFF34\uFF45\uFF53\uFF54 \uFF11\uFF12\uFF13\uFF14", new string[] {"Test", "1234"}, new int[] {0, 5}, new int[] {4, 9});
+	  }
+
+	  /// <summary>
+	  /// Half-width katakana forms normalized to standard katakana.
+	  /// A bit trickier in some cases, since half-width forms are decomposed
+	  /// and voice marks need to be recombined with a preceding base form.
+	  /// Inputs are written as \u escapes from the half-width block (U+FF66-U+FF9F);
+	  /// U+FF9E and U+FF9F are the half-width voiced and semi-voiced sound marks.
+	  /// </summary>
+	  public virtual void testHalfWidthKana()
+	  {
+		assertAnalyzesTo(analyzer, "\uFF76\uFF80\uFF76\uFF85", new string[] {"カタカナ"});
+		assertAnalyzesTo(analyzer, "\uFF73\uFF9E\uFF68\uFF6F\uFF82", new string[] {"ヴィッツ"});
+		assertAnalyzesTo(analyzer, "\uFF8A\uFF9F\uFF85\uFF7F\uFF86\uFF6F\uFF78", new string[] {"パナソニック"});
+	  }
+
+	  /// <summary>
+	  /// Runs random text through the whitespace-tokenizing analyzer field of this class. </summary>
+	  public virtual void testRandomData()
+	  {
+		checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// Checks empty input against an empty expected term (KeywordTokenizer + CJKWidthFilter). </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+		checkOneTerm(a, "", "");
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly TestCJKWidthFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestCJKWidthFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
new file mode 100644
index 0000000..b139d10
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKWidthFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the CJKWidthFilterFactory is working
+	/// </summary>
+	public class TestCJKWidthFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Full-width input (written as \u escapes, U+FF34.. / U+FF11..) must come out as plain ASCII tokens. </summary>
+	  public virtual void test()
+	  {
+		Reader reader = new StringReader("\uFF34\uFF45\uFF53\uFF54 \uFF11\uFF12\uFF13\uFF14");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("CJKWidth").create(stream);
+		assertTokenStreamContents(stream, new string[] {"Test", "1234"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in an ArgumentException whose message
+	  /// contains "Unknown parameters" (checked with System.String.Contains).
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("CJKWidth", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
new file mode 100644
index 0000000..3a9a555
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniAnalyzer.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// Test the Sorani analyzer
+	/// </summary>
+	public class TestSoraniAnalyzer : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// Only constructs the analyzer; this fails when its stopwords resource cannot be located.
+	  /// </summary>
+	  public virtual void testResourcesAvailable()
+	  {
+		new SoraniAnalyzer(TEST_VERSION_CURRENT);
+	  }
+
+	  /// <summary>
+	  /// Default configuration: of the two input words only the second is expected, in shortened form. </summary>
+	  public virtual void testStopwords()
+	  {
+		Analyzer sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(sorani, "ئەم پیاوە", new[] {"پیاو"});
+	  }
+
+	  /// <summary>
+	  /// With an empty stopword set both input words are expected in the output. </summary>
+	  public virtual void testCustomStopwords()
+	  {
+		Analyzer sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+		assertAnalyzesTo(sorani, "ئەم پیاوە", new[] {"ئەم", "پیاو"});
+	  }
+
+	  /// <summary>
+	  /// Runs two inputs through one analyzer instance; both are expected to yield the same term. </summary>
+	  public virtual void testReusableTokenStream()
+	  {
+		Analyzer sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(sorani, "پیاوە", new[] {"پیاو"});
+		assertAnalyzesTo(sorani, "پیاو", new[] {"پیاو"});
+	  }
+
+	  /// <summary>
+	  /// A word placed in the stem-exclusion set is expected to come back unchanged. </summary>
+	  public virtual void testWithStemExclusionSet()
+	  {
+		CharArraySet exclusions = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
+		exclusions.add("پیاوە");
+		Analyzer sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, exclusions);
+		assertAnalyzesTo(sorani, "پیاوە", new[] {"پیاوە"});
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+	  public virtual void testRandomStrings()
+	  {
+		var rng = random();
+		var sorani = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+		checkRandomData(rng, sorani, 1000 * RANDOM_MULTIPLIER);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
new file mode 100644
index 0000000..90738f0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilter.cs
@@ -0,0 +1,122 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+	/// <summary>
+	/// Tests normalization for Sorani (this is more critical than stemming...)
+	/// </summary>
+	public class TestSoraniNormalizationFilter : BaseTokenStreamTestCase
+	{
+	  internal Analyzer a = new AnalyzerAnonymousInnerClassHelper();
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  public AnalyzerAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer wholeInput = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(wholeInput, new SoraniNormalizationFilter(wholeInput));
+		  }
+	  }
+
+	  /// <summary>
+	  /// U+064A and U+0649 are both expected as U+06CC; U+06CC itself is expected unchanged. </summary>
+	  public virtual void testY()
+	  {
+		checkOneTerm(a, "\u064A", "\u06CC");
+		checkOneTerm(a, "\u0649", "\u06CC");
+		checkOneTerm(a, "\u06CC", "\u06CC");
+	  }
+
+	  /// <summary>
+	  /// U+0643 is expected as U+06A9; U+06A9 itself is expected unchanged. </summary>
+	  public virtual void testK()
+	  {
+		checkOneTerm(a, "\u0643", "\u06A9");
+		checkOneTerm(a, "\u06A9", "\u06A9");
+	  }
+
+	  /// <summary>
+	  /// U+0647 followed by U+200C is expected as U+06D5; U+06BE as U+0647; U+0629 as U+06D5. </summary>
+	  public virtual void testH()
+	  {
+		// initial
+		checkOneTerm(a, "\u0647\u200C", "\u06D5");
+		// medial
+		checkOneTerm(a, "\u0647\u200C\u06A9", "\u06D5\u06A9");
+
+		checkOneTerm(a, "\u06BE", "\u0647");
+		checkOneTerm(a, "\u0629", "\u06D5");
+	  }
+
+	  /// <summary>
+	  /// Of three consecutive U+0647 only the last one is expected as U+06D5. </summary>
+	  public virtual void testFinalH()
+	  {
+		// always (and in final form by def), so frequently omitted
+		checkOneTerm(a, "\u0647\u0647\u0647", "\u0647\u0647\u06D5");
+	  }
+
+	  /// <summary>
+	  /// U+0692 is expected as U+0695. </summary>
+	  public virtual void testRR()
+	  {
+		checkOneTerm(a, "\u0692", "\u0695");
+	  }
+
+	  /// <summary>
+	  /// Of three consecutive U+0631 only the first one is expected as U+0695. </summary>
+	  public virtual void testInitialRR()
+	  {
+		// always, so frequently omitted
+		checkOneTerm(a, "\u0631\u0631\u0631", "\u0695\u0631\u0631");
+	  }
+
+	  /// <summary>
+	  /// U+0640 and each of U+064B through U+0652 are expected to yield an empty
+	  /// term, and so is a lone U+200C.
+	  /// </summary>
+	  public virtual void testRemove()
+	  {
+		// each entry is checked on its own, in this order, against the empty string
+		string[] removed = {"\u0640", "\u064B", "\u064C", "\u064D", "\u064E", "\u064F", "\u0650", "\u0651", "\u0652"};
+		foreach (string input in removed)
+		{
+		  checkOneTerm(a, input, "");
+		}
+
+		// we peek backwards in this case to look for h+200C, ensure this works
+		checkOneTerm(a, "\u200C", "");
+	  }
+
+	  /// <summary>
+	  /// Empty input is checked against an empty expected term. </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		checkOneTerm(a, "", "");
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
new file mode 100644
index 0000000..30e5d0b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniNormalizationFilterFactory.cs
@@ -0,0 +1,58 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Sorani normalization factory is working.
+	/// </summary>
+	public class TestSoraniNormalizationFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// The run of U+0640 characters embedded in the input must be absent from the emitted token. </summary>
+	  public virtual void testNormalization()
+	  {
+		Reader reader = new StringReader("پیــــاوەکان");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("SoraniNormalization").create(stream);
+		assertTokenStreamContents(stream, new string[] {"پیاوەکان"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in an ArgumentException whose message
+	  /// contains "Unknown parameters" (checked with System.String.Contains).
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("SoraniNormalization", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
new file mode 100644
index 0000000..4aebe0e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilter.cs
@@ -0,0 +1,144 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+
+	/// <summary>
+	/// Test the Sorani Stemmer.
+	/// </summary>
+	public class TestSoraniStemFilter : BaseTokenStreamTestCase
+	{
+	  internal SoraniAnalyzer a = new SoraniAnalyzer(TEST_VERSION_CURRENT);
+
+	  /// <summary>
+	  /// Indefinite singular endings (-ek, -yek) are expected to be stripped. </summary>
+	  public virtual void testIndefiniteSingular()
+	  {
+		checkOneTerm(a, "پیاوێک", "پیاو"); // -ek
+		checkOneTerm(a, "دەرگایەک", "دەرگا"); // -yek
+	  }
+
+	  /// <summary>
+	  /// Definite singular endings (-aka, -ka) are expected to be stripped. </summary>
+	  public virtual void testDefiniteSingular()
+	  {
+		checkOneTerm(a, "پیاوەكە", "پیاو"); // -aka
+		checkOneTerm(a, "دەرگاكە", "دەرگا"); // -ka
+	  }
+
+	  /// <summary>
+	  /// Demonstrative singular endings (-a, -ya) are expected to be stripped. </summary>
+	  public virtual void testDemonstrativeSingular()
+	  {
+		checkOneTerm(a, "کتاویە", "کتاوی"); // -a
+		checkOneTerm(a, "دەرگایە", "دەرگا"); // -ya
+	  }
+
+	  /// <summary>
+	  /// Indefinite plural endings (-An, -yAn) are expected to be stripped. </summary>
+	  public virtual void testIndefinitePlural()
+	  {
+		checkOneTerm(a, "پیاوان", "پیاو"); // -An
+		checkOneTerm(a, "دەرگایان", "دەرگا"); // -yAn
+	  }
+
+	  /// <summary>
+	  /// Definite plural endings (-akAn, -kAn) are expected to be stripped. </summary>
+	  public virtual void testDefinitePlural()
+	  {
+		checkOneTerm(a, "پیاوەکان", "پیاو"); // -akAn
+		checkOneTerm(a, "دەرگاکان", "دەرگا"); // -kAn
+	  }
+
+	  /// <summary>
+	  /// Demonstrative plural endings (-Ana, -yAna) are expected to be stripped. </summary>
+	  public virtual void testDemonstrativePlural()
+	  {
+		checkOneTerm(a, "پیاوانە", "پیاو"); // -Ana
+		checkOneTerm(a, "دەرگایانە", "دەرگا"); // -yAna
+	  }
+
+	  /// <summary>
+	  /// Ezafe forms (singular, indefinite, plural) are all expected to give the same stem. </summary>
+	  public virtual void testEzafe()
+	  {
+		checkOneTerm(a, "هۆتیلی", "هۆتیل"); // singular
+		checkOneTerm(a, "هۆتیلێکی", "هۆتیل"); // indefinite
+		checkOneTerm(a, "هۆتیلانی", "هۆتیل"); // plural
+	  }
+
+	  /// <summary>
+	  /// Postposition endings (-awa, -dA, -A) are expected to be stripped. </summary>
+	  public virtual void testPostpositions()
+	  {
+		checkOneTerm(a, "دوورەوە", "دوور"); // -awa
+		checkOneTerm(a, "نیوەشەودا", "نیوەشەو"); // -dA
+		checkOneTerm(a, "سۆرانا", "سۆران"); // -A
+	  }
+
+	  /// <summary>
+	  /// Possessive endings (-mAn, -tAn, -yAn) are expected to be stripped. </summary>
+	  public virtual void testPossessives()
+	  {
+		checkOneTerm(a, "پارەمان", "پارە"); // -mAn
+		checkOneTerm(a, "پارەتان", "پارە"); // -tAn
+		checkOneTerm(a, "پارەیان", "پارە"); // -yAn
+	  }
+
+	  /// <summary>
+	  /// Checks empty input against an empty expected term (KeywordTokenizer + SoraniStemFilter). </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper(this);
+		checkOneTerm(a, "", "");
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestSoraniStemFilter outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestSoraniStemFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new SoraniStemFilter(tokenizer));
+		  }
+	  }
+
+	  /// <summary>
+	  /// test against a basic vocabulary file. The helper is called through its
+	  /// declaring class because the Java static import has no converted equivalent.
+	  /// </summary>
+	  public virtual void testVocabulary()
+	  {
+		// top 8k words or so: freq > 1000
+		VocabularyAssert.assertVocabulary(a, getDataFile("ckbtestdata.zip"), "testdata.txt");
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
new file mode 100644
index 0000000..ae5bdd8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ckb/TestSoraniStemFilterFactory.cs
@@ -0,0 +1,58 @@
+namespace org.apache.lucene.analysis.ckb
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Sorani stem factory is working.
+	/// </summary>
+	public class TestSoraniStemFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// The filter created by tokenFilterFactory("SoraniStem") must shorten the input word to its stem. </summary>
+	  public virtual void testStemming()
+	  {
+		Reader reader = new StringReader("پیاوەکان");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("SoraniStem").create(stream);
+		assertTokenStreamContents(stream, new string[] {"پیاو"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in an ArgumentException whose message
+	  /// contains "Unknown parameters" (checked with System.String.Contains).
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("SoraniStem", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
new file mode 100644
index 0000000..5b8f9b1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseFilterFactory.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Chinese filter factory is working.
+	/// </summary>
+	public class TestChineseFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the filter actually normalizes text (numerics, stopwords):
+	  /// of the seven whitespace-separated input words only "Is", "silly" and
+	  /// "filter" are expected in the output.
+	  /// </summary>
+	  public virtual void testFiltering()
+	  {
+		Reader reader = new StringReader("this 1234 Is such a silly filter");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("Chinese").create(stream);
+		assertTokenStreamContents(stream, new string[] {"Is", "silly", "filter"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in an ArgumentException whose message
+	  /// contains "Unknown parameters" (checked with System.String.Contains).
+	  /// </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("Chinese", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
new file mode 100644
index 0000000..0cc6a8d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizer.cs
@@ -0,0 +1,151 @@
+using System;
+
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using org.apache.lucene.analysis;
+	using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Tests for ChineseTokenizer, ChineseFilter and ChineseAnalyzer.
+	/// Remove this test when ChineseAnalyzer is removed.
+	/// </summary>
+	[Obsolete("Remove this test when ChineseAnalyzer is removed.")]
+	public class TestChineseTokenizer : BaseTokenStreamTestCase
+	{
+		/// <summary>
+		/// Checks that "a天b" is split into one token per character and that the i-th
+		/// token reports the start offset i and the end offset i + 1.
+		/// </summary>
+		public virtual void testOtherLetterOffset()
+		{
+			string s = "a天b";
+			ChineseTokenizer tokenizer = new ChineseTokenizer(new StringReader(s));
+			try
+			{
+				OffsetAttribute offsetAtt = tokenizer.getAttribute(typeof(OffsetAttribute));
+				int tokenCount = 0;
+				tokenizer.reset();
+				while (tokenizer.incrementToken())
+				{
+					// The i-th token is expected to cover the offsets [i, i + 1).
+					assertEquals(tokenCount, offsetAtt.startOffset());
+					assertEquals(tokenCount + 1, offsetAtt.endOffset());
+					tokenCount++;
+				}
+				// Without this check an empty token stream would let the test pass vacuously.
+				assertEquals(s.Length, tokenCount);
+				tokenizer.end();
+			}
+			finally
+			{
+				// Release the tokenizer even when one of the assertions above throws.
+				tokenizer.close();
+			}
+		}
+
+		/// <summary>
+		/// Analyzes two different texts with the same ChineseAnalyzer instance and checks the
+		/// expected terms plus two integer arrays (presumably start and end offsets) for each.
+		/// </summary>
+		public virtual void testReusableTokenStream()
+		{
+			Analyzer a = new ChineseAnalyzer();
+			assertAnalyzesTo(a, "中华人民共和国", new string[] {"中", "华", "人", "民", "共", "和", "国"}, new int[] {0, 1, 2, 3, 4, 5, 6}, new int[] {1, 2, 3, 4, 5, 6, 7});
+			assertAnalyzesTo(a, "北京市", new string[] {"北", "京", "市"}, new int[] {0, 1, 2}, new int[] {1, 2, 3});
+		}
+
+		/// <summary>
+		/// Analyzer that just uses ChineseTokenizer, not ChineseFilter.
+		/// Convenience to show the behavior of the tokenizer.
+		/// It keeps no reference to the enclosing test because it reads no state from it.
+		/// </summary>
+		private class JustChineseTokenizerAnalyzer : Analyzer
+		{
+			public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+			{
+				return new TokenStreamComponents(new ChineseTokenizer(reader));
+			}
+		}
+
+		/// <summary>
+		/// Analyzer that just uses ChineseFilter, not ChineseTokenizer.
+		/// Convenience to show the behavior of the filter.
+		/// It keeps no reference to the enclosing test because it reads no state from it.
+		/// </summary>
+		private class JustChineseFilterAnalyzer : Analyzer
+		{
+			public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+			{
+				Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
+				return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
+			}
+		}
+
+		/// <summary>
+		/// ChineseTokenizer tokenizes numbers as one token, but they are filtered
+		/// by ChineseFilter.
+		/// </summary>
+		public virtual void testNumerics()
+		{
+			Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
+			assertAnalyzesTo(justTokenizer, "中1234", new string[] {"中", "1234"});
+
+			// in this case the ChineseAnalyzer (which applies ChineseFilter) will remove the numeric token.
+			Analyzer a = new ChineseAnalyzer();
+			assertAnalyzesTo(a, "中1234", new string[] {"中"});
+		}
+
+		/// <summary>
+		/// ChineseTokenizer tokenizes English similar to SimpleAnalyzer;
+		/// it will lowercase terms automatically.
+		/// <para>
+		/// ChineseFilter has an English stopword list, it also removes any single character tokens.
+		/// The stopword list is case-sensitive.
+		/// </para>
+		/// </summary>
+		public virtual void testEnglish()
+		{
+			Analyzer chinese = new ChineseAnalyzer();
+			assertAnalyzesTo(chinese, "This is a Test. b c d", new string[] {"test"});
+
+			Analyzer justTokenizer = new JustChineseTokenizerAnalyzer();
+			assertAnalyzesTo(justTokenizer, "This is a Test. b c d", new string[] {"this", "is", "a", "test", "b", "c", "d"});
+
+			Analyzer justFilter = new JustChineseFilterAnalyzer();
+			assertAnalyzesTo(justFilter, "This is a Test. b c d", new string[] {"This", "Test."});
+		}
+
+		/// <summary>
+		/// Blast some random strings through the analyzer.
+		/// The iteration count passed to checkRandomData is 10000 * RANDOM_MULTIPLIER.
+		/// </summary>
+		public virtual void testRandomStrings()
+		{
+			checkRandomData(random(), new ChineseAnalyzer(), 10000 * RANDOM_MULTIPLIER);
+		}
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
new file mode 100644
index 0000000..03f8055
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cn/TestChineseTokenizerFactory.cs
@@ -0,0 +1,59 @@
+namespace org.apache.lucene.analysis.cn
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Chinese tokenizer factory is working.
+	/// </summary>
+	public class TestChineseTokenizerFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Ensure the tokenizer actually tokenizes chinese text correctly: the
+	  /// five-character input is expected to come back as five one-character tokens.
+	  /// </summary>
+	  /// <remarks>Ported from Java, where the method was declared <c>throws Exception</c>.</remarks>
+	  public virtual void testTokenizer()
+	  {
+		Reader reader = new StringReader("我是中国人");
+		TokenStream stream = tokenizerFactory("Chinese").create(reader);
+		assertTokenStreamContents(stream, new string[] {"我", "是", "中", "国", "人"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception. </summary>
+	  /// <remarks>Ported from Java, where the method was declared <c>throws Exception</c>.</remarks>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenizerFactory("Chinese", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (PascalCase); the Java-style "contains" does not exist in .NET.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file