Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/09/01 14:36:21 UTC

[01/22] lucenenet git commit: Ported UpperCaseFilterFactory (there are no tests)

Repository: lucenenet
Updated Branches:
  refs/heads/analysis-work 4deebe8fe -> 7f877fdfc


Ported UpperCaseFilterFactory (there are no tests)


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d4dd1d6c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d4dd1d6c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d4dd1d6c

Branch: refs/heads/analysis-work
Commit: d4dd1d6c0f6df10ac8c71f40ebb1cebd2cb8534f
Parents: 4deebe8
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 17:25:27 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 17:25:27 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Core/UpperCaseFilterFactory.cs               | 10 +++++-----
 .../Lucene.Net.Analysis.Common.csproj                     |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4dd1d6c/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
index 7cc089e..3a31cf9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
@@ -1,9 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Core
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +19,7 @@ namespace Lucene.Net.Analysis.Core
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Factory for <seealso cref="UpperCaseFilter"/>. 
     /// <pre class="prettyprint">
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Core
     /// general search matching
     /// </para>
     /// </summary>
-    public class UpperCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+    public class UpperCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
     {
 
         /// <summary>
@@ -44,7 +44,7 @@ namespace Lucene.Net.Analysis.Core
         public UpperCaseFilterFactory(IDictionary<string, string> args)
             : base(args)
         {
-            assureMatchVersion();
+            AssureMatchVersion();
             if (args.Count > 0)
             {
                 throw new System.ArgumentException("Unknown parameters: " + args);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4dd1d6c/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 615d1a0..0679473 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -121,6 +121,7 @@
     <Compile Include="Analysis\Core\StopFilterFactory.cs" />
     <Compile Include="Analysis\Core\TypeTokenFilter.cs" />
     <Compile Include="Analysis\Core\TypeTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Core\UpperCaseFilterFactory.cs" />
     <Compile Include="Analysis\Core\WhitespaceTokenizerFactory.cs" />
     <Compile Include="Analysis\Cz\CzechAnalyzer.cs" />
     <Compile Include="Analysis\Cz\CzechStemFilter.cs" />


[02/22] lucenenet git commit: Fixed issues that were causing the project not to compile in VS2012.

Posted by sy...@apache.org.
Fixed issues that were causing the project not to compile in VS2012.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/85789c09
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/85789c09
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/85789c09

Branch: refs/heads/analysis-work
Commit: 85789c0906337392f4ddc9d91fba213e61c2e177
Parents: d4dd1d6
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 17:42:36 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 17:42:36 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Nl/DutchAnalyzer.cs                          |  3 +--
 .../Analysis/Ru/RussianLetterTokenizerFactory.cs          |  4 ++--
 .../Analysis/Synonym/SynonymFilter.cs                     | 10 +++++-----
 .../Analysis/Miscellaneous/TestStemmerOverrideFilter.cs   |  4 ++--
 4 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/85789c09/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
index ad57c53..cc9784f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
@@ -7,7 +7,6 @@ using Lucene.Net.Util;
 using System;
 using System.IO;
 using System.Text;
-using static Lucene.Net.Analysis.Miscellaneous.StemmerOverrideFilter;
 
 namespace Lucene.Net.Analysis.Nl
 {
@@ -113,7 +112,7 @@ namespace Lucene.Net.Analysis.Nl
         /// </summary>
         private CharArraySet excltable = CharArraySet.EMPTY_SET;
 
-        private readonly StemmerOverrideMap stemdict;
+        private readonly StemmerOverrideFilter.StemmerOverrideMap stemdict;
 
         // null if on 3.1 or later - only for bw compat
         private readonly CharArrayMap<string> origStemdict;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/85789c09/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
index ba27ee7..666469e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
@@ -1,8 +1,8 @@
 \ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.IO;
-using static Lucene.Net.Util.AttributeSource;
 
 namespace Lucene.Net.Analysis.Ru
 {
@@ -40,7 +40,7 @@ namespace Lucene.Net.Analysis.Ru
             }
         }
 
-        public override Tokenizer Create(AttributeFactory factory, TextReader input)
+        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
         {
             return new RussianLetterTokenizer(luceneMatchVersion, factory, input);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/85789c09/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
index d1ef34e..ced9330 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -383,10 +383,10 @@ namespace Lucene.Net.Analysis.Synonym
                         {
                             buffer = termAtt.Buffer();
                             bufferLen = termAtt.Length;
-                            PendingInput input = futureInputs[nextWrite];
-                            lastStartOffset = input.startOffset = offsetAtt.StartOffset();
-                            lastEndOffset = input.endOffset = offsetAtt.EndOffset();
-                            inputEndOffset = input.endOffset;
+                            PendingInput pendingInput = futureInputs[nextWrite];
+                            lastStartOffset = pendingInput.startOffset = offsetAtt.StartOffset();
+                            lastEndOffset = pendingInput.endOffset = offsetAtt.EndOffset();
+                            inputEndOffset = pendingInput.endOffset;
                             //System.out.println("  new token=" + new String(buffer, 0, bufferLen));
                             if (nextRead != nextWrite)
                             {
@@ -394,7 +394,7 @@ namespace Lucene.Net.Analysis.Synonym
                             }
                             else
                             {
-                                input.consumed = false;
+                                pendingInput.consumed = false;
                             }
 
                         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/85789c09/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
index b67ed2e..9ef56ef 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
@@ -1,5 +1,6 @@
 \ufeffusing Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.En;
+using Lucene.Net.Analysis.Miscellaneous;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
 using NUnit.Framework;
@@ -7,7 +8,6 @@ using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Text;
-using static Lucene.Net.Analysis.Miscellaneous.StemmerOverrideFilter;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
@@ -137,7 +137,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             {
                 builder.Add(entry.Key, entry.Value);
             }
-            StemmerOverrideMap build = builder.Build();
+            StemmerOverrideFilter.StemmerOverrideMap build = builder.Build();
             foreach (KeyValuePair<string, string> entry in entrySet)
             {
                 if (Random().nextBoolean())
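
The common thread in the first two hunks is that "using static" directives are a C# 6 feature, while VS2012 ships a C# 5 compiler, so references to nested types such as StemmerOverrideMap have to be qualified through their declaring class instead. (The SynonymFilter rename is presumably the same vintage of problem: pre-Roslyn compilers were stricter about a local named "input" shadowing the inherited TokenFilter input field.) A before/after sketch of the nested-type fix, under those assumptions:

    using Lucene.Net.Analysis.Miscellaneous;

    internal class NestedTypeSketch // hypothetical class, for illustration only
    {
        // C# 6 only (rejected by the VS2012 toolset):
        //   using static Lucene.Net.Analysis.Miscellaneous.StemmerOverrideFilter;
        //   private readonly StemmerOverrideMap stemdict;

        // C# 5-compatible: qualify the nested type via its declaring class.
        private readonly StemmerOverrideFilter.StemmerOverrideMap stemdict = null;
    }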


[07/22] lucenenet git commit: Fixed casing of tests

Posted by sy...@apache.org.
Fixed casing of tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ab404469
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ab404469
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ab404469

Branch: refs/heads/analysis-work
Commit: ab404469ef7e6ea59f4955eb9879508227718ea8
Parents: 65f1c5f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 19:34:36 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 19:34:36 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Miscellaneous/TestLimitTokenCountFilter.cs          | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab404469/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLimitTokenCountFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLimitTokenCountFilter.cs
index 9e1e34e..f0e9e4b 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLimitTokenCountFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestLimitTokenCountFilter.cs
@@ -25,7 +25,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
     {
 
         [Test]
-        public virtual void test()
+        public virtual void Test()
         {
             foreach (bool consumeAll in new bool[] { true, false })
             {
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
         [Test]
         [ExpectedException(ExpectedException = typeof(ArgumentOutOfRangeException))]
-        public virtual void testIllegalArguments()
+        public virtual void TestIllegalArguments()
         {
             new LimitTokenCountFilter(new MockTokenizer(new StringReader("A1 B2 C3 D4 E5 F6"), MockTokenizer.WHITESPACE, false), -1);
         }


[17/22] lucenenet git commit: Ported Analysis.Core.TestAllAnalyzersHaveFactories.

Posted by sy...@apache.org.
Ported Analysis.Core.TestAllAnalyzersHaveFactories.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9ed5b8f3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9ed5b8f3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9ed5b8f3

Branch: refs/heads/analysis-work
Commit: 9ed5b8f3b44b0e2ede521608d7edaf4a9b81f92c
Parents: 0a5198e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 13:10:26 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:02 2016 +0700

----------------------------------------------------------------------
 .../Core/TestAllAnalyzersHaveFactories.cs       | 315 +++++++++----------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   1 +
 2 files changed, 153 insertions(+), 163 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9ed5b8f3/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
index 0c6a4ca..5bf0429 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestAllAnalyzersHaveFactories.cs
@@ -1,10 +1,23 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Analysis.Fr;
+using Lucene.Net.Analysis.In;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Nl;
+using Lucene.Net.Analysis.Path;
+using Lucene.Net.Analysis.Sinks;
+using Lucene.Net.Analysis.Snowball;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
 using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reflection;
 
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -21,173 +34,149 @@ namespace org.apache.lucene.analysis.core
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Tests that any newly added Tokenizers/TokenFilters/CharFilters have a
+    /// corresponding factory (and that the SPI configuration is correct)
+    /// </summary>
+    public class TestAllAnalyzersHaveFactories : LuceneTestCase
+    {
 
-	using PatternKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
-	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-	using FrenchStemFilter = org.apache.lucene.analysis.fr.FrenchStemFilter;
-	using IndicTokenizer = org.apache.lucene.analysis.@in.IndicTokenizer;
-	using DutchStemFilter = org.apache.lucene.analysis.nl.DutchStemFilter;
-	using ReversePathHierarchyTokenizer = org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
-	using TeeSinkTokenFilter = org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
-	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
-	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
-	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
-	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
-	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
-	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
-	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-
-	/// <summary>
-	/// Tests that any newly added Tokenizers/TokenFilters/CharFilters have a
-	/// corresponding factory (and that the SPI configuration is correct)
-	/// </summary>
-	public class TestAllAnalyzersHaveFactories : LuceneTestCase
-	{
-
-	  // these are test-only components (e.g. test-framework)
-	  private static readonly ISet<Type> testComponents = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
-	  static TestAllAnalyzersHaveFactories()
-	  {
-		Collections.addAll<Type>(testComponents, typeof(MockTokenizer), typeof(MockCharFilter), typeof(MockFixedLengthPayloadFilter), typeof(MockGraphTokenFilter), typeof(MockHoleInjectingTokenFilter), typeof(MockRandomLookaheadTokenFilter), typeof(MockTokenFilter), typeof(MockVariableLengthPayloadFilter), typeof(ValidatingTokenFilter), typeof(CrankyTokenFilter));
-		Collections.addAll<Type>(crazyComponents, typeof(CachingTokenFilter), typeof(TeeSinkTokenFilter));
-		Collections.addAll<Type>(deprecatedDuplicatedComponents, typeof(DutchStemFilter), typeof(FrenchStemFilter), typeof(IndicTokenizer));
-		Collections.addAll<Type>(oddlyNamedComponents, typeof(ReversePathHierarchyTokenizer), typeof(SnowballFilter), typeof(PatternKeywordMarkerFilter), typeof(SetKeywordMarkerFilter)); // this is called SnowballPorterFilterFactory -  this is supported via an option to PathHierarchyTokenizer's factory
-	  }
+        // these are test-only components (e.g. test-framework)
+        private static readonly ISet<Type> testComponents = new HashSet<Type>();
+        static TestAllAnalyzersHaveFactories()
+        {
+            testComponents.addAll(new Type[] { typeof(MockTokenizer), typeof(MockCharFilter), typeof(MockFixedLengthPayloadFilter), typeof(MockGraphTokenFilter), typeof(MockHoleInjectingTokenFilter), typeof(MockRandomLookaheadTokenFilter), typeof(MockTokenFilter), typeof(MockVariableLengthPayloadFilter), typeof(ValidatingTokenFilter) });
+            crazyComponents.addAll(new Type[] { typeof(CachingTokenFilter), typeof(TeeSinkTokenFilter),
+                // LUCENENET: Added this specialized BufferedCharFilter which doesn't need a factory
+                typeof(BufferedCharFilter)
+            });
+            deprecatedDuplicatedComponents.addAll(new Type[] { typeof(DutchStemFilter), typeof(FrenchStemFilter), typeof(IndicTokenizer) });
+            oddlyNamedComponents.addAll(new Type[] { typeof(ReversePathHierarchyTokenizer), typeof(SnowballFilter), typeof(PatternKeywordMarkerFilter), typeof(SetKeywordMarkerFilter) }); // this is called SnowballPorterFilterFactory -  this is supported via an option to PathHierarchyTokenizer's factory
+        }
 
-	  // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for these?
-	  private static readonly ISet<Type> crazyComponents = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+        // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for these?
+        private static readonly ISet<Type> crazyComponents = new HashSet<Type>();
 
-	  // these are deprecated components that are just exact dups of other functionality: they dont need factories
-	  // (they never had them)
-	  private static readonly ISet<Type> deprecatedDuplicatedComponents = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+        // these are deprecated components that are just exact dups of other functionality: they dont need factories
+        // (they never had them)
+        private static readonly ISet<Type> deprecatedDuplicatedComponents = new HashSet<Type>();
 
-	  // these are oddly-named (either the actual analyzer, or its factory)
-	  // they do actually have factories.
-	  // TODO: clean this up!
-	  private static readonly ISet<Type> oddlyNamedComponents = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+        // these are oddly-named (either the actual analyzer, or its factory)
+        // they do actually have factories.
+        // TODO: clean this up!
+        private static readonly ISet<Type> oddlyNamedComponents = new HashSet<Type>();
 
-	  private static readonly ResourceLoader loader = new StringMockResourceLoader("");
+        private static readonly IResourceLoader loader = new StringMockResourceLoader("");
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void test() throws Exception
-	  public virtual void test()
-	  {
-		IList<Type> analysisClasses = new List<Type>();
-		((List<Type>)analysisClasses).AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.analysis"));
-		((List<Type>)analysisClasses).AddRange(TestRandomChains.getClassesForPackage("org.apache.lucene.collation"));
+        [Test]
+        public virtual void Test()
+        {
+            IList<Type> analysisClasses = new List<Type>(
+                typeof(StandardAnalyzer).Assembly.GetTypes()
+                    .Where(c => !c.IsAbstract && c.IsPublic && !c.IsInterface && c.IsClass && (c.GetCustomAttribute<ObsoleteAttribute>() == null)
+                        && !testComponents.Contains(c) && !crazyComponents.Contains(c) && !oddlyNamedComponents.Contains(c) && !deprecatedDuplicatedComponents.Contains(c)
+                        && (c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter)))
+                    ));
 
-		foreach (Class c in analysisClasses)
-		{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int modifiers = c.getModifiers();
-		  int modifiers = c.Modifiers;
-		  if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface || testComponents.Contains(c) || crazyComponents.Contains(c) || oddlyNamedComponents.Contains(c) || deprecatedDuplicatedComponents.Contains(c) || c.isAnnotationPresent(typeof(Deprecated)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
-		  { // deprecated ones are typically back compat hacks
-			// don't waste time with abstract classes
-			continue;
-		  }
 
-		  IDictionary<string, string> args = new Dictionary<string, string>();
-		  args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
+            foreach (Type c in analysisClasses)
+            {
 
-		  if (c.IsSubclassOf(typeof(Tokenizer)))
-		  {
-			string clazzName = c.SimpleName;
-			assertTrue(clazzName.EndsWith("Tokenizer", StringComparison.Ordinal));
-			string simpleName = clazzName.Substring(0, clazzName.Length - 9);
-			assertNotNull(TokenizerFactory.lookupClass(simpleName));
-			TokenizerFactory instance = null;
-			try
-			{
-			  instance = TokenizerFactory.forName(simpleName, args);
-			  assertNotNull(instance);
-			  if (instance is ResourceLoaderAware)
-			  {
-				((ResourceLoaderAware) instance).inform(loader);
-			  }
-			  assertSame(c, instance.create(new StringReader("")).GetType());
-			}
-			catch (System.ArgumentException e)
-			{
-			  if (e.InnerException is NoSuchMethodException)
-			  {
-				// there is no corresponding ctor available
-				throw e;
-			  }
-			  // TODO: For now pass because some factories have not yet a default config that always works
-			}
-		  }
-		  else if (c.IsSubclassOf(typeof(TokenFilter)))
-		  {
-			string clazzName = c.SimpleName;
-			assertTrue(clazzName.EndsWith("Filter", StringComparison.Ordinal));
-			string simpleName = clazzName.Substring(0, clazzName.Length - (clazzName.EndsWith("TokenFilter", StringComparison.Ordinal) ? 11 : 6));
-			assertNotNull(TokenFilterFactory.lookupClass(simpleName));
-			TokenFilterFactory instance = null;
-			try
-			{
-			  instance = TokenFilterFactory.forName(simpleName, args);
-			  assertNotNull(instance);
-			  if (instance is ResourceLoaderAware)
-			  {
-				((ResourceLoaderAware) instance).inform(loader);
-			  }
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: Class<? extends org.apache.lucene.analysis.TokenStream> createdClazz = instance.create(new KeywordTokenizer(new java.io.StringReader(""))).getClass();
-			  Type<?> createdClazz = instance.create(new KeywordTokenizer(new StringReader(""))).GetType();
-			  // only check instance if factory have wrapped at all!
-			  if (typeof(KeywordTokenizer) != createdClazz)
-			  {
-				assertSame(c, createdClazz);
-			  }
-			}
-			catch (System.ArgumentException e)
-			{
-			  if (e.InnerException is NoSuchMethodException)
-			  {
-				// there is no corresponding ctor available
-				throw e;
-			  }
-			  // TODO: For now pass because some factories have not yet a default config that always works
-			}
-		  }
-		  else if (c.IsSubclassOf(typeof(CharFilter)))
-		  {
-			string clazzName = c.SimpleName;
-			assertTrue(clazzName.EndsWith("CharFilter", StringComparison.Ordinal));
-			string simpleName = clazzName.Substring(0, clazzName.Length - 10);
-			assertNotNull(CharFilterFactory.lookupClass(simpleName));
-			CharFilterFactory instance = null;
-			try
-			{
-			  instance = CharFilterFactory.forName(simpleName, args);
-			  assertNotNull(instance);
-			  if (instance is ResourceLoaderAware)
-			  {
-				((ResourceLoaderAware) instance).inform(loader);
-			  }
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: Class<? extends java.io.Reader> createdClazz = instance.create(new java.io.StringReader("")).getClass();
-			  Type<?> createdClazz = instance.create(new StringReader("")).GetType();
-			  // only check instance if factory have wrapped at all!
-			  if (typeof(StringReader) != createdClazz)
-			  {
-				assertSame(c, createdClazz);
-			  }
-			}
-			catch (System.ArgumentException e)
-			{
-			  if (e.InnerException is NoSuchMethodException)
-			  {
-				// there is no corresponding ctor available
-				throw e;
-			  }
-			  // TODO: For now pass because some factories have not yet a default config that always works
-			}
-		  }
-		}
-	  }
-	}
+                IDictionary<string, string> args = new Dictionary<string, string>();
+                args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
 
+                if (c.IsSubclassOf(typeof(Tokenizer)))
+                {
+                    string clazzName = c.Name;
+                    assertTrue(clazzName.EndsWith("Tokenizer", StringComparison.Ordinal));
+                    string simpleName = clazzName.Substring(0, clazzName.Length - 9);
+                    assertNotNull(TokenizerFactory.LookupClass(simpleName));
+                    TokenizerFactory instance = null;
+                    try
+                    {
+                        instance = TokenizerFactory.ForName(simpleName, args);
+                        assertNotNull(instance);
+                        if (instance is IResourceLoaderAware)
+                        {
+                            ((IResourceLoaderAware)instance).Inform(loader);
+                        }
+                        assertSame(c, instance.Create(new StringReader("")).GetType());
+                    }
+                    catch (System.ArgumentException e)
+                    {
+                        if (e.InnerException is MissingMethodException)
+                        {
+                            // there is no corresponding ctor available
+                            throw e;
+                        }
+                        // TODO: For now pass because some factories have not yet a default config that always works
+                    }
+                }
+                else if (c.IsSubclassOf(typeof(TokenFilter)))
+                {
+                    string clazzName = c.Name;
+                    assertTrue(clazzName.EndsWith("Filter", StringComparison.Ordinal));
+                    string simpleName = clazzName.Substring(0, clazzName.Length - (clazzName.EndsWith("TokenFilter", StringComparison.Ordinal) ? 11 : 6));
+                    assertNotNull(TokenFilterFactory.LookupClass(simpleName));
+                    TokenFilterFactory instance = null;
+                    try
+                    {
+                        instance = TokenFilterFactory.ForName(simpleName, args);
+                        assertNotNull(instance);
+                        if (instance is IResourceLoaderAware)
+                        {
+                            ((IResourceLoaderAware)instance).Inform(loader);
+                        }
+                        Type createdClazz = instance.Create(new KeywordTokenizer(new StringReader(""))).GetType();
+                        // only check instance if factory have wrapped at all!
+                        if (typeof(KeywordTokenizer) != createdClazz)
+                        {
+                            assertSame(c, createdClazz);
+                        }
+                    }
+                    catch (System.ArgumentException e)
+                    {
+                        if (e.InnerException is MissingMethodException)
+                        {
+                            // there is no corresponding ctor available
+                            throw e;
+                        }
+                        // TODO: For now pass because some factories have not yet a default config that always works
+                    }
+                }
+                else if (c.IsSubclassOf(typeof(CharFilter)))
+                {
+                    string clazzName = c.Name;
+                    assertTrue(clazzName.EndsWith("CharFilter", StringComparison.Ordinal));
+                    string simpleName = clazzName.Substring(0, clazzName.Length - 10);
+                    assertNotNull(CharFilterFactory.LookupClass(simpleName));
+                    CharFilterFactory instance = null;
+                    try
+                    {
+                        instance = CharFilterFactory.ForName(simpleName, args);
+                        assertNotNull(instance);
+                        if (instance is IResourceLoaderAware)
+                        {
+                            ((IResourceLoaderAware)instance).Inform(loader);
+                        }
+                        Type createdClazz = instance.Create(new StringReader("")).GetType();
+                        // only check instance if factory have wrapped at all!
+                        if (typeof(StringReader) != createdClazz)
+                        {
+                            assertSame(c, createdClazz);
+                        }
+                    }
+                    catch (System.ArgumentException e)
+                    {
+                        if (e.InnerException is MissingMethodException)
+                        {
+                            // there is no corresponding ctor available
+                            throw e;
+                        }
+                        // TODO: For now pass because some factories have not yet a default config that always works
+                    }
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9ed5b8f3/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index cb94f28..6d6c668 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -87,6 +87,7 @@
     <Compile Include="Analysis\Compound\TestCompoundWordTokenFilter.cs" />
     <Compile Include="Analysis\Compound\TestDictionaryCompoundWordTokenFilterFactory.cs" />
     <Compile Include="Analysis\Compound\TestHyphenationCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Core\TestAllAnalyzersHaveFactories.cs" />
     <Compile Include="Analysis\Core\TestAnalyzers.cs" />
     <Compile Include="Analysis\Core\TestBugInSomething.cs" />
     <Compile Include="Analysis\Core\TestClassicAnalyzer.cs" />


[11/22] lucenenet git commit: Updated TestSynonymMap formatting to make the tests easier to read.

Posted by sy...@apache.org.
Updated TestSynonymMap formatting to make the tests easier to read.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/196ce641
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/196ce641
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/196ce641

Branch: refs/heads/analysis-work
Commit: 196ce641f26f0de13978f905ce890623f380c552
Parents: 63e3e22
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Aug 25 22:04:57 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Aug 25 22:04:57 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Synonym/TestSynonymMapFilter.cs    | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/196ce641/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMapFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMapFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMapFilter.cs
index a4a3f35..d53bb3b 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMapFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMapFilter.cs
@@ -112,7 +112,7 @@ namespace Lucene.Net.Analysis.Synonym
                     int expectedPosLen;
                     if (colonIndex != -1)
                     {
-                        expectedToken = expectedAtPos[atPos].Substring(0, colonIndex);
+                        expectedToken = expectedAtPos[atPos].Substring(0, colonIndex - 0);
                         if (underbarIndex != -1)
                         {
                             expectedEndOffset = int.Parse(expectedAtPos[atPos].Substring(1 + colonIndex, underbarIndex - (1 + colonIndex)));
@@ -157,7 +157,14 @@ namespace Lucene.Net.Analysis.Synonym
             SynonymMap map = b.Build();
             Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, map);
 
-            AssertAnalyzesTo(analyzer, "a b c", new string[] { "foo", "c" }, new int[] { 0, 4 }, new int[] { 3, 5 }, null, new int[] { 1, 1 }, new int[] { 1, 1 }, true);
+            AssertAnalyzesTo(analyzer, "a b c", 
+                            new string[] { "foo", "c" }, 
+                            new int[] { 0, 4 }, 
+                            new int[] { 3, 5 }, 
+                            null, 
+                            new int[] { 1, 1 }, 
+                            new int[] { 1, 1 }, 
+                            true);
             CheckAnalysisConsistency(Random(), analyzer, false, "a b c");
         }
 
@@ -190,7 +197,14 @@ namespace Lucene.Net.Analysis.Synonym
 
             Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);
 
-            AssertAnalyzesTo(analyzer, "a b c", new string[] { "a", "foo", "b", "c" }, new int[] { 0, 0, 2, 4 }, new int[] { 1, 3, 3, 5 }, null, new int[] { 1, 0, 1, 1 }, new int[] { 1, 2, 1, 1 }, true);
+            AssertAnalyzesTo(analyzer, "a b c", 
+                            new string[] { "a", "foo", "b", "c" }, 
+                            new int[] { 0, 0, 2, 4 }, 
+                            new int[] { 1, 3, 3, 5 }, 
+                            null, 
+                            new int[] { 1, 0, 1, 1 }, 
+                            new int[] { 1, 2, 1, 1 }, 
+                            true);
             CheckAnalysisConsistency(Random(), analyzer, false, "a b c");
         }
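
One hunk here is easy to misread: Substring(0, colonIndex) and Substring(0, colonIndex - 0) return the same value. Java's substring(begin, end) takes an exclusive end index, while C#'s Substring(start, length) takes a length, so the mechanical port of substring(begin, end) is Substring(begin, end - begin); the "- 0" simply spells that conversion out for a begin index of 0. For example:

    string s = "token:42";
    int colonIndex = s.IndexOf(':');  // 5
    // Java:  s.substring(0, colonIndex)     -> "token" (exclusive end index)
    // C#:    s.Substring(0, colonIndex - 0) -> "token" (start + length)
    string expectedToken = s.Substring(0, colonIndex - 0);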
 


[19/22] lucenenet git commit: Fixed 2 bugs in Analysis.PatternCaptureGroupTokenFilter that were causing Analysis.TestPatternCaptureGroupTokenFilter.TestRandom() to fail.

Posted by sy...@apache.org.
Fixed 2 bugs in Analysis.PatternCaptureGroupTokenFilter that were causing Analysis.TestPatternCaptureGroupTokenFilter.TestRandom() to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bc48844b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bc48844b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bc48844b

Branch: refs/heads/analysis-work
Commit: bc48844b5b85542a6df5e6c409f17ed7a4634183
Parents: defcabe
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 22:21:34 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:08 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Pattern/PatternCaptureGroupTokenFilter.cs           | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc48844b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
index 7471df1..23ecb69 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
@@ -95,7 +95,7 @@ namespace Lucene.Net.Analysis.Pattern
             this.patterns = patterns;
             for (int i = 0; i < patterns.Length; i++)
             {
-                this.groupCounts[i] = patterns[0].GetGroupNumbers().Length;
+                this.groupCounts[i] = patterns[i].GetGroupNumbers().Length;
                 this.currentGroup[i] = -1;
                 this.matchers[i] = null; // Reset to null so we can tell we are at the head of the chain
             }
@@ -114,7 +114,7 @@ namespace Lucene.Net.Analysis.Pattern
                 if (currentGroup[i] == -1)
                 {
                     if (matchers[i] == null)
-                        matchers[i] = patterns[i].Match(new string(spare.Chars)); 
+                        matchers[i] = patterns[i].Match(new string(spare.Chars, spare.Offset, spare.Length)); 
                     else
                         matchers[i] = matchers[i].NextMatch();
                     currentGroup[i] = matchers[i].Success ? 1 : 0;
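
The first fix is a copy-paste loop bug: every pattern's group count was being read from patterns[0]. The second is subtler: the spare buffer backing the current term is reused across tokens and may be offset or longer than the term itself, so converting the whole array can pick up stale characters from an earlier, longer token. A minimal illustration of that failure mode:

    // A reused buffer currently holding the 3-char token "foo",
    // with stale characters from a previous, longer token after it.
    char[] buffer = { 'f', 'o', 'o', 'b', 'a', 'r' };
    int offset = 0, length = 3;

    string wrong = new string(buffer);                  // "foobar" - stale tail included
    string right = new string(buffer, offset, length);  // "foo"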


[15/22] lucenenet git commit: Ported Analysis.Core.TestRandomChains and moved the CheckThatYouDidntReadAnythingReaderWrapper back into that class from TestBugInSomething.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0a5198ec/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
index a1e8438..8da141f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
@@ -1,11 +1,36 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Cjk;
+using Lucene.Net.Analysis.CommonGrams;
+using Lucene.Net.Analysis.Compound;
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Hunspell;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.Path;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Analysis.Snowball;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Synonym;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Analysis.Wikipedia;
+using Lucene.Net.Attributes;
+using Lucene.Net.Support;
+using Lucene.Net.Tartarus.Snowball;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using NUnit.Framework;
+using System;
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
 
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -22,1566 +47,1086 @@ namespace org.apache.lucene.analysis.core
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// tests random analysis chains </summary>
+    public class TestRandomChains : BaseTokenStreamTestCase
+    {
+
+        internal static List<ConstructorInfo> tokenizers;
+        internal static List<ConstructorInfo> tokenfilters;
+        internal static List<ConstructorInfo> charfilters;
+
+        private interface IPredicate<T>
+        {
+            bool Apply(T o);
+        }
+
+        private static readonly IPredicate<object[]> ALWAYS = new PredicateAnonymousInnerClassHelper();
+
+        private class PredicateAnonymousInnerClassHelper : IPredicate<object[]>
+        {
+            public PredicateAnonymousInnerClassHelper()
+            {
+            }
+
+            public virtual bool Apply(object[] args)
+            {
+                return true;
+            }
+        }
+
+        private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
+        // TODO: also fix these and remove (maybe):
+        // Classes/options that don't produce consistent graph offsets:
+        private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenOffsetsConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
+
+        internal static readonly ISet<Type> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
+        static TestRandomChains()
+        {
+            try
+            {
+                brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int) })] = ALWAYS;
+                brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int), typeof(bool) })] = new PredicateAnonymousInnerClassHelper2();
+                brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int) })] = ALWAYS;
+                brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(new Type[] { typeof(TokenStream), typeof(int), typeof(bool) })] = new PredicateAnonymousInnerClassHelper3();
+                foreach (Type c in Arrays.AsList(
+                    // TODO: can we promote some of these to be only
+                    // offsets offenders?
+                    // doesn't actual reset itself:
+                    typeof(CachingTokenFilter),
+                    // Not broken: we forcefully add this, so we shouldn't
+                    // also randomly pick it:
+                    typeof(ValidatingTokenFilter)))
+                {
+                    foreach (ConstructorInfo ctor in c.GetConstructors())
+                    {
+                        brokenConstructors[ctor] = ALWAYS;
+                    }
+                }
+            }
+            catch (Exception e)
+            {
+                throw new Exception(e.Message, e);
+            }
+            try
+            {
+                foreach (Type c in Arrays.AsList(
+                    typeof(ReversePathHierarchyTokenizer),
+                    typeof(PathHierarchyTokenizer),
+                    // TODO: it seems to mess up offsets!?
+                    typeof(WikipediaTokenizer),
+                    // TODO: doesn't handle graph inputs
+                    typeof(CJKBigramFilter),
+                    // TODO: doesn't handle graph inputs (or even look at positionIncrement)
+                    typeof(HyphenatedWordsFilter),
+                    // TODO: LUCENE-4983
+                    typeof(CommonGramsFilter),
+                    // TODO: doesn't handle graph inputs
+                    typeof(CommonGramsQueryFilter),
+                    // TODO: probably doesnt handle graph inputs, too afraid to try
+                    typeof(WordDelimiterFilter)))
+                {
+                    foreach (ConstructorInfo ctor in c.GetConstructors())
+                    {
+                        brokenOffsetsConstructors[ctor] = ALWAYS;
+                    }
+                }
+            }
+            catch (Exception e)
+            {
+                throw new Exception(e.Message, e);
+            }
+
+            allowedTokenizerArgs = new HashSet<Type>(); // Collections.NewSetFromMap(new IdentityHashMap<Type, bool?>());
+            allowedTokenizerArgs.addAll(argProducers.Keys);
+            allowedTokenizerArgs.Add(typeof(TextReader));
+            allowedTokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
+            allowedTokenizerArgs.Add(typeof(AttributeSource));
+
+            allowedTokenFilterArgs = new HashSet<Type>();  //Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+            allowedTokenFilterArgs.addAll(argProducers.Keys);
+            allowedTokenFilterArgs.Add(typeof(TokenStream));
+            // TODO: fix this one, thats broken:
+            allowedTokenFilterArgs.Add(typeof(CommonGramsFilter));
+
+            allowedCharFilterArgs = new HashSet<Type>(); //Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
+            allowedCharFilterArgs.addAll(argProducers.Keys);
+            allowedCharFilterArgs.Add(typeof(TextReader));
+        }
+
+        private class PredicateAnonymousInnerClassHelper2 : IPredicate<object[]>
+        {
+            public PredicateAnonymousInnerClassHelper2()
+            {
+            }
+
+            public virtual bool Apply(object[] args)
+            {
+                Debug.Assert(args.Length == 3);
+                return !((bool)args[2]); // args are broken if consumeAllTokens is false
+            }
+        }
+
+        private class PredicateAnonymousInnerClassHelper3 : IPredicate<object[]>
+        {
+            public PredicateAnonymousInnerClassHelper3()
+            {
+            }
+
+            public virtual bool Apply(object[] args)
+            {
+                Debug.Assert(args.Length == 3);
+                return !((bool)args[2]); // args are broken if consumeAllTokens is false
+            }
+        }
+
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            IEnumerable<Type> analysisClasses = typeof(StandardAnalyzer).Assembly.GetTypes()
+                .Where(c => !c.IsAbstract && c.IsPublic && !c.IsInterface && c.IsClass && (c.GetCustomAttribute<ObsoleteAttribute>() == null)
+                && (c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter)))).ToArray();
+            tokenizers = new List<ConstructorInfo>();
+            tokenfilters = new List<ConstructorInfo>();
+            charfilters = new List<ConstructorInfo>();
+            foreach (Type c in analysisClasses)
+            {
+                foreach (ConstructorInfo ctor in c.GetConstructors())
+                {
+                    if (ctor.GetCustomAttribute<ObsoleteAttribute>() != null || (brokenConstructors.ContainsKey(ctor) && brokenConstructors[ctor] == ALWAYS))
+                    {
+                        continue;
+                    }
+
+                    if (c.IsSubclassOf(typeof(Tokenizer)))
+                    {
+                        assertTrue(ctor.ToString() + " has unsupported parameter types", 
+                            allowedTokenizerArgs.containsAll(Arrays.AsList(ctor.GetParameters().Select(p => p.ParameterType).ToArray())));
+                        tokenizers.Add(ctor);
+                    }
+                    else if (c.IsSubclassOf(typeof(TokenFilter)))
+                    {
+                        assertTrue(ctor.ToString() + " has unsupported parameter types", 
+                            allowedTokenFilterArgs.containsAll(Arrays.AsList(ctor.GetParameters().Select(p => p.ParameterType).ToArray())));
+                        tokenfilters.Add(ctor);
+                    }
+                    else if (c.IsSubclassOf(typeof(CharFilter)))
+                    {
+                        assertTrue(ctor.ToString() + " has unsupported parameter types", 
+                            allowedCharFilterArgs.containsAll(Arrays.AsList(ctor.GetParameters().Select(p => p.ParameterType).ToArray())));
+                        charfilters.Add(ctor);
+                    }
+                    else
+                    {
+                        fail("Cannot get here");
+                    }
+                }
+            }
+
+            IComparer<ConstructorInfo> ctorComp = new ComparatorAnonymousInnerClassHelper();
+            tokenizers.Sort(ctorComp);
+            tokenfilters.Sort(ctorComp);
+            charfilters.Sort(ctorComp);
+            if (VERBOSE)
+            {
+                Console.WriteLine("tokenizers = " + tokenizers);
+                Console.WriteLine("tokenfilters = " + tokenfilters);
+                Console.WriteLine("charfilters = " + charfilters);
+            }
+        }
+
+        private class ComparatorAnonymousInnerClassHelper : IComparer<ConstructorInfo>
+        {
+            public ComparatorAnonymousInnerClassHelper()
+            {
+            }
+
+            public virtual int Compare(ConstructorInfo arg0, ConstructorInfo arg1)
+            {
+                // LUCENENET TODO: Need to ensure we have the right sort order
+                // original: arg0.toGenericString().compareTo(arg1.toGenericString());
+                return arg0.ToString().CompareTo(arg1.ToString());
+            }
+        }
+
+        [TestFixtureTearDown]
+        public static void AfterClass()
+        {
+            tokenizers = null;
+            tokenfilters = null;
+            charfilters = null;
+        }
+
+
+        private interface IArgProducer
+        {
+            object Create(Random random);
+        }
+
+        private static readonly IDictionary<Type, IArgProducer> argProducers = new IdentityHashMap<Type, IArgProducer>()
+        {
+            { typeof(int), new IntArgProducer() },
+            { typeof(char), new CharArgProducer() },
+            { typeof(float), new FloatArgProducer() },
+            { typeof(bool), new BooleanArgProducer() },
+            { typeof(byte), new ByteArgProducer() },
+            { typeof(byte[]), new ByteArrayArgProducer() },
+            { typeof(sbyte[]), new SByteArrayArgProducer() },
+            { typeof(Random), new RandomArgProducer() },
+            { typeof(LuceneVersion), new VersionArgProducer() },
+            { typeof(IEnumerable<string>), new StringEnumerableArgProducer() },
+            { typeof(ICollection<char[]>), new CharArrayCollectionArgProducer() },// CapitalizationFilter
+            { typeof(CharArraySet), new CharArraySetArgProducer() },
+            { typeof(Regex), new RegexArgProducer() },
+            { typeof(Regex[]), new RegexArrayArgProducer() },
+            { typeof(IPayloadEncoder), new PayloadEncoderArgProducer() },
+            { typeof(Dictionary), new DictionaryArgProducer() },
+            { typeof(Lucene43EdgeNGramTokenizer.Side), new Lucene43SideArgProducer() },
+            { typeof(EdgeNGramTokenFilter.Side), new SideArgProducer() },
+            { typeof(HyphenationTree), new HyphenationTreeArgProducer() },
+            { typeof(SnowballProgram), new SnowballProgramArgProducer() },
+            { typeof(string), new StringArgProducer() },
+            { typeof(NormalizeCharMap), new NormalizeCharMapArgProducer() },
+            { typeof(CharacterRunAutomaton), new CharacterRunAutomatonArgProducer() },
+            { typeof(CharArrayMap<string>), new StringCharArrayMapArgProducer() },
+            { typeof(StemmerOverrideFilter.StemmerOverrideMap), new StemmerOverrideMapArgProducer() },
+            { typeof(SynonymMap), new SynonymMapArgProducer() },
+        };
+
+        private class IntArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: could cause huge ram usage to use full int range for some filters
+                // (e.g. allocate enormous arrays)
+                // return Integer.valueOf(random.nextInt());
+                return TestUtil.NextInt(random, -100, 100);
+            }
+        }
+
+        private class CharArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: fix any filters that care to throw IAE instead.
+                // also add a unicode validating filter to validate termAtt?
+                // return Character.valueOf((char)random.nextInt(65536));
+                while (true)
+                {
+                    char c = (char)random.nextInt(65536);
+                    if (c < '\uD800' || c > '\uDFFF')
+                    {
+                        return c;
+                    }
+                }
+            }
+        }
+
+        private class FloatArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return (float)random.NextDouble();
+            }
+        }
+
+        private class BooleanArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return random.nextBoolean();
+            }
+        }
+
+        private class ByteArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // this wraps to negative when casting to byte
+                return (byte)random.nextInt(256);
+            }
+        }
+
+        private class ByteArrayArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                byte[] bytes = new byte[random.nextInt(256)];
+                random.NextBytes(bytes);
+                return bytes;
+            }
+        }
+
+        private class SByteArrayArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                byte[] bytes = new byte[random.nextInt(256)];
+                random.NextBytes(bytes);
+                return (sbyte[])(Array)bytes;
+            }
+        }
+
+        private class RandomArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return new Random(random.Next());
+            }
+        }
+
+        private class VersionArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // we expect bugs in emulating old versions
+                return TEST_VERSION_CURRENT;
+            }
+        }
+
+        private class StringEnumerableArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TypeTokenFilter
+                ISet<string> set = new HashSet<string>();
+                int num = random.nextInt(5);
+                for (int i = 0; i < num; i++)
+                {
+                    set.Add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.Length)]);
+                }
+                return set;
+            }
+        }
+
+        private class CharArrayCollectionArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // CapitalizationFilter
+                ICollection<char[]> col = new List<char[]>();
+                int num = random.nextInt(5);
+                for (int i = 0; i < num; i++)
+                {
+                    col.Add(TestUtil.RandomSimpleString(random).toCharArray());
+                }
+                return col;
+            }
+        }
+
+        private class CharArraySetArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                int num = random.nextInt(10);
+                CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean());
+                for (int i = 0; i < num; i++)
+                {
+                    // TODO: make nastier
+                    set.add(TestUtil.RandomSimpleString(random));
+                }
+                return set;
+            }
+        }
+
+        private class RegexArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: don't want to make the exponentially slow ones Dawid documents
+                // in TestPatternReplaceFilter, so don't use truly random patterns (for now)
+                return new Regex("a", RegexOptions.Compiled);
+            }
+        }
+
+        private class RegexArrayArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return new Regex[] { new Regex("([a-z]+)", RegexOptions.Compiled), new Regex("([0-9]+)", RegexOptions.Compiled) };
+            }
+        }
+
+        private class PayloadEncoderArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return new IdentityEncoder(); // the other encoders will throw exceptions if tokens aren't numbers?
+            }
+        }
+
+        private class DictionaryArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: make nastier
+                using (Stream affixStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.aff"))
+                {
+                    using (Stream dictStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.dic"))
+                    {
+                        try
+                        {
+                            return new Dictionary(affixStream, dictStream);
+                        }
+                        catch (Exception)
+                        {
+                            throw; // rethrow without resetting the stack trace
+                        }
+                    }
+                }
+            }
+        }
+
+        private class Lucene43SideArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return random.nextBoolean()
+                    ? Lucene43EdgeNGramTokenizer.Side.FRONT
+                    : Lucene43EdgeNGramTokenizer.Side.BACK;
+            }
+        }
+
+        private class SideArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                return random.nextBoolean()
+                    ? EdgeNGramTokenFilter.Side.FRONT
+                    : EdgeNGramTokenFilter.Side.BACK;
+            }
+        }
+
+        private class HyphenationTreeArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: make nastier
+                try
+                {
+                    using (Stream @is = typeof(TestCompoundWordTokenFilter).getResourceAsStream("da_UTF8.xml"))
+                    {
+                        HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+                        return hyphenator;
+                    }
+                }
+                catch (Exception)
+                {
+                    throw; // rethrow without resetting the stack trace
+                }
+            }
+        }
+
+        private class SnowballProgramArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                try
+                {
+                    string lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.Length)];
+                    Type clazz = Type.GetType("Lucene.Net.Tartarus.Snowball.Ext." + lang + "Stemmer, Lucene.Net.Analysis.Common");
+                    return clazz.GetConstructor(new Type[0]).Invoke(new object[0]);
+                }
+                catch (Exception)
+                {
+                    throw; // rethrow without resetting the stack trace
+                }
+            }
+        }
+
+        private class StringArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: make nastier
+                if (random.nextBoolean())
+                {
+                    // a token type
+                    return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.Length)];
+                }
+                else
+                {
+                    return TestUtil.RandomSimpleString(random);
+                }
+            }
+        }
+
+        private class NormalizeCharMapArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+                // we can't add duplicate keys, or NormalizeCharMap gets angry
+                ISet<string> keys = new HashSet<string>();
+                int num = random.nextInt(5);
+                //System.out.println("NormalizeCharMap=");
+                for (int i = 0; i < num; i++)
+                {
+                    string key = TestUtil.RandomSimpleString(random);
+                    if (!keys.contains(key) && key.Length > 0)
+                    {
+                        string value = TestUtil.RandomSimpleString(random);
+                        builder.Add(key, value);
+                        keys.add(key);
+                        //System.out.println("mapping: '" + key + "' => '" + value + "'");
+                    }
+                }
+                return builder.Build();
+            }
+        }
+
+        private class CharacterRunAutomatonArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                // TODO: could probably use a purely random automaton
+                switch (random.nextInt(5))
+                {
+                    case 0: return MockTokenizer.KEYWORD;
+                    case 1: return MockTokenizer.SIMPLE;
+                    case 2: return MockTokenizer.WHITESPACE;
+                    case 3: return MockTokenFilter.EMPTY_STOPSET;
+                    default: return MockTokenFilter.ENGLISH_STOPSET;
+                }
+            }
+        }
+
+        private class StringCharArrayMapArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                int num = random.nextInt(10);
+                CharArrayMap<string> map = new CharArrayMap<string>(TEST_VERSION_CURRENT, num, random.nextBoolean());
+                for (int i = 0; i < num; i++)
+                {
+                    // TODO: make nastier
+                    map.Put(TestUtil.RandomSimpleString(random), TestUtil.RandomSimpleString(random));
+                }
+                return map;
+            }
+        }
+
+        private class StemmerOverrideMapArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                int num = random.nextInt(10);
+                StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
+                for (int i = 0; i < num; i++)
+                {
+                    string input = "";
+                    do
+                    {
+                        input = TestUtil.RandomRealisticUnicodeString(random);
+                    } while (input == string.Empty);
+                    string @out = ""; TestUtil.RandomSimpleString(random);
+                    do
+                    {
+                        @out = TestUtil.RandomRealisticUnicodeString(random);
+                    } while (@out == string.Empty);
+                    builder.Add(input, @out);
+                }
+                try
+                {
+                    return builder.Build();
+                }
+                catch (Exception)
+                {
+                    throw; // rethrow without resetting the stack trace
+                }
+            }
+        }
+
+        private class SynonymMapArgProducer : IArgProducer
+        {
+            public object Create(Random random)
+            {
+                SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
+                int numEntries = AtLeast(10);
+                for (int j = 0; j < numEntries; j++)
+                {
+                    AddSyn(b, RandomNonEmptyString(random), RandomNonEmptyString(random), random.nextBoolean());
+                }
+                try
+                {
+                    return b.Build();
+                }
+                catch (Exception)
+                {
+                    throw; // rethrow without resetting the stack trace
+                }
+            }
+
+            private void AddSyn(SynonymMap.Builder b, string input, string output, bool keepOrig)
+            {
+                b.Add(new CharsRef(Regex.Replace(input, " +", "\u0000")),
+                      new CharsRef(Regex.Replace(output, " +", "\u0000")),
+                      keepOrig);
+            }
+
+            private string RandomNonEmptyString(Random random)
+            {
+                while (true)
+                {
+                    string s = TestUtil.RandomUnicodeString(random).Trim();
+                    if (s.Length != 0 && s.IndexOf('\u0000') == -1)
+                    {
+                        return s;
+                    }
+                }
+            }
+        }
+
+
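+        // Looks up the producer registered for paramType and returns a random value
+        // of that type; fails the test if no producer is registered.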
+        internal static T NewRandomArg<T>(Random random, Type paramType)
+        {
+            IArgProducer producer = argProducers[paramType];
+            assertNotNull("No producer for arguments of type " + paramType + " found", producer);
+            return (T)producer.Create(random);
+        }
+
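+        // Builds random constructor arguments for a tokenizer: the TextReader
+        // parameter receives the supplied reader, everything else is randomized.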
+        internal static object[] NewTokenizerArgs(Random random, TextReader reader, Type[] paramTypes)
+        {
+            object[] args = new object[paramTypes.Length];
+            for (int i = 0; i < args.Length; i++)
+            {
+                Type paramType = paramTypes[i];
+                if (paramType == typeof(TextReader))
+                {
+                    args[i] = reader;
+                }
+                else if (paramType == typeof(AttributeSource.AttributeFactory))
+                {
+                    // TODO: maybe the collator one...???
+                    args[i] = AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+                }
+                else if (paramType == typeof(AttributeSource))
+                {
+                    // TODO: args[i] = new AttributeSource();
+                    // this is currently too scary to deal with!
+                    args[i] = null; // force IAE
+                }
+                else
+                {
+                    args[i] = NewRandomArg<object>(random, paramType);
+                }
+            }
+            return args;
+        }
+
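+        // Builds random constructor arguments for a char filter, wiring the supplied
+        // reader into any TextReader parameter.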
+        internal static object[] NewCharFilterArgs(Random random, TextReader reader, Type[] paramTypes)
+        {
+            object[] args = new object[paramTypes.Length];
+            for (int i = 0; i < args.Length; i++)
+            {
+                Type paramType = paramTypes[i];
+                if (paramType == typeof(TextReader))
+                {
+                    args[i] = reader;
+                }
+                else
+                {
+                    args[i] = NewRandomArg<object>(random, paramType);
+                }
+            }
+            return args;
+        }
+
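+        // Builds random constructor arguments for a token filter, wiring the supplied
+        // stream into any TokenStream parameter.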
+        static object[] NewFilterArgs(Random random, TokenStream stream, Type[] paramTypes)
+        {
+            object[] args = new object[paramTypes.Length];
+            for (int i = 0; i < args.Length; i++)
+            {
+                Type paramType = paramTypes[i];
+                if (paramType == typeof(TokenStream))
+                {
+                    args[i] = stream;
+                }
+                else if (paramType == typeof(CommonGramsFilter))
+                {
+                    // TODO: fix this one, that's broken: CommonGramsQueryFilter takes this one explicitly
+                    args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, NewRandomArg<CharArraySet>(random, typeof(CharArraySet)));
+                }
+                else
+                {
+                    args[i] = NewRandomArg<object>(random, paramType);
+                }
+            }
+            return args;
+        }
+
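+        // Analyzer that assembles a random chain of char filters, a tokenizer, and
+        // token filters; the chain is reproducible because every method re-seeds its
+        // own Random from the same seed.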
+        private class MockRandomAnalyzer : Analyzer
+        {
+            internal readonly int seed;
+
+            public MockRandomAnalyzer(int seed)
+            {
+                this.seed = seed;
+            }
+
+            public bool OffsetsAreCorrect
+            {
+                get
+                {
+                    // TODO: can we not do the full chain here!?
+                    Random random = new Random(seed);
+                    TokenizerSpec tokenizerSpec = NewTokenizer(random, new StringReader(""));
+                    TokenFilterSpec filterSpec = NewFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+                    return filterSpec.offsetsAreCorrect;
+                }
+            }
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Random random = new Random(seed);
+                TokenizerSpec tokenizerSpec = NewTokenizer(random, reader);
+                //System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString);
+                TokenFilterSpec filterSpec = NewFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+                //System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString);
+                return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream);
+            }
+
+            public override TextReader InitReader(string fieldName, TextReader reader)
+            {
+                Random random = new Random(seed);
+                CharFilterSpec charfilterspec = NewCharFilterChain(random, reader);
+                return charfilterspec.reader;
+            }
+
+            public override string ToString()
+            {
+                Random random = new Random(seed);
+                StringBuilder sb = new StringBuilder();
+                CharFilterSpec charFilterSpec = NewCharFilterChain(random, new StringReader(""));
+                sb.Append("\ncharfilters=");
+                sb.Append(charFilterSpec.toString);
+                // intentional: initReader gets its own separate random
+                random = new Random(seed);
+                TokenizerSpec tokenizerSpec = NewTokenizer(random, charFilterSpec.reader);
+                sb.Append("\n");
+                sb.Append("tokenizer=");
+                sb.Append(tokenizerSpec.toString);
+                TokenFilterSpec tokenFilterSpec = NewFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
+                sb.Append("\n");
+                sb.Append("filters=");
+                sb.Append(tokenFilterSpec.toString);
+                sb.Append("\n");
+                sb.Append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect);
+                return sb.ToString();
+            }
+
+            private T CreateComponent<T>(ConstructorInfo ctor, object[] args, StringBuilder descr)
+            {
+                try
+                {
+                    T instance = (T)ctor.Invoke(args);
+                    /*
+                    if (descr.length() > 0) {
+                      descr.append(",");
+                    }
+                    */
+                    descr.append("\n  ");
+                    descr.append(ctor.DeclaringType.Name);
+                    string @params = Arrays.ToString(args);
+                    //@params = @params.Substring(1, (@params.Length - 1) - 1); // LUCENENET - This is causing truncation of types
+                    descr.append("(").append(@params).append(")");
+                    return instance;
+                }
+                catch (TargetInvocationException ite)
+                {
+                    if (ite.InnerException is ArgumentException
+                        || ite.InnerException is NotSupportedException)
+                    {
+                        // that's ok, ignore
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("Ignoring IAE/UOE from ctor:");
+                            //cause.printStackTrace(System.err);
+                        }
+                    }
+                    else
+                    {
+                        throw; // rethrow, preserving the original stack trace
+                    }
+                }
+                //catch (IllegalAccessException iae)
+                //{
+                //    Rethrow.rethrow(iae);
+                //}
+                //catch (InstantiationException ie)
+                //{
+                //    Rethrow.rethrow(ie);
+                //}
+                return default(T); // no success
+            }
+
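+            // True if this constructor + argument combination is on the known-broken
+            // list and must be skipped.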
+            private bool Broken(ConstructorInfo ctor, object[] args)
+            {
+                IPredicate<object[]> pred;
+                brokenConstructors.TryGetValue(ctor, out pred);
+                return pred != null && pred.Apply(args);
+            }
+
+            private bool BrokenOffsets(ConstructorInfo ctor, object[] args)
+            {
+                IPredicate<object[]> pred;
+                brokenOffsetsConstructors.TryGetValue(ctor, out pred);
+                return pred != null && pred.Apply(args);
+            }
+
+            // create a new random tokenizer from the discovered tokenizer constructors
+            private TokenizerSpec NewTokenizer(Random random, TextReader reader)
+            {
+                TokenizerSpec spec = new TokenizerSpec();
+                while (spec.tokenizer == null)
+                {
+                    ConstructorInfo ctor = tokenizers[random.nextInt(tokenizers.size())];
+                    StringBuilder descr = new StringBuilder();
+                    CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
+                    object[] args = NewTokenizerArgs(random, wrapper, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
+                    if (Broken(ctor, args))
+                    {
+                        continue;
+                    }
+                    spec.tokenizer = CreateComponent<Tokenizer>(ctor, args, descr);
+                    if (spec.tokenizer != null)
+                    {
+                        spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
+                        spec.toString = descr.toString();
+                    }
+                    else
+                    {
+                        assertFalse(ctor.DeclaringType.Name + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
+                    }
+                }
+                return spec;
+            }
+
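+            // Wraps the reader in up to 2 randomly chosen char filters.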
+            private CharFilterSpec NewCharFilterChain(Random random, TextReader reader)
+            {
+                CharFilterSpec spec = new CharFilterSpec();
+                spec.reader = reader;
+                StringBuilder descr = new StringBuilder();
+                int numFilters = random.nextInt(3);
+                for (int i = 0; i < numFilters; i++)
+                {
+                    while (true)
+                    {
+                        ConstructorInfo ctor = charfilters[random.nextInt(charfilters.size())];
+                        object[] args = NewCharFilterArgs(random, spec.reader, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
+                        if (Broken(ctor, args))
+                        {
+                            continue;
+                        }
+                        reader = CreateComponent<TextReader>(ctor, args, descr);
+                        if (reader != null)
+                        {
+                            spec.reader = reader;
+                            break;
+                        }
+                    }
+                }
+                spec.toString = descr.toString();
+                return spec;
+            }
+
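+            // Builds a chain of up to 4 random token filters, inserting a
+            // ValidatingTokenFilter around each stage to catch offset problems close
+            // to the filter that caused them.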
+            private TokenFilterSpec NewFilterChain(Random random, Tokenizer tokenizer, bool offsetsAreCorrect)
+            {
+                TokenFilterSpec spec = new TokenFilterSpec();
+                spec.offsetsAreCorrect = offsetsAreCorrect;
+                spec.stream = tokenizer;
+                StringBuilder descr = new StringBuilder();
+                int numFilters = random.nextInt(5);
+                for (int i = 0; i < numFilters; i++)
+                {
+
+                    // Insert ValidatingTF after each stage so we can
+                    // catch problems right after the TF that "caused"
+                    // them:
+                    spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
+
+                    while (true)
+                    {
+                        ConstructorInfo ctor = tokenfilters[random.nextInt(tokenfilters.size())];
+
+                        // hack: MockGraph/MockLookahead have assertions that will trip if they follow
+                        // an offsets violator, so we can't use them after e.g. WikipediaTokenizer
+                        if (!spec.offsetsAreCorrect &&
+                            (ctor.DeclaringType.Equals(typeof(MockGraphTokenFilter))
+                                || ctor.DeclaringType.Equals(typeof(MockRandomLookaheadTokenFilter))))
+                        {
+                            continue;
+                        }
+
+                        object[] args = NewFilterArgs(random, spec.stream, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
+                        if (Broken(ctor, args))
+                        {
+                            continue;
+                        }
+                        TokenFilter flt = CreateComponent<TokenFilter>(ctor, args, descr);
+                        if (flt != null)
+                        {
+                            spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
+                            spec.stream = flt;
+                            break;
+                        }
+                    }
+                }
+
+                // Insert ValidatingTF after each stage so we can
+                // catch problems right after the TF that "caused"
+                // them:
+                spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
+
+                spec.toString = descr.toString();
+                return spec;
+            }
+        }
+
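+        // CharFilter wrapper that records whether anything was read, so we can assert
+        // that constructors do not consume input from the reader.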
+        internal class CheckThatYouDidntReadAnythingReaderWrapper : CharFilter
+        {
+            internal bool readSomething;
+
+            public CheckThatYouDidntReadAnythingReaderWrapper(TextReader @in)
+                : base(@in)
+            { }
+
+            private CharFilter Input
+            {
+                get { return (CharFilter)this.input; }
+            }
+
+            protected override int Correct(int currentOff)
+            {
+                return currentOff; // we don't change any offsets
+            }
+
+            public override int Read(char[] cbuf, int off, int len)
+            {
+                readSomething = true;
+                return input.Read(cbuf, off, len);
+            }
+
+            public override int Read()
+            {
+                readSomething = true;
+                return input.Read();
+            }
+
+            // LUCENENET: TextReader doesn't support this overload
+            //public int read(char[] cbuf)
+            //{
+            //    readSomething = true;
+            //    return input.read(cbuf);
+            //}
+
+            public override long Skip(int n)
+            {
+                readSomething = true;
+                return Input.Skip(n);
+            }
+
+            public override void Mark(int readAheadLimit)
+            {
+                Input.Mark(readAheadLimit);
+            }
+
+            public override bool IsMarkSupported
+            {
+                get
+                {
+                    return Input.IsMarkSupported;
+                }
+            }
+
+            public override bool Ready()
+            {
+                return Input.Ready();
+            }
+
+            public override void Reset()
+            {
+                Input.Reset();
+            }
+        }
+
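+        // Simple holders for the randomly built components, their string description,
+        // and whether their offsets are expected to be correct.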
+        internal class TokenizerSpec
+        {
+            internal Tokenizer tokenizer;
+            internal string toString;
+            internal bool offsetsAreCorrect = true;
+        }
+
+        internal class TokenFilterSpec
+        {
+            internal TokenStream stream;
+            internal string toString;
+            internal bool offsetsAreCorrect = true;
+        }
+
+        internal class CharFilterSpec
+        {
+            internal TextReader reader;
+            internal string toString;
+        }
+
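+        // Stress test: builds many random analyzers and runs CheckRandomData on each.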
+        [Test, LongRunningTest]
+        public void TestRandomChains_()
+        {
+            int numIterations = AtLeast(20);
+            Random random = Random();
+            for (int i = 0; i < numIterations; i++)
+            {
+                MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next());
+                if (VERBOSE)
+                {
+                    Console.WriteLine("Creating random analyzer:" + a);
+                }
+                try
+                {
+                    CheckRandomData(random, a, 500 * RANDOM_MULTIPLIER, 20, false,
+                                    false /* We already validate our own offsets... */);
+                }
+                catch (Exception)
+                {
+                    Console.WriteLine("Exception from random analyzer: " + a);
+                    throw; // rethrow without resetting the stack trace
+                }
+            }
+        }
 
-	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-	using CJKBigramFilter = org.apache.lucene.analysis.cjk.CJKBigramFilter;
-	using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter;
-	using CommonGramsQueryFilter = org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
-	using HyphenationCompoundWordTokenFilter = org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
-	using TestCompoundWordTokenFilter = org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
-	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-	using Dictionary = org.apache.lucene.analysis.hunspell.Dictionary;
-	using TestHunspellStemFilter = org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
-	using HyphenatedWordsFilter = org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
-	using LimitTokenCountFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
-	using LimitTokenPositionFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
-	using StemmerOverrideFilter = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
-	using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
-	using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
-	using EdgeNGramTokenFilter = org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-	using Lucene43EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
-	using PathHierarchyTokenizer = org.apache.lucene.analysis.path.PathHierarchyTokenizer;
-	using ReversePathHierarchyTokenizer = org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
-	using IdentityEncoder = org.apache.lucene.analysis.payloads.IdentityEncoder;
-	using PayloadEncoder = org.apache.lucene.analysis.payloads.PayloadEncoder;
-	using TestSnowball = org.apache.lucene.analysis.snowball.TestSnowball;
-	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
-	using SynonymMap = org.apache.lucene.analysis.synonym.SynonymMap;
-	using CharArrayMap = org.apache.lucene.analysis.util.CharArrayMap;
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
-	using AttributeSource = org.apache.lucene.util.AttributeSource;
-	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using Rethrow = org.apache.lucene.util.Rethrow;
-	using TestUtil = org.apache.lucene.util.TestUtil;
-	using Version = org.apache.lucene.util.Version;
-	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
-	using AfterClass = org.junit.AfterClass;
-	using BeforeClass = org.junit.BeforeClass;
-	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
-	using InputSource = org.xml.sax.InputSource;
-
-	/// <summary>
-	/// tests random analysis chains </summary>
-	public class TestRandomChains : BaseTokenStreamTestCase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.Tokenizer>> tokenizers;
-	  internal static IList<Constructor<?>> tokenizers;
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.TokenFilter>> tokenfilters;
-	  internal static IList<Constructor<?>> tokenfilters;
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.CharFilter>> charfilters;
-	  internal static IList<Constructor<?>> charfilters;
-
-	  private interface Predicate<T>
-	  {
-		bool apply(T o);
-	  }
-
-	  private static readonly Predicate<object[]> ALWAYS = new PredicateAnonymousInnerClassHelper();
-
-	  private class PredicateAnonymousInnerClassHelper : Predicate<object[]>
-	  {
-		  public PredicateAnonymousInnerClassHelper()
-		  {
-		  }
-
-		  public virtual bool apply(object[] args)
-		  {
-			return true;
-		  };
-	  }
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new java.util.HashMap<>();
-	  private static readonly IDictionary<Constructor<?>, Predicate<object[]>> brokenConstructors = new Dictionary<Constructor<?>, Predicate<object[]>>();
-	  static TestRandomChains()
-	  {
-		try
-		{
-		  brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(typeof(TokenStream), typeof(int))] = ALWAYS;
-		  brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(typeof(TokenStream), typeof(int), typeof(bool))] = new PredicateAnonymousInnerClassHelper2();
-		  brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(typeof(TokenStream), typeof(int))] = ALWAYS;
-		  brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(typeof(TokenStream), typeof(int), typeof(bool))] = new PredicateAnonymousInnerClassHelper3();
-		  foreach (Type c in Arrays.asList<Type>(typeof(CachingTokenFilter), typeof(CrankyTokenFilter), typeof(ValidatingTokenFilter)))
-			  // TODO: can we promote some of these to be only
-			  // offsets offenders?
-			  // doesn't actual reset itself!
-			  // Not broken, simulates brokenness:
-			  // Not broken: we forcefully add this, so we shouldn't
-			  // also randomly pick it:
-		  {
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (Constructor<?> ctor : c.getConstructors())
-			foreach (Constructor<?> ctor in c.GetConstructors())
-			{
-			  brokenConstructors[ctor] = ALWAYS;
-			}
-		  }
-		}
-		catch (Exception e)
-		{
-		  throw new Exception(e);
-		}
-		try
-		{
-		  foreach (Type c in Arrays.asList<Type>(typeof(ReversePathHierarchyTokenizer), typeof(PathHierarchyTokenizer), typeof(WikipediaTokenizer), typeof(CJKBigramFilter), typeof(HyphenatedWordsFilter), typeof(CommonGramsFilter), typeof(CommonGramsQueryFilter), typeof(WordDelimiterFilter)))
-			  // TODO: it seems to mess up offsets!?
-			  // TODO: doesn't handle graph inputs
-			  // TODO: doesn't handle graph inputs (or even look at positionIncrement)
-			  // TODO: LUCENE-4983
-			  // TODO: doesn't handle graph inputs
-			  // TODO: probably doesnt handle graph inputs, too afraid to try
-		  {
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (Constructor<?> ctor : c.getConstructors())
-			foreach (Constructor<?> ctor in c.GetConstructors())
-			{
-			  brokenOffsetsConstructors[ctor] = ALWAYS;
-			}
-		  }
-		}
-		catch (Exception e)
-		{
-		  throw new Exception(e);
-		}
-		allowedTokenizerArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
-		allowedTokenizerArgs.addAll(argProducers.Keys);
-		allowedTokenizerArgs.Add(typeof(Reader));
-		allowedTokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
-		allowedTokenizerArgs.Add(typeof(AttributeSource));
-
-		allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
-		allowedTokenFilterArgs.addAll(argProducers.Keys);
-		allowedTokenFilterArgs.Add(typeof(TokenStream));
-		// TODO: fix this one, thats broken:
-		allowedTokenFilterArgs.Add(typeof(CommonGramsFilter));
-
-		allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
-		allowedCharFilterArgs.addAll(argProducers.Keys);
-		allowedCharFilterArgs.Add(typeof(Reader));
-	  }
-
-	  private class PredicateAnonymousInnerClassHelper2 : Predicate<object[]>
-	  {
-		  public PredicateAnonymousInnerClassHelper2()
-		  {
-		  }
-
-		  public virtual bool apply(object[] args)
-		  {
-			Debug.Assert(args.Length == 3);
-			return !((bool?) args[2]); // args are broken if consumeAllTokens is false
-		  }
-	  }
-
-	  private class PredicateAnonymousInnerClassHelper3 : Predicate<object[]>
-	  {
-		  public PredicateAnonymousInnerClassHelper3()
-		  {
-		  }
-
-		  public virtual bool apply(object[] args)
-		  {
-			Debug.Assert(args.Length == 3);
-			return !((bool?) args[2]); // args are broken if consumeAllTokens is false
-		  }
-	  }
-
-	  // TODO: also fix these and remove (maybe):
-	  // Classes/options that don't produce consistent graph offsets:
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new java.util.HashMap<>();
-	  private static readonly IDictionary<Constructor<?>, Predicate<object[]>> brokenOffsetsConstructors = new Dictionary<Constructor<?>, Predicate<object[]>>();
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		IList<Type> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
-		tokenizers = new List<>();
-		tokenfilters = new List<>();
-		charfilters = new List<>();
-		foreach (Class c in analysisClasses)
-		{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int modifiers = c.getModifiers();
-		  int modifiers = c.Modifiers;
-		  if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface || c.isAnnotationPresent(typeof(Deprecated)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
-		  {
-			// don't waste time with abstract classes or deprecated known-buggy ones
-			continue;
-		  }
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (final Constructor<?> ctor : c.getConstructors())
-		  foreach (Constructor<?> ctor in c.Constructors)
-		  {
-			// don't test synthetic or deprecated ctors, they likely have known bugs:
-			if (ctor.Synthetic || ctor.isAnnotationPresent(typeof(Deprecated)) || brokenConstructors[ctor] == ALWAYS)
-			{
-			  continue;
-			}
-			if (c.IsSubclassOf(typeof(Tokenizer)))
-			{
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
-			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenizerArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
-			  tokenizers.Add(castConstructor(typeof(Tokenizer), ctor));
-			}
-			else if (c.IsSubclassOf(typeof(TokenFilter)))
-			{
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
-			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
-			  tokenfilters.Add(castConstructor(typeof(TokenFilter), ctor));
-			}
-			else if (c.IsSubclassOf(typeof(CharFilter)))
-			{
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
-			  assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedCharFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
-			  charfilters.Add(castConstructor(typeof(CharFilter), ctor));
-			}
-			else
-			{
-			  fail("Cannot get here");
-			}
-		  }
-		}
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.util.Comparator<Constructor<?>> ctorComp = new java.util.Comparator<Constructor<?>>()
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-		IComparer<Constructor<?>> ctorComp = new ComparatorAnonymousInnerClassHelper();
-		tokenizers.Sort(ctorComp);
-		tokenfilters.Sort(ctorComp);
-		charfilters.Sort(ctorComp);
-		if (VERBOSE)
-		{
-		  Console.WriteLine("tokenizers = " + tokenizers);
-		  Console.WriteLine("tokenfilters = " + tokenfilters);
-		  Console.WriteLine("charfilters = " + charfilters);
-		}
-	  }
-
-	  private class ComparatorAnonymousInnerClassHelper : IComparer<Constructor<JavaToDotNetGenericWildcard>>
-	  {
-		  public ComparatorAnonymousInnerClassHelper()
-		  {
-		  }
-
-		  public virtual int compare<T1, T2>(Constructor<T1> arg0, Constructor<T2> arg1)
-		  {
-			return arg0.toGenericString().compareTo(arg1.toGenericString());
-		  }
-	  }
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @AfterClass public static void afterClass()
-	  public static void afterClass()
-	  {
-		tokenizers = null;
-		tokenfilters = null;
-		charfilters = null;
-	  }
-
-	  /// <summary>
-	  /// Hack to work around the stupidness of Oracle's strict Java backwards compatibility.
-	  /// {@code Class<T>#getConstructors()} should return unmodifiable {@code List<Constructor<T>>} not array! 
-	  /// </summary>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("unchecked") private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor)
-	  private static Constructor<T> castConstructor<T, T1>(Type<T> instanceClazz, Constructor<T1> ctor)
-	  {
-		return (Constructor<T>) ctor;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static java.util.List<Class> getClassesForPackage(String pckgname) throws Exception
-	  public static IList<Type> getClassesForPackage(string pckgname)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.util.List<Class> classes = new java.util.ArrayList<>();
-		IList<Type> classes = new List<Type>();
-		collectClassesForPackage(pckgname, classes);
-		assertFalse("No classes found in package '" + pckgname + "'; maybe your test classes are packaged as JAR file?", classes.Count == 0);
-		return classes;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private static void collectClassesForPackage(String pckgname, java.util.List<Class> classes) throws Exception
-	  private static void collectClassesForPackage(string pckgname, IList<Type> classes)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final ClassLoader cld = TestRandomChains.class.getClassLoader();
-		ClassLoader cld = typeof(TestRandomChains).ClassLoader;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String path = pckgname.replace('.', '/');
-		string path = pckgname.Replace('.', '/');
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.util.Iterator<java.net.URL> resources = cld.getResources(path);
-		IEnumerator<URL> resources = cld.getResources(path);
-		while (resources.MoveNext())
-		{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.net.URI uri = resources.Current.toURI();
-		  URI uri = resources.Current.toURI();
-		  if (!"file".Equals(uri.Scheme, StringComparison.CurrentCultureIgnoreCase))
-		  {
-			continue;
-		  }
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.io.File directory = new java.io.File(uri);
-		  File directory = new File(uri);
-		  if (directory.exists())
-		  {
-			string[] files = directory.list();
-			foreach (string file in files)
-			{
-			  if ((new File(directory, file)).Directory)
-			  {
-				// recurse
-				string subPackage = pckgname + "." + file;
-				collectClassesForPackage(subPackage, classes);
-			  }
-			  if (file.EndsWith(".class", StringComparison.Ordinal))
-			  {
-				string clazzName = file.Substring(0, file.Length - 6);
-				// exclude Test classes that happen to be in these packages.
-				// class.ForName'ing some of them can cause trouble.
-				if (!clazzName.EndsWith("Test", StringComparison.Ordinal) && !clazzName.StartsWith("Test", StringComparison.Ordinal))
-				{
-				  // Don't run static initializers, as we won't use most of them.
-				  // Java will do that automatically once accessed/instantiated.
-				  classes.Add(Type.GetType(pckgname + '.' + clazzName, false, cld));
-				}
-			  }
-			}
-		  }
-		}
-	  }
-
-	  private interface ArgProducer
-	  {
-		object create(Random random);
-	  }
-
-	  private static readonly IDictionary<Type, ArgProducer> argProducers = new IdentityHashMapAnonymousInnerClassHelper();
-
-	  private class IdentityHashMapAnonymousInnerClassHelper : IdentityHashMap<Type, ArgProducer>
-	  {
-		  public IdentityHashMapAnonymousInnerClassHelper()
-		  {
-		  }
-
-	//	  {
-	//	put(int.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: could cause huge ram usage to use full int range for some filters
-	//		// (e.g. allocate enormous arrays)
-	//		// return Integer.valueOf(random.nextInt());
-	//		return Integer.valueOf(TestUtil.nextInt(random, -100, 100));
-	//	  }
-	//	}
-	//   );
-	//	put(char.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: fix any filters that care to throw IAE instead.
-	//		// also add a unicode validating filter to validate termAtt?
-	//		// return Character.valueOf((char)random.nextInt(65536));
-	//		while(true)
-	//		{
-	//		  char c = (char)random.nextInt(65536);
-	//		  if (c < '\uD800' || c > '\uDFFF')
-	//		  {
-	//			return Character.valueOf(c);
-	//		  }
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(float.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return Float.valueOf(random.nextFloat());
-	//	  }
-	//	}
-	//   );
-	//	put(boolean.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return Boolean.valueOf(random.nextBoolean());
-	//	  }
-	//	}
-	//   );
-	//	put(byte.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// this wraps to negative when casting to byte
-	//		return Byte.valueOf((byte) random.nextInt(256));
-	//	  }
-	//	}
-	//   );
-	//	put(byte[].class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		byte bytes[] = new byte[random.nextInt(256)];
-	//		random.nextBytes(bytes);
-	//		return bytes;
-	//	  }
-	//	}
-	//   );
-	//	put(Random.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return new Random(random.nextLong());
-	//	  }
-	//	}
-	//   );
-	//	put(Version.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// we expect bugs in emulating old versions
-	//		return TEST_VERSION_CURRENT;
-	//	  }
-	//	}
-	//   );
-	//	put(Set.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TypeTokenFilter
-	//		Set<String> set = new HashSet<>();
-	//		int num = random.nextInt(5);
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
-	//		}
-	//		return set;
-	//	  }
-	//	}
-	//   );
-	//	put(Collection.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// CapitalizationFilter
-	//		Collection<char[]> col = new ArrayList<>();
-	//		int num = random.nextInt(5);
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  col.add(TestUtil.randomSimpleString(random).toCharArray());
-	//		}
-	//		return col;
-	//	  }
-	//	}
-	//   );
-	//	put(CharArraySet.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		int num = random.nextInt(10);
-	//		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean());
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  // TODO: make nastier
-	//		  set.add(TestUtil.randomSimpleString(random));
-	//		}
-	//		return set;
-	//	  }
-	//	}
-	//   );
-	//	put(Pattern.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: don't want to make the exponentially slow ones Dawid documents
-	//		// in TestPatternReplaceFilter, so dont use truly random patterns (for now)
-	//		return Pattern.compile("a");
-	//	  }
-	//	}
-	//   );
-	//
-	//	put(Pattern[].class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
-	//	  }
-	//	}
-	//   );
-	//	put(PayloadEncoder.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return new IdentityEncoder(); // the other encoders will throw exceptions if tokens arent numbers?
-	//	  }
-	//	}
-	//   );
-	//	put(Dictionary.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: make nastier
-	//		InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
-	//		InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
-	//		try
-	//		{
-	//		 return new Dictionary(affixStream, dictStream);
-	//		}
-	//		catch (Exception ex)
-	//		{
-	//		  Rethrow.rethrow(ex);
-	//		  return null; // unreachable code
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(Lucene43EdgeNGramTokenizer.Side.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return random.nextBoolean() ? Lucene43EdgeNGramTokenizer.Side.FRONT : Lucene43EdgeNGramTokenizer.Side.BACK;
-	//	  }
-	//	}
-	//   );
-	//	put(EdgeNGramTokenFilter.Side.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		return random.nextBoolean() ? EdgeNGramTokenFilter.Side.FRONT : EdgeNGramTokenFilter.Side.BACK;
-	//	  }
-	//	}
-	//   );
-	//	put(HyphenationTree.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: make nastier
-	//		try
-	//		{
-	//		  InputSource @is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
-	//		  HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-	//		  return hyphenator;
-	//		}
-	//		catch (Exception ex)
-	//		{
-	//		  Rethrow.rethrow(ex);
-	//		  return null; // unreachable code
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(SnowballProgram.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		try
-	//		{
-	//		  String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
-	//		  Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
-	//		  return clazz.newInstance();
-	//		}
-	//		catch (Exception ex)
-	//		{
-	//		  Rethrow.rethrow(ex);
-	//		  return null; // unreachable code
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(String.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: make nastier
-	//		if (random.nextBoolean())
-	//		{
-	//		  // a token type
-	//		  return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)];
-	//		}
-	//		else
-	//		{
-	//		  return TestUtil.randomSimpleString(random);
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(NormalizeCharMap.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
-	//		// we can't add duplicate keys, or NormalizeCharMap gets angry
-	//		Set<String> keys = new HashSet<>();
-	//		int num = random.nextInt(5);
-	//		//System.out.println("NormalizeCharMap=");
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  String key = TestUtil.randomSimpleString(random);
-	//		  if (!keys.contains(key) && key.length() > 0)
-	//		  {
-	//			String value = TestUtil.randomSimpleString(random);
-	//			builder.add(key, value);
-	//			keys.add(key);
-	//			//System.out.println("mapping: '" + key + "' => '" + value + "'");
-	//		  }
-	//		}
-	//		return builder.build();
-	//	  }
-	//	}
-	//   );
-	//	put(CharacterRunAutomaton.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		// TODO: could probably use a purely random automaton
-	//		switch(random.nextInt(5))
-	//		{
-	//		  case 0:
-	//			  return MockTokenizer.KEYWORD;
-	//		  case 1:
-	//			  return MockTokenizer.SIMPLE;
-	//		  case 2:
-	//			  return MockTokenizer.WHITESPACE;
-	//		  case 3:
-	//			  return MockTokenFilter.EMPTY_STOPSET;
-	//		  default:
-	//			  return MockTokenFilter.ENGLISH_STOPSET;
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(CharArrayMap.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		int num = random.nextInt(10);
-	//		CharArrayMap<String> map = new CharArrayMap<>(TEST_VERSION_CURRENT, num, random.nextBoolean());
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  // TODO: make nastier
-	//		  map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
-	//		}
-	//		return map;
-	//	  }
-	//	}
-	//   );
-	//	put(StemmerOverrideMap.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		int num = random.nextInt(10);
-	//		StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
-	//		for (int i = 0; i < num; i++)
-	//		{
-	//		  String input = "";
-	//		  do
-	//		  {
-	//			input = TestUtil.randomRealisticUnicodeString(random);
-	//		  } while(input.isEmpty());
-	//		  String @out = "";
-	//		  TestUtil.randomSimpleString(random);
-	//		  do
-	//		  {
-	//			@out = TestUtil.randomRealisticUnicodeString(random);
-	//		  } while(@out.isEmpty());
-	//		  builder.add(input, @out);
-	//		}
-	//		try
-	//		{
-	//		  return builder.build();
-	//		}
-	//		catch (Exception ex)
-	//		{
-	//		  Rethrow.rethrow(ex);
-	//		  return null; // unreachable code
-	//		}
-	//	  }
-	//	}
-	//   );
-	//	put(SynonymMap.class, new ArgProducer()
-	//	{
-	//	  @@Override public Object create(Random random)
-	//	  {
-	//		SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
-	//		final int numEntries = atLeast(10);
-	//		for (int j = 0; j < numEntries; j++)
-	//		{
-	//		  addSyn(b, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
-	//		}
-	//		try
-	//		{
-	//		  return b.build();
-	//		}
-	//		catch (Exception ex)
-	//		{
-	//		  Rethrow.rethrow(ex);
-	//		  return null; // unreachable code
-	//		}
-	//	  }
-	//
-	//	  private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig)
-	//	  {
-	//		b.add(new CharsRef(input.replaceAll(" +", "\u0000")), new CharsRef(output.replaceAll(" +", "\u0000")), keepOrig);
-	//	  }
-	//
-	//	  private String randomNonEmptyString(Random random)
-	//	  {
-	//		while(true)
-	//		{
-	//		  final String s = TestUtil.randomUnicodeString(random).trim();
-	//		  if (s.length() != 0 && s.indexOf('\u0000') == -1)
-	//		  {
-	//			return s;
-	//		  }
-	//		}
-	//	  }
-	//	}
-	//   );
-	//  }
-	//  }
-	//
-	//  static final Set<Class> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
-	//ignore
-	//
-	//  @@SuppressWarnings("unchecked") static <T> T newRandomArg(Random random, Class<T> paramType)
-	//  {
-	//	final ArgProducer producer = argProducers.get(paramType);
-	//	assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
-	//	return (T) producer.create(random);
-	//  }
-	//
-	//  static Object[] newTokenizerArgs(Random random, Reader reader, Class[] paramTypes)
-	//  {
-	//	Object[] args = new Object[paramTypes.length];
-	//	for (int i = 0; i < args.length; i++)
-	//	{
-	//	  Class paramType = paramTypes[i];
-	//	  if (paramType == Reader.class)
-	//	  {
-	//		args[i] = reader;
-	//	  }
-	//	  else if (paramType == AttributeFactory.class)
-	//	  {
-	//		// TODO: maybe the collator one...???
-	//		args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
-	//	  }
-	//	  else if (paramType == AttributeSource.class)
-	//	  {
-	//		// TODO: args[i] = new AttributeSource();
-	//		// this is currently too scary to deal with!
-	//		args[i] = null; // force IAE
-	//	  }
-	//	  else
-	//	  {
-	//		args[i] = newRandomArg(random, paramType);
-	//	  }
-	//	}
-	//	return args;
-	//  }
-	//
-	//  static Object[] newCharFilterArgs(Random random, Reader reader, Class[] paramTypes)
-	//  {
-	//	Object[] args = new Object[paramTypes.length];
-	//	for (int i = 0; i < args.length; i++)
-	//	{
-	//	  Class paramType = paramTypes[i];
-	//	  if (paramType == Reader.class)
-	//	  {
-	//		args[i] = reader;
-	//	  }
-	//	  else
-	//	  {
-	//		args[i] = newRandomArg(random, paramType);
-	//	  }
-	//	}
-	//	return args;
-	//  }
-	//
-	//  static Object[] newFilterArgs(Random random, TokenStream stream, Class[] paramTypes)
-	//  {
-	//	Object[] args = new Object[paramTypes.length];
-	//	for (int i = 0; i < args.length; i++)
-	//	{
-	//	  Class paramType = paramTypes[i];
-	//	  if (paramType == TokenStream.class)
-	//	  {
-	//		args[i] = stream;
-	//	  }
-	//	  else if (paramType == CommonGramsFilter.class)
-	//	  {
-	//		// TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly
-	//		args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, newRandomArg(random, CharArraySet.class));
-	//	  }
-	//	  else
-	//	  {
-	//		args[i] = newRandomArg(random, paramType);
-	//	  }
-	//	}
-	//	return args;
-	//  }
-	//
-	//  static class MockRandomAnalyzer extends Analyzer
-	//  {
-	//	final long seed;
-	//
-	//	MockRandomAnalyzer(long seed)
-	//	{
-	//	  this.seed = seed;
-	//	}
-	//
-	//	public boolean offsetsAreCorrect()
-	//	{
-	//	  // TODO: can we not do the full chain here!?
-	//	  Random random = new Random(seed);
-	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
-	//	  TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
-	//	  return filterSpec.offsetsAreCorrect;
-	//	}
-	//
-	//	@@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader)
-	//	{
-	//	  Random random = new Random(seed);
-	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, reader);
-	//	  //System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString);
-	//	  TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
-	//	  //System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString);
-	//	  return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream);
-	//	}
-	//
-	//	@@Override protected Reader initReader(String fieldName, Reader reader)
-	//	{
-	//	  Random random = new Random(seed);
-	//	  CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
-	//	  return charfilterspec.reader;
-	//	}
-	//
-	//	@@Override public String toString()
-	//	{
-	//	  Random random = new Random(seed);
-	//	  StringBuilder sb = new StringBuilder();
-	//	  CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader(""));
-	//	  sb.append("\ncharfilters=");
-	//	  sb.append(charFilterSpec.toString);
-	//	  // intentional: initReader gets its own separate random
-	//	  random = new Random(seed);
-	//	  TokenizerSpec tokenizerSpec = newTokenizer(random, charFilterSpec.reader);
-	//	  sb.append("\n");
-	//	  sb.append("tokenizer=");
-	//	  sb.append(tokenizerSpec.toString);
-	//	  TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
-	//	  sb.append("\n");
-	//	  sb.append("filters=");
-	//	  sb.append(tokenFilterSpec.toString);
-	//	  sb.append("\n");
-	//	  sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect);
-	//	  return sb.toString();
-	//	}
-	//
-	//	private <T> T createComponent(Constructor<T> ctor, Object[] args, StringBuilder descr)
-	//	{
-	//	  try
-	//	  {
-	//		final T instance = ctor.newInstance(args);
-	// /*
-	// if (descr.length() > 0) {
-	//   descr.append(",");
-	// }
-	// */
-	//		descr.append("\n  ");
-	//		descr.append(ctor.getDeclaringClass().getName());
-	//		String @params = Arrays.deepToString(args);
-	//		@params = @params.substring(1, (@params.length()-1) - 1);
-	//		descr.append("(").append(@params).append(")");
-	//		return instance;
-	//	  }
-	//	  catch (InvocationTargetException ite)
-	//	  {
-	//		final Throwable cause = ite.getCause();
-	//		if (cause instanceof IllegalArgumentException || cause instanceof UnsupportedOperationException)
-	//	{
-	//		  // thats ok, ignore
-	//		  if (VERBOSE)
-	//		  {
-	//			System.err.println("Ignoring IAE/UOE from ctor:");
-	//			cause.printStackTrace(System.err);
-	//		  }
-	//		}
-	//		else
-	//		{
-	//		  Rethrow.rethrow(cause);
-	//		}
-	//	  }
-	//	  catch (IllegalAccessException iae)
-	//	  {
-	//		Rethrow.rethrow(iae);
-	//	  }
-	//	  catch (InstantiationException ie)
-	//	  {
-	//		Rethrow.rethrow(ie);
-	//	  }
-	//	  return null; // no success
-	//	}
-	//
-	//	private boolean broken(Constructor<?> ctor, Object[] args)
-	//	{
-	//	  final Predicate<Object[]> pred = brokenConstructors.get(ctor);
-	//	  return pred != null && pred.apply(args);
-	//	}
-	//
-	//	private boolean brokenOffsets(Constructor<?> ctor, Object[] args)
-	//	{
-	//	  final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
-	//	  return pred != null && pred.apply(args);
-	//	}
-	//
-	//	// create a new random tokenizer from classpath
-	//	private TokenizerSpec newTokenizer(Random random, Reader reader)
-	//	{
-	//	  TokenizerSpec spec = new TokenizerSpec();
-	//	  while (spec.tokenizer == null)
-	//	  {
-	//		final Constructor<? extends Tokenizer> ctor = tokenizers.get(random.nextInt(tokenizers.size()));
-	//		final StringBuilder descr = new StringBuilder();
-	//		final CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
-	//		final Object args[] = newTokenizerArgs(random, wrapper, ctor.getParameterTypes());
-	//		if (broken(ctor, args))
-	//		{
-	//		  continue;
-	//		}
-	//		spec.tokenizer = createComponent(ctor, args, descr);
-	//		if (spec.tokenizer != null)
-	//		{
-	//		  spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
-	//		  spec.toString = descr.toString();
-	//		}
-	//		else
-	//		{
-	//		  assertFalse(ctor.getDeclaringClass().getName() + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
-	//		}
-	//	  }
-	//	  return spec;
-	//	}
-	//
-	//	private CharFilterSpec newCharFilterChain(Random random, Reader reader)
-	//	{
-	//	  CharFilterSpec spec = new CharFilterSpec();
-	//	  spec.reader = reader;
-	//	  StringBuilder descr = new StringBuilder();
-	//	  int numFilters = random.nextInt(3);
-	//	  for (int i = 0; i < numFilters; i++)
-	//	  {
-	//		while (true)
-	//		{
-	//		  final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
-	//		  final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
-	//		  if (broken(ctor, args))
-	//		  {
-	//			continue;
-	//		  }
-	//		  reader = createComponent(ctor, args, descr);
-	//		  if (reader != null)
-	//		  {
-	//			spec.reader = reader;
-	//			break;
-	//		  }
-	//		}
-	//	  }
-	//	  spec.toString = descr.toString();
-	//	  return spec;
-	//	}
-	//
-	//	private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer, boolean offsetsAreCorrect)
-	//	{
-	//	  TokenFilterSpec spec = new TokenFilterSpec();
-	//	  spec.offsetsAreCorrect = offsetsAreCorrect;
-	//	  spec.stream = tokenizer;
-	//	  StringBuilder descr = new StringBuilder();
-	//	  int numFilters = random.nextInt(5);
-	//	  for (int i = 0; i < numFilters; i++)
-	//	  {
-	//
-	//		// Insert ValidatingTF after each stage so we can
-	//		// catch problems right after the TF that "caused"
-	//		// them:
-	//		spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
-	//
-	//		while (true)
-	//		{
-	//		  final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
-	//
-	//		  // hack: MockGraph/MockLookahead has assertions that will trip if they follow
-	//		  // an offsets violator. so we cant use them after e.g. wikipediatokenizer
-	//		  if (!spec.offsetsAreCorrect && (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class) || ctor.getDeclaringClass().equals(MockRandomLookaheadTokenFilter.class)))
-	//		  {
-	//			continue;
-	//		  }
-	//
-	//		  final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
-	//		  if (broken(ctor, args))
-	//		  {
-	//			continue;
-	//		  }
-	//		  final TokenFilter flt = createComponent(ctor, args, descr);
-	//		  if (flt != null)
-	//		  {
-	//			spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
-	//			spec.stream = flt;
-	//			break;
-	//		  }
-	//		}
-	//	  }
-	//
-	//	  // Insert ValidatingTF after each stage so we can
-	//	  // catch problems right after the TF that "caused"
-	//	  // them:
-	//	  spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
-	//
-	//	  spec.toString = descr.toString();
-	//	  return spec;
-	//	}
-	//  }
-	//
-	//  static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter
-	//  {
-	//	boolean readSomething;
-	//
-	//	CheckThatYouDidntReadAnythingReaderWrapper(Reader @in)
-	//	{
-	//	  base(@in);
-	//	}
-	//
-	//	@@Override public int correct(int currentOff)
-	//	{
-	//	  return currentOff; // we don't change any offsets
-	//	}
-	//
-	//	@@Override public int read(char[] cbuf, int off, int len) throws IOException
-	//	{
-	//	  readSomething = true;
-	//	  return input.read(cbuf, off, len);
-	//	}
-	//
-	//	@@Override public int read() throws IOException
-	//	{
-	//	  readSomething = true;
-	//	  return input.read();
-	//	}
-	//
-	//	@@Override public int read(CharBuffer target) throws IOException
-	//	{
-	//	  readSomething = true;
-	//	  return input.read(target);
-	//	}
-	//
-	//	@@Override public int read(char[] cbuf) throws IOException
-	//	{
-	//	  readSomething = true;
-	//	  return input.read(cbuf);
-	//	}
-	//
-	//	@@Override public long skip(long n) throws IOException
-	//	{
-	//	  readSomething = true;
-	//	  return input.skip(n);
-	//	}
-	//
-	//	@@Override public void mark(int readAheadLimit) throws IOException
-	//	{
-	//	  input.mark(readAheadLimit);
-	//	}
-	//
-	//	@@Override public boolean markSupported()
-	//	{
-	//	  return input.markSupported();
-	//	}
-	//
-	//	@@Override public boolean ready() throws IOException
-	//	{
-	//	  return input.ready();
-	//	}
-	//
-	//	@@Override public void reset() throws IOException
-	//	{
-	//	  input.reset();
-	//	}
-	//  }
-	//
-	//  static class TokenizerSpec
-	//  {
-	//	Tokenizer tokenizer;
-	//	String toString;
-	//	boolean offsetsAreCorrect = true;
-	//  }
-	//
-	//  static class TokenFilterSpec
-	//  {
-	//	TokenStream stream;
-	//	String toString;
-	//	boolean offsetsAreCorrect = true;
-	//  }
-	//
-	//  static class CharFilterSpec
-	//  {
-	//	Reader reader;
-	//	String toString;
-	//  }
-	//
-	//  public void testRandomChains() throws Throwable
-	//  {
-	//	int numIterations = atLeast(20);
-	//	Random random = random();
-	//	for (int i = 0; i < numIterations; i++)
-	//	{
-	//	  MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
-	//	  if (VERBOSE)
-	//	  {
-	//		System.out.println("Creating random analyzer:" + a);
-	//	  }
-	//	  try
-	//	  {
-	//		checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false, false); // We already validate our own offsets...
-	//	  }
-	//	  catch (Throwable e)
-	//	  {
-	//		System.err.println("Exception from random analyzer: " + a);
-	//		throw e;
-	//	  }
-	//	}
-	//  }
-	

<TRUNCATED>

[03/22] lucenenet git commit: Fixed problems with the Analysis.Core.TestBugInSomething.TestWrapping() test that were causing it to fail.

Posted by sy...@apache.org.
Fixed problems with the Analysis.Core.TestBugInSomething.TestWrapping() test that were causing it to fail.
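The fix renames the mock reader's exception messages to the .NET-cased member names and adjusts the assertions for overload cascading. A minimal self-contained sketch of the pattern (the type and names here are illustrative, not the test's own):

    using System;
    using System.IO;

    class ExceptionReader : TextReader
    {
        // Each member reports its .NET-cased name, matching what the test asserts.
        public override int Read(char[] buffer, int index, int count)
        {
            throw new NotSupportedException("Read(char[], int, int)");
        }
    }

    class Demo
    {
        static void Main()
        {
            var cs = new ExceptionReader();
            try
            {
                // In the test environment, a Read(char[]) extension cascades into
                // Read(char[], int, int), so that overload's message is what surfaces.
                cs.Read(new char[0], 0, 0);
            }
            catch (NotSupportedException e)
            {
                Console.WriteLine(e.Message); // Read(char[], int, int)
            }
        }
    }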


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e3bbea4f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e3bbea4f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e3bbea4f

Branch: refs/heads/analysis-work
Commit: e3bbea4f7562c7e7011f93026c11962429bcc470
Parents: 85789c0
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 18:21:17 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 18:21:17 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Core/TestBugInSomething.cs         | 94 ++++++++++----------
 1 file changed, 45 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e3bbea4f/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
index 8fe1562..0d9d742 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -90,7 +90,7 @@ namespace Lucene.Net.Analysis.Core
 
             public override void Mark(int readAheadLimit)
             {
-                throw new System.NotSupportedException("mark(int)");
+                throw new System.NotSupportedException("Mark(int)");
             }
 
             public override bool IsMarkSupported
@@ -103,47 +103,48 @@ namespace Lucene.Net.Analysis.Core
 
             public override int Read()
             {
-                throw new System.NotSupportedException("read()");
+                throw new System.NotSupportedException("Read()");
             }
 
+            // LUCENENET: We don't support these overloads in .NET
             // public override int Read(char[] cbuf)
             // {
-            //throw new System.NotSupportedException("read(char[])");
+            //throw new System.NotSupportedException("Read(char[])");
             // }
 
             //public override int read(CharBuffer target)
             //{
-            //    throw new System.NotSupportedException("read(CharBuffer)");
+            //    throw new System.NotSupportedException("Read(CharBuffer)");
             //}
 
             public override bool Ready()
             {
-                throw new System.NotSupportedException("ready()");
+                throw new System.NotSupportedException("Ready()");
             }
 
             public override void Reset()
             {
-                throw new System.NotSupportedException("reset()");
+                throw new System.NotSupportedException("Reset()");
             }
 
             public override long Skip(int n)
             {
-                throw new System.NotSupportedException("skip(long)");
+                throw new System.NotSupportedException("Skip(long)");
             }
 
             protected override int Correct(int currentOff)
             {
-                throw new System.NotSupportedException("correct(int)");
+                throw new System.NotSupportedException("Correct(int)");
             }
 
             public override void Close()
             {
-                throw new System.NotSupportedException("close()");
+                throw new System.NotSupportedException("Close()");
             }
 
             public override int Read(char[] arg0, int arg1, int arg2)
             {
-                throw new System.NotSupportedException("read(char[], int, int)");
+                throw new System.NotSupportedException("Read(char[], int, int)");
             }
         }
 
@@ -158,7 +159,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("mark(int)", e.Message);
+                assertEquals("Mark(int)", e.Message);
             }
 
             try
@@ -168,7 +169,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("markSupported()", e.Message);
+                assertEquals("IsMarkSupported", e.Message);
             }
 
             try
@@ -178,7 +179,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("read()", e.Message);
+                assertEquals("Read()", e.Message);
             }
 
             try
@@ -188,18 +189,24 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("read(char[])", e.Message);
+                // LUCENENET NOTE: TextReader doesn't support an overload that omits
+                // index and count. The test environment adds an extension method that
+                // does, but the error will come from the cascaded overload.
+                //assertEquals("Read(char[])", e.Message);
+                assertEquals("Read(char[], int, int)", e.Message);
             }
 
-            try
-            {
-                cs.read(new char[0]);
-                fail();
-            }
-            catch (Exception e)
-            {
-                assertEquals("read(CharBuffer)", e.Message);
-            }
+            // LUCENENET NOTE: We don't have a CharBuffer type in Lucene.Net,
+            // nor do we have an overload that accepts it.
+            //try
+            //{
+            //    cs.read(CharBuffer.wrap(new char[0]));
+            //    fail();
+            //}
+            //catch (Exception e)
+            //{
+            //    assertEquals("Read(CharBuffer)", e.Message);
+            //}
 
             try
             {
@@ -208,7 +215,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("reset()", e.Message);
+                assertEquals("Reset()", e.Message);
             }
 
             try
@@ -218,7 +225,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("skip(long)", e.Message);
+                assertEquals("Skip(long)", e.Message);
             }
 
             try
@@ -228,7 +235,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("correct(int)", e.Message);
+                assertEquals("Correct(int)", e.Message);
             }
 
             try
@@ -238,7 +245,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("close()", e.Message);
+                assertEquals("Close()", e.Message);
             }
 
             try
@@ -248,7 +255,7 @@ namespace Lucene.Net.Analysis.Core
             }
             catch (Exception e)
             {
-                assertEquals("read(char[], int, int)", e.Message);
+                assertEquals("Read(char[], int, int)", e.Message);
             }
         }
 
@@ -365,24 +372,12 @@ namespace Lucene.Net.Analysis.Core
             bool readSomething;
 
             public CheckThatYouDidntReadAnythingReaderWrapper(TextReader @in)
-                : base(GetBufferedReader(@in))
+                : base(@in)
             { }
 
-            /// <summary>
-            /// LUCENENET: Copied this method from the WordlistLoader class - this class requires readers
-            /// with a Reset() method (which .NET readers don't support). So, we use the Java BufferedReader
-            /// as a wrapper for whatever reader the user passes (unless it is already a BufferedReader).
-            /// </summary>
-            /// <param name="reader"></param>
-            /// <returns></returns>
-            private static BufferedCharFilter GetBufferedReader(TextReader reader)
+            private CharFilter Input
             {
-                return (reader is BufferedCharFilter) ? (BufferedCharFilter)reader : new BufferedCharFilter(reader);
-            }
-
-            private BufferedCharFilter Input
-            {
-                get { return (BufferedCharFilter)this.input; }
+                get { return (CharFilter)this.input; }
             }
 
             protected override int Correct(int currentOff)
@@ -402,11 +397,12 @@ namespace Lucene.Net.Analysis.Core
                 return input.Read();
             }
 
-            public int read(char[] cbuf)
-            {
-                readSomething = true;
-                return input.read(cbuf);
-            }
+            // LUCENENET: TextReader doesn't support this overload
+            //public int read(char[] cbuf)
+            //{
+            //    readSomething = true;
+            //    return input.read(cbuf);
+            //}
 
             public override long Skip(int n)
             {
@@ -432,7 +428,7 @@ namespace Lucene.Net.Analysis.Core
                 return Input.Ready();
             }
 
-            public void reset()
+            public override void Reset()
             {
                 Input.Reset();
             }


[13/22] lucenenet git commit: Fixed formatting bug in BaseTokenStreamTestCase that was causing the input characters to be excluded from the output message.

Posted by sy...@apache.org.
Fixed formatting bug in BaseTokenStreamTestCase that was causing the input characters to be excluded from the output message.
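The root cause: .NET's string.Format does not understand Java/C-style specifiers such as "%04x", so the format string was emitted literally and the character value was dropped. A minimal sketch of the difference (illustrative only, not the test framework code itself):

    using System;
    using System.Globalization;
    using System.Text;

    class FormatDemo
    {
        static void Main()
        {
            int c = 0x00e9; // code point to escape; must be numeric for the "x4" hex specifier
            var sb = new StringBuilder();
            // Broken: "%04x" is not a .NET format item, so it is copied verbatim
            // and the argument is never rendered.
            sb.Append(string.Format(CultureInfo.InvariantCulture, "\\u%04x", c));
            // Fixed: a composite format item with the "x4" hex specifier.
            sb.AppendFormat(CultureInfo.InvariantCulture, "\\u{0:x4}", c);
            Console.WriteLine(sb); // prints: \u%04x\u00e9
        }
    }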


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4e9fd4f9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4e9fd4f9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4e9fd4f9

Branch: refs/heads/analysis-work
Commit: 4e9fd4f99d2b454928014d19fbc00b429998dc4b
Parents: 91f0608
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 12:30:57 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:19:57 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4e9fd4f9/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
index f96955c..f2ffbe0 100644
--- a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs
@@ -874,7 +874,7 @@ namespace Lucene.Net.Analysis
                 {
                     // TODO: we can make ascii easier to read if we
                     // don't escape...
-                    sb.Append(string.Format(CultureInfo.InvariantCulture, "\\u%04x", c));
+                    sb.AppendFormat(CultureInfo.InvariantCulture, "\\u{0:x4}", c);
                 }
                 charUpto++;
             }


[18/22] lucenenet git commit: Fixed bug in Analysis.Hunspell and Core.Util.OfflineSorter where multiple threads competed for the same temp files.

Posted by sy...@apache.org.
Fixed bug in Analysis.Hunspell and Core.Util.OfflineSorter where multiple threads competed for the same temp files.
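The fix (see the FileSupport.cs hunk below) replaces hard-coded names such as "affix.aff" and "unsorted.dat" with per-call unique files. A hedged usage sketch of the new helper, with the surrounding dictionary-loading code elided:

    using System.IO;
    using Lucene.Net.Support;

    class TempFileUsage
    {
        static void Demo(DirectoryInfo tempDir)
        {
            // Each call probes for an unused name, so two threads loading
            // dictionaries concurrently no longer collide on the same path.
            FileInfo aff = FileSupport.CreateTempFile("affix", "aff", tempDir);
            FileInfo unsorted = FileSupport.CreateTempFile("unsorted", "dat", tempDir);
            // ... write to and read from the files ...
            try { aff.Delete(); } catch { /* ignore; see the Dictionary.cs hunk */ }
            try { unsorted.Delete(); } catch { /* ignore */ }
        }
    }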


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/defcabee
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/defcabee
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/defcabee

Branch: refs/heads/analysis-work
Commit: defcabee7437ed8595bf841a3c332960c8a1ba9c
Parents: 9ed5b8f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 12:29:34 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:05 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Hunspell/Dictionary.cs             | 250 +++++++++----------
 src/Lucene.Net.Core/Support/FileSupport.cs      |  70 ++++++
 src/Lucene.Net.Core/Util/OfflineSorter.cs       |  50 ++--
 3 files changed, 220 insertions(+), 150 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/defcabee/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index 8bab079..e9fc124 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -134,43 +134,41 @@ namespace Lucene.Net.Analysis.Hunspell
             this.needsOutputCleaning = false; // set if we have an OCONV
             flagLookup.Add(new BytesRef()); // no flags -> ord 0
 
-            FileInfo aff = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "affix.aff"));
-            using (Stream @out = aff.Create())
+            FileInfo aff = FileSupport.CreateTempFile("affix", "aff", tempDir);
+            using (Stream @out = aff.Open(FileMode.Open, FileAccess.ReadWrite))
             {
-                Stream aff1 = null;
-                Stream aff2 = null;
-                try
-                {
-                    // copy contents of affix stream to temp file
-                    byte[] buffer = new byte[1024 * 8];
-                    int len;
-                    while ((len = affix.Read(buffer, 0, buffer.Length)) > 0)
-                    {
-                        @out.Write(buffer, 0, len);
-                    }
-                    @out.Close(); // LUCENENET: Release the file handle - we dispose @out later
-
-                    // pass 1: get encoding
-                    aff1 = File.OpenRead(aff.FullName);
-                    string encoding = GetDictionaryEncoding(aff1);
-
-                    // pass 2: parse affixes
-                    Encoding decoder = GetSystemEncoding(encoding);
-                    aff2 = File.OpenRead(aff.FullName);
-                    ReadAffixFile(aff2, decoder);
-
-                    // read dictionary entries
-                    IntSequenceOutputs o = IntSequenceOutputs.Singleton;
-                    Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
-                    ReadDictionaryFiles(dictionaries, decoder, b);
-                    words = b.Finish();
-                    aliases = null; // no longer needed
-                }
-                finally
-                {
-                    IOUtils.CloseWhileHandlingException(aff1, aff2);
-                    aff.Delete();
-                }
+                // copy contents of affix stream to temp file
+                affix.CopyTo(@out);
+            }
+
+            // pass 1: get encoding
+            string encoding;
+            using (Stream aff1 = aff.Open(FileMode.Open, FileAccess.Read))
+            {
+                encoding = GetDictionaryEncoding(aff1);
+            }
+
+            // pass 2: parse affixes
+            Encoding decoder = GetSystemEncoding(encoding);
+            using (Stream aff2 = aff.Open(FileMode.Open, FileAccess.Read))
+            {
+                ReadAffixFile(aff2, decoder);
+            }
+
+            // read dictionary entries
+            IntSequenceOutputs o = IntSequenceOutputs.Singleton;
+            Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
+            ReadDictionaryFiles(dictionaries, decoder, b);
+            words = b.Finish();
+            aliases = null; // no longer needed
+
+            try
+            {
+                aff.Delete();
+            }
+            catch
+            {
+                // ignore
             }
         }
 
@@ -744,10 +742,8 @@ namespace Lucene.Net.Analysis.Hunspell
 
             StringBuilder sb = new StringBuilder();
 
-            FileInfo unsorted = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "unsorted.dat"));
-            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted);
-            bool success = false;
-            try
+            FileInfo unsorted = FileSupport.CreateTempFile("unsorted", "dat", tempDir);
+            using (OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted))
             {
                 foreach (Stream dictionary in dictionaries)
                 {
@@ -784,113 +780,115 @@ namespace Lucene.Net.Analysis.Hunspell
                         }
                     }
                 }
-                success = true;
-            }
-            finally
-            {
-                if (success)
-                {
-                    IOUtils.Close(writer);
-                }
-                else
-                {
-                    IOUtils.CloseWhileHandlingException(writer);
-                }
             }
-            FileInfo sorted = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "sorted.dat"));
-            using (var temp = sorted.Create()) { }
+
+            FileInfo sorted = FileSupport.CreateTempFile("sorted", "dat", tempDir);
 
             OfflineSorter sorter = new OfflineSorter(new ComparatorAnonymousInnerClassHelper(this));
             sorter.Sort(unsorted, sorted);
-            unsorted.Delete();
-
-            OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
-            BytesRef scratchLine = new BytesRef();
+            try
+            {
+                unsorted.Delete();
+            }
+            catch
+            {
+                // ignore
+            }
 
-            // TODO: the flags themselves can be double-chars (long) or also numeric
-            // either way the trick is to encode them as char... but they must be parsed differently
+            using (OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted))
+            {
+                BytesRef scratchLine = new BytesRef();
 
-            string currentEntry = null;
-            IntsRef currentOrds = new IntsRef();
+                // TODO: the flags themselves can be double-chars (long) or also numeric
+                // either way the trick is to encode them as char... but they must be parsed differently
 
-            string line2;
-            while (reader.Read(scratchLine))
-            {
-                line2 = scratchLine.Utf8ToString();
-                string entry;
-                char[] wordForm;
+                string currentEntry = null;
+                IntsRef currentOrds = new IntsRef();
 
-                int flagSep = line2.LastIndexOf(FLAG_SEPARATOR);
-                if (flagSep == -1)
+                string line2;
+                while (reader.Read(scratchLine))
                 {
-                    wordForm = NOFLAGS;
-                    entry = line2;
-                }
-                else
-                {
-                    // note, there can be comments (morph description) after a flag.
-                    // we should really look for any whitespace: currently just tab and space
-                    int end = line2.IndexOf('\t', flagSep);
-                    if (end == -1)
+                    line2 = scratchLine.Utf8ToString();
+                    string entry;
+                    char[] wordForm;
+
+                    int flagSep = line2.LastIndexOf(FLAG_SEPARATOR);
+                    if (flagSep == -1)
                     {
-                        end = line2.Length;
+                        wordForm = NOFLAGS;
+                        entry = line2;
                     }
-                    int end2 = line2.IndexOf(' ', flagSep);
-                    if (end2 == -1)
+                    else
                     {
-                        end2 = line2.Length;
-                    }
-                    end = Math.Min(end, end2);
+                        // note, there can be comments (morph description) after a flag.
+                        // we should really look for any whitespace: currently just tab and space
+                        int end = line2.IndexOf('\t', flagSep);
+                        if (end == -1)
+                        {
+                            end = line2.Length;
+                        }
+                        int end2 = line2.IndexOf(' ', flagSep);
+                        if (end2 == -1)
+                        {
+                            end2 = line2.Length;
+                        }
+                        end = Math.Min(end, end2);
 
-                    string flagPart = line2.Substring(flagSep + 1, end - (flagSep + 1));
-                    if (aliasCount > 0)
-                    {
-                        flagPart = GetAliasValue(int.Parse(flagPart, CultureInfo.InvariantCulture));
-                    }
+                        string flagPart = line2.Substring(flagSep + 1, end - (flagSep + 1));
+                        if (aliasCount > 0)
+                        {
+                            flagPart = GetAliasValue(int.Parse(flagPart, CultureInfo.InvariantCulture));
+                        }
 
-                    wordForm = flagParsingStrategy.ParseFlags(flagPart);
-                    Array.Sort(wordForm);
-                    entry = line2.Substring(0, flagSep - 0);
-                }
-                // LUCENENET NOTE: CompareToOrdinal is an extension method that works similarly to
-                // Java's String.compareTo method.
-                int cmp = currentEntry == null ? 1 : entry.CompareToOrdinal(currentEntry);
-                if (cmp < 0)
-                {
-                    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
-                }
-                else
-                {
-                    EncodeFlags(flagsScratch, wordForm);
-                    int ord = flagLookup.Add(flagsScratch);
-                    if (ord < 0)
-                    {
-                        // already exists in our hash
-                        ord = (-ord) - 1;
+                        wordForm = flagParsingStrategy.ParseFlags(flagPart);
+                        Array.Sort(wordForm);
+                        entry = line2.Substring(0, flagSep - 0);
                     }
-                    // finalize current entry, and switch "current" if necessary
-                    if (cmp > 0 && currentEntry != null)
+                    // LUCENENET NOTE: CompareToOrdinal is an extension method that works similarly to
+                    // Java's String.compareTo method.
+                    int cmp = currentEntry == null ? 1 : entry.CompareToOrdinal(currentEntry);
+                    if (cmp < 0)
                     {
-                        Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
-                        words.Add(scratchInts, currentOrds);
+                        throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
                     }
-                    // swap current
-                    if (cmp > 0 || currentEntry == null)
+                    else
                     {
-                        currentEntry = entry;
-                        currentOrds = new IntsRef(); // must be this way
+                        EncodeFlags(flagsScratch, wordForm);
+                        int ord = flagLookup.Add(flagsScratch);
+                        if (ord < 0)
+                        {
+                            // already exists in our hash
+                            ord = (-ord) - 1;
+                        }
+                        // finalize current entry, and switch "current" if necessary
+                        if (cmp > 0 && currentEntry != null)
+                        {
+                            Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
+                            words.Add(scratchInts, currentOrds);
+                        }
+                        // swap current
+                        if (cmp > 0 || currentEntry == null)
+                        {
+                            currentEntry = entry;
+                            currentOrds = new IntsRef(); // must be this way
+                        }
+                        currentOrds.Grow(currentOrds.Length + 1);
+                        currentOrds.Ints[currentOrds.Length++] = ord;
                     }
-                    currentOrds.Grow(currentOrds.Length + 1);
-                    currentOrds.Ints[currentOrds.Length++] = ord;
                 }
-            }
-
-            // finalize last entry
-            Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
-            words.Add(scratchInts, currentOrds);
 
-            reader.Dispose();
-            sorted.Delete();
+                // finalize last entry
+                Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
+                words.Add(scratchInts, currentOrds);
+            }
+            try
+            {
+                sorted.Delete();
+            }
+            catch
+            {
+                // ignore
+            }
         }
 
         private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/defcabee/src/Lucene.Net.Core/Support/FileSupport.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/FileSupport.cs b/src/Lucene.Net.Core/Support/FileSupport.cs
index 86c070c..ebc34e2 100644
--- a/src/Lucene.Net.Core/Support/FileSupport.cs
+++ b/src/Lucene.Net.Core/Support/FileSupport.cs
@@ -21,6 +21,8 @@
 
 using System;
 using System.IO;
+using System.Runtime.CompilerServices;
+using System.Text;
 
 namespace Lucene.Net.Support
 {
@@ -117,5 +119,73 @@ namespace Lucene.Net.Support
 
         [System.Runtime.InteropServices.DllImport("kernel32.dll")]
         extern static bool FlushFileBuffers(IntPtr hFile);
+
+
+        /// <summary>
+        /// Creates a new empty file in the specified directory, using the given prefix and suffix strings to generate its name. 
+        /// If this method returns successfully then it is guaranteed that:
+        /// <list type="number">
+        /// <item>The file denoted by the returned abstract pathname did not exist before this method was invoked, and</item>
+        /// <item>Neither this method nor any of its variants will return the same abstract pathname again in the current invocation of the virtual machine.</item>
+        /// </list>
+        /// This method provides only part of a temporary-file facility. To arrange for a file created by this method to be deleted automatically, use the deleteOnExit() method.
+        /// The prefix argument must be at least three characters long. It is recommended that the prefix be a short, meaningful string such as "hjb" or "mail". The suffix argument may be null, in which case the suffix ".tmp" will be used.
+        /// To create the new file, the prefix and the suffix may first be adjusted to fit the limitations of the underlying platform. If the prefix is too long then it will be truncated, but its first three characters will always be preserved. If the suffix is too long then it too will be truncated, but if it begins with a period character ('.') then the period and the first three characters following it will always be preserved. Once these adjustments have been made the name of the new file will be generated by concatenating the prefix, five or more internally-generated characters, and the suffix.
+        /// If the directory argument is null then the system-dependent default temporary-file directory will be used. The default temporary-file directory is specified by the system property java.io.tmpdir. On UNIX systems the default value of this property is typically "/tmp" or "/var/tmp"; on Microsoft Windows systems it is typically "C:\\WINNT\\TEMP". A different value may be given to this system property when the Java virtual machine is invoked, but programmatic changes to this property are not guaranteed to have any effect upon the temporary directory used by this method.
+        /// 
+        /// Ported over from the java.io.File class. Used by the Analysis.Hunspell.Directory
+        /// class, but this can probably be removed when that class is upgraded to a more recent
+        /// version of lucene, where it uses the lucene Store.Directory class to create a temporary
+        /// file.
+        /// </summary>
+        /// <param name="prefix">The prefix string to be used in generating the file's name; must be at least three characters long</param>
+        /// <param name="suffix">The suffix string to be used in generating the file's name; may be null, in which case the suffix ".tmp" will be used</param>
+        /// <param name="directory">The directory in which the file is to be created, or null if the default temporary-file directory is to be used</param>
+        /// <returns></returns>
+        [MethodImpl(MethodImplOptions.Synchronized)]
+        public static FileInfo CreateTempFile(string prefix, string suffix, DirectoryInfo directory)
+        {
+            if (string.IsNullOrEmpty(prefix))
+                throw new ArgumentNullException("prefix");
+            if (prefix.Length < 3)
+                throw new ArgumentException("Prefix string too short");
+            string s = (suffix == null) ? ".tmp" : suffix;
+            if (directory == null)
+            {
+                string tmpDir = Path.GetTempPath();
+                directory = new DirectoryInfo(tmpDir);
+            }
+            int attempt = 0;
+            string extension = s.StartsWith(".") ? s : '.' + s; // use the null-coalesced suffix so a null argument doesn't throw
+            string fileName = Path.Combine(directory.FullName, string.Concat(prefix, extension));
+            while (true)
+            {
+                try
+                {
+                    if (attempt > 0)
+                    {
+                        fileName = Path.Combine(directory.FullName, string.Concat(prefix, attempt.ToString(), extension));
+                    }
+                    if (File.Exists(fileName))
+                    {
+                        attempt++;
+                        continue;
+                    }
+                    // Create the file
+                    File.WriteAllText(fileName, string.Empty, Encoding.UTF8);
+                    break;
+                }
+                catch (IOException e)
+                {
+                    if (!e.Message.Contains("already exists"))
+                    {
+                        throw; // rethrow, preserving the original stack trace
+                    }
+
+                    attempt++;
+                }
+            }
+            return new FileInfo(fileName);
+        }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/defcabee/src/Lucene.Net.Core/Util/OfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OfflineSorter.cs b/src/Lucene.Net.Core/Util/OfflineSorter.cs
index 4f3084b..0f15405 100644
--- a/src/Lucene.Net.Core/Util/OfflineSorter.cs
+++ b/src/Lucene.Net.Core/Util/OfflineSorter.cs
@@ -4,6 +4,8 @@ using System.Diagnostics;
 using System.IO;
 using Lucene.Net.Store;
 using Lucene.Net.Support.Compatibility;
+using System.Linq;
+using Lucene.Net.Support;
 
 namespace Lucene.Net.Util
 {
@@ -267,7 +269,9 @@ namespace Lucene.Net.Util
         {
             sortInfo = new SortInfo(this) { TotalTime = DateTime.Now.Millisecond };
 
-            output.Delete();
+            // LUCENENET NOTE: Can't do this because another thread could recreate the file before we are done here.
+            // and cause this to bomb. We use the existence of the file as an indicator that we are done using it.
+            //output.Delete(); 
 
             var merges = new List<FileInfo>();
             bool success2 = false;
@@ -369,7 +373,13 @@ namespace Lucene.Net.Util
         /// </summary>
         private static void Copy(FileInfo file, FileInfo output)
         {
-            File.Copy(file.FullName, output.FullName);
+            using (Stream inputStream = file.OpenRead())
+            {
+                using (Stream outputStream = output.OpenWrite())
+                {
+                    inputStream.CopyTo(outputStream);
+                }
+            }
         }
 
         /// <summary>
@@ -377,53 +387,45 @@ namespace Lucene.Net.Util
         internal FileInfo SortPartition(int len)
         {
             var data = this.Buffer;
-            var tempFile = new FileInfo(Path.GetTempFileName());
-            //var tempFile1 = File.Create(new ());
-            //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
+            FileInfo tempFile = FileSupport.CreateTempFile("sort", "partition", DefaultTempDir());
 
             long start = DateTime.Now.Millisecond;
             sortInfo.SortTime += (DateTime.Now.Millisecond - start);
 
-            var @out = new ByteSequencesWriter(tempFile);
-            BytesRef spare;
-            try
+            using (var @out = new ByteSequencesWriter(tempFile))
             {
+                BytesRef spare;
+
                 BytesRefIterator iter = Buffer.Iterator(comparator);
                 while ((spare = iter.Next()) != null)
                 {
                     Debug.Assert(spare.Length <= short.MaxValue);
                     @out.Write(spare);
                 }
-
-                @out.Dispose();
-
-                // Clean up the buffer for the next partition.
-                data.Clear();
-                return tempFile;
-            }
-            finally
-            {
-                IOUtils.Close(@out);
             }
+
+            // Clean up the buffer for the next partition.
+            data.Clear();
+            return tempFile;
         }
 
         /// <summary>
         /// Merge a list of sorted temporary files (partitions) into an output file </summary>
-        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
+        internal void MergePartitions(IEnumerable<FileInfo> merges, FileInfo outputFile)
         {
             long start = DateTime.Now.Millisecond;
 
             var @out = new ByteSequencesWriter(outputFile);
 
-            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
+            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count());
 
-            var streams = new ByteSequencesReader[merges.Count];
+            var streams = new ByteSequencesReader[merges.Count()];
             try
             {
                 // Open streams and read the top for each file
-                for (int i = 0; i < merges.Count; i++)
+                for (int i = 0; i < merges.Count(); i++)
                 {
-                    streams[i] = new ByteSequencesReader(merges[i]);
+                    streams[i] = new ByteSequencesReader(merges.ElementAt(i));
                     byte[] line = streams[i].Read();
                     if (line != null)
                     {
@@ -528,7 +530,7 @@ namespace Lucene.Net.Util
             /// <summary>
             /// Constructs a ByteSequencesWriter to the provided File </summary>
             public ByteSequencesWriter(FileInfo file)
-                : this(new BinaryWriterDataOutput(new BinaryWriter(new FileStream(file.FullName, FileMode.OpenOrCreate))))
+                : this(new BinaryWriterDataOutput(new BinaryWriter(new FileStream(file.FullName, FileMode.Open))))
             {
             }
 


[06/22] lucenenet git commit: Ported Analysis.Miscellaneous.TestSingleTokenTokenFilter

Posted by sy...@apache.org.
Ported Analysis.Miscellaneous.TestSingleTokenTokenFilter


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/65f1c5f0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/65f1c5f0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/65f1c5f0

Branch: refs/heads/analysis-work
Commit: 65f1c5f0d2571e687073848b0ec14b98e8a603d8
Parents: 0549bf1
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 19:34:00 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 19:34:00 2016 +0700

----------------------------------------------------------------------
 .../Miscellaneous/TestSingleTokenTokenFilter.cs | 65 ++++++++++----------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |  1 +
 2 files changed, 32 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65f1c5f0/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
index 7781dc4..a6ee91a 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
@@ -1,7 +1,10 @@
-\ufeffnamespace org.apache.lucene.analysis.miscellaneous
-{
+\ufeffusing Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,35 +21,29 @@
 	 * limitations under the License.
 	 */
 
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-	public class TestSingleTokenTokenFilter : LuceneTestCase
-	{
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void test() throws java.io.IOException
-	  public virtual void test()
-	  {
-		Token token = new Token();
-		SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
-		AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(typeof(CharTermAttribute));
-		assertTrue(tokenAtt is Token);
-		ts.reset();
-
-		assertTrue(ts.incrementToken());
-		assertEquals(token, tokenAtt);
-		assertFalse(ts.incrementToken());
-
-		token = new Token("hallo", 10, 20, "someType");
-		ts.Token = token;
-		ts.reset();
-
-		assertTrue(ts.incrementToken());
-		assertEquals(token, tokenAtt);
-		assertFalse(ts.incrementToken());
-	  }
-	}
-
+    public class TestSingleTokenTokenFilter : LuceneTestCase
+    {
+
+        [Test]
+        public virtual void Test()
+        {
+            Token token = new Token();
+            SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
+            var tokenAtt = ts.AddAttribute<ICharTermAttribute>();
+            assertTrue(tokenAtt is Token);
+            ts.Reset();
+
+            assertTrue(ts.IncrementToken());
+            assertEquals(token, tokenAtt);
+            assertFalse(ts.IncrementToken());
+
+            token = new Token("hallo", 10, 20, "someType");
+            ts.Token = token;
+            ts.Reset();
+
+            assertTrue(ts.IncrementToken());
+            assertEquals(token, tokenAtt);
+            assertFalse(ts.IncrementToken());
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/65f1c5f0/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 048bac7..3ed4a9d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -220,6 +220,7 @@
     <Compile Include="Analysis\Miscellaneous\TestScandinavianFoldingFilterFactory.cs" />
     <Compile Include="Analysis\Miscellaneous\TestScandinavianNormalizationFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestScandinavianNormalizationFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\TestSingleTokenTokenFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestStemmerOverrideFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestStemmerOverrideFilterFactory.cs" />
     <Compile Include="Analysis\Miscellaneous\TestTrimFilter.cs" />


[21/22] lucenenet git commit: Fixed some minor code differences in Analysis.Compound.Hyphenation

Posted by sy...@apache.org.
Fixed some minor code differences in Analysis.Compound.Hyphenation


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8a05b168
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8a05b168
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8a05b168

Branch: refs/heads/analysis-work
Commit: 8a05b1682b9d1405ed71d42e75d06a296736641d
Parents: 3664f1d
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 27 01:57:32 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:14 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Compound/hyphenation/HyphenationTree.cs     | 11 ++++++-----
 .../Analysis/Compound/hyphenation/PatternParser.cs       | 10 ++++------
 2 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a05b168/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
index 33bc310..287f6f3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
@@ -1,4 +1,5 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Support;
+using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Text;
@@ -40,7 +41,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
         /// <summary>
         /// This map stores hyphenation exceptions
         /// </summary>
-        protected internal Dictionary<string, List<object>> stoplist;
+        protected internal IDictionary<string, IList<object>> stoplist;
 
         /// <summary>
         /// This map stores the character classes
@@ -55,7 +56,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
 
         public HyphenationTree()
         {
-            stoplist = new Dictionary<string, List<object>>(23); // usually a small table
+            stoplist = new HashMap<string, IList<object>>(23); // usually a small table
             classmap = new TernaryTree();
             vspace = new ByteVector();
             vspace.Alloc(1); // this reserves index 0, which we don't use
@@ -234,7 +235,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
             sbyte v = vspace[k++];
             while (v != 0)
             {
-                char c = (char)(((int)((uint)v >> 4)) - 1);
+                char c = (char)((((int)((uint)v >> 4))) - 1);
                 buf.Append(c);
                 c = (char)(v & 0x0f);
                 if (c == 0)
@@ -453,7 +454,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
             {
                 // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
                 // null)
-                List<object> hw = stoplist[sw];
+                IList<object> hw = stoplist[sw];
                 int j = 0;
                 for (i = 0; i < hw.Count; i++)
                 {

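For context, Lucene.Net.Support.HashMap is swapped in here because, unlike Dictionary, it mimics Java's HashMap by returning null for a missing key instead of throwing, which is what the stoplist[sw] lookup below relies on. A minimal standalone sketch of the difference (hypothetical key, not part of the commit, assuming HashMap keeps those Java semantics):

    using System;
    using System.Collections.Generic;
    using Lucene.Net.Support;

    class HashMapDemo
    {
        static void Main()
        {
            // HashMap: a missing key yields null (Java semantics), so lookups
            // like stoplist[sw] need no ContainsKey guard.
            IDictionary<string, IList<object>> stoplist = new HashMap<string, IList<object>>(23);
            IList<object> hw = stoplist["no-such-word"];
            Console.WriteLine(hw == null); // True

            // Dictionary: the same lookup throws instead.
            var plain = new Dictionary<string, IList<object>>();
            try { var x = plain["no-such-word"]; }
            catch (KeyNotFoundException) { Console.WriteLine("throws"); }
        }
    }
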
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a05b168/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
index e94e8cf..33e07dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
@@ -237,7 +237,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
                     break;
                 }
             }
-            token.Append(chars.ToString(0, i));
+            token.Append(chars.ToString(0, i - 0));
             // chars.delete(0,i);
             for (int countr = i; countr < chars.Length; countr++)
             {
@@ -250,7 +250,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
                 token.Length = 0;
                 return word;
             }
-            token.Append(chars);
+            token.Append(chars.ToString());
             return null;
         }
 
@@ -360,7 +360,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
             public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
             {
                 string dtdFilename = "hyphenation.dtd";
-                if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault()))
+                if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.OrdinalIgnoreCase))
                 {
                     var qualifedDtdFilename = string.Concat(GetType().Namespace, ".", dtdFilename);
                     return GetType().Assembly.GetManifestResourceStream(qualifedDtdFilename);
@@ -415,7 +415,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
         ///      java.lang.String, java.lang.String) </seealso>
         public void EndElement(string uri, string local, string raw)
         {
-
             if (token.Length > 0)
             {
                 string word = token.ToString();
@@ -449,7 +448,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
             {
                 currElement = 0;
             }
-
         }
 
         /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
@@ -458,7 +456,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
             StringBuilder chars = new StringBuilder(length);
             chars.Append(ch, start, length);
             string word = ReadToken(chars);
-            while (!string.IsNullOrEmpty(word))
+            while (word != null)
             {
                 // System.out.println("\"" + word + "\"");
                 switch (currElement)

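The while (word != null) change restores the Java null-as-sentinel contract: ReadToken returns null only when no token is available, while an empty string can still be a valid token to process, so testing IsNullOrEmpty could end the loop early and drop input. A runnable sketch of the idiom, with a hypothetical ReadToken standing in for the parser's:

    using System;
    using System.Collections.Generic;

    class NullSentinelDemo
    {
        // Hypothetical tokenizer: null means "no more tokens",
        // while "" is a legitimate (empty) token that must still be handled.
        static string ReadToken(Queue<string> source)
        {
            return source.Count > 0 ? source.Dequeue() : null;
        }

        static void Main()
        {
            var source = new Queue<string>(new[] { "foo", "", "bar" });
            string word = ReadToken(source);
            while (word != null)             // "" passes; only null stops the loop
            {
                Console.WriteLine("[" + word + "]");
                word = ReadToken(source);
            }
        }
    }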

[14/22] lucenenet git commit: Ported Analysis.Core.TestRandomChains and moved the CheckThatYouDidntReadAnythingReaderWrapper back into that class from TestBugInSomething.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0a5198ec/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 3ed4a9d..cb94f28 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -93,6 +93,7 @@
     <Compile Include="Analysis\Core\TestDuelingAnalyzers.cs" />
     <Compile Include="Analysis\Core\TestFactories.cs" />
     <Compile Include="Analysis\Core\TestKeywordAnalyzer.cs" />
+    <Compile Include="Analysis\Core\TestRandomChains.cs" />
     <Compile Include="Analysis\Core\TestStandardAnalyzer.cs" />
     <Compile Include="Analysis\Core\TestStopAnalyzer.cs">
       <SubType>Code</SubType>


[16/22] lucenenet git commit: Ported Analysis.Core.TestRandomChains and moved the CheckThatYouDidntReadAnythingReaderWrapper back into that class from TestBugInSomething.

Posted by sy...@apache.org.
Ported Analysis.Core.TestRandomChains and moved the CheckThatYouDidntReadAnythingReaderWrapper back into that class from TestBugInSomething.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0a5198ec
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0a5198ec
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0a5198ec

Branch: refs/heads/analysis-work
Commit: 0a5198ecdacaf3ca71d4dbcf486ffa1430bad4e6
Parents: 4e9fd4f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 12:37:29 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:19:59 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Core/TestBugInSomething.cs         |   76 +-
 .../Analysis/Core/TestRandomChains.cs           | 2679 ++++++++----------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |    1 +
 3 files changed, 1115 insertions(+), 1641 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0a5198ec/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
index 0d9d742..42994be 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -66,7 +66,7 @@ namespace Lucene.Net.Analysis.Core
 
             public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
             {
-                Tokenizer t = new MockTokenizer(new /* TestRandomChains. */ CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
+                Tokenizer t = new MockTokenizer(new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader), MockTokenFilter.ENGLISH_STOPSET, false, -65);
                 TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas);
                 return new TokenStreamComponents(t, f);
             }
@@ -151,7 +151,7 @@ namespace Lucene.Net.Analysis.Core
         [Test]
         public virtual void TestWrapping()
         {
-            CharFilter cs = new /* TestRandomChains. */ CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
+            CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
             try
             {
                 cs.Mark(1);
@@ -361,77 +361,5 @@ namespace Lucene.Net.Analysis.Core
                 return new TokenStreamComponents(tokenizer, stream);
             }
         }
-
-
-
-        // LUCENENET NOTE: Borrowed this class from the TestRandomChains class. It was in a commented section
-        // that said "ignore". But it is required for this test. If/when TestRandomChains is ported, we can
-        // use it there.
-        private class CheckThatYouDidntReadAnythingReaderWrapper : CharFilter
-        {
-            bool readSomething;
-
-            public CheckThatYouDidntReadAnythingReaderWrapper(TextReader @in)
-                : base(@in)
-            { }
-
-            private CharFilter Input
-            {
-                get { return (CharFilter)this.input; }
-            }
-
-            protected override int Correct(int currentOff)
-            {
-                return currentOff; // we don't change any offsets
-            }
-
-            public override int Read(char[] cbuf, int off, int len)
-            {
-                readSomething = true;
-                return input.Read(cbuf, off, len);
-            }
-
-            public override int Read()
-            {
-                readSomething = true;
-                return input.Read();
-            }
-
-            // LUCENENET: TextReader dosn't support this overload 
-            //public int read(char[] cbuf)
-            //{
-            //    readSomething = true;
-            //    return input.read(cbuf);
-            //}
-
-            public override long Skip(int n)
-            {
-                readSomething = true;
-                return Input.Skip(n);
-            }
-
-            public override void Mark(int readAheadLimit)
-            {
-                Input.Mark(readAheadLimit);
-            }
-
-            public override bool IsMarkSupported
-            {
-                get
-                {
-                    return Input.IsMarkSupported;
-                }
-            }
-
-            public override bool Ready()
-            {
-                return Input.Ready();
-            }
-
-            public override void Reset()
-            {
-                Input.Reset();
-            }
-        }
     }
 }
\ No newline at end of file


[20/22] lucenenet git commit: Fixed a bug in the Analysis.Tr.TurkishLowerCaseFilter that caused the Analysis.Tr.TestTurkishLowerCaseFilter_.TestTurkishLowerCaseFilter() test to fail.

Posted by sy...@apache.org.
Fixed a bug in the Analysis.Tr.TurkishLowerCaseFilter that caused the Analysis.Tr.TestTurkishLowerCaseFilter_.TestTurkishLowerCaseFilter() test to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3664f1d7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3664f1d7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3664f1d7

Branch: refs/heads/analysis-work
Commit: 3664f1d7dc4f31ce28ef36cbccc7d1cf9b79577f
Parents: bc48844
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 23:45:11 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:11 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Tr/TurkishLowerCaseFilter.cs       | 34 ++++++--
 src/Lucene.Net.Core/Lucene.Net.csproj           |  1 +
 src/Lucene.Net.Core/Support/CultureContext.cs   | 81 ++++++++++++++++++++
 3 files changed, 109 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
index 8b53666..4aaee6a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -34,10 +34,11 @@ namespace Lucene.Net.Analysis.Tr
     public sealed class TurkishLowerCaseFilter : TokenFilter
     {
         private const int LATIN_CAPITAL_LETTER_I = '\u0049';
-        //private const int LATIN_CAPITAL_LETTER_I = '\u0130';
+        private const int LATIN_CAPITAL_LETTER_DOTTED_I = '\u0130';
         private const int LATIN_SMALL_LETTER_I = '\u0069';
         private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
         private const int COMBINING_DOT_ABOVE = '\u0307';
+
         private readonly ICharTermAttribute termAtt;
 
         /// <summary>
@@ -62,12 +63,6 @@ namespace Lucene.Net.Analysis.Tr
                 int length = termAtt.Length;
                 for (int i = 0; i < length;)
                 {
-
-                    // LUCENENET TODO: This line is failing, causing the TestTurkishLowerCaseFilter() test to fail. According to the MSDN documentation
-                    // https://msdn.microsoft.com/en-us/library/system.globalization.unicodecategory(v=vs.110).aspx
-                    // a non-spacing mark is a modifier to a character. This logic is expecting the first codepoint to be an upper case Latin I,
-                    // and the second to be a non-spacing mark, but it is coming back as a single codepoint 304 that doesn't match Latin I.
-                    // Also, char.GetUnicodeCategory((char)304) returns UpperCaseLetter (not sure if that is pertinent).
                     int ch = Character.CodePointAt(buffer, i, length);
 
                     iOrAfter = (ch == LATIN_CAPITAL_LETTER_I || (iOrAfter && char.GetUnicodeCategory((char)ch) == UnicodeCategory.NonSpacingMark));
@@ -99,6 +94,31 @@ namespace Lucene.Net.Analysis.Tr
                         }
                     }
 
+                    using (var culture = new CultureContext("tr-TR"))
+                    {
+                        switch (ch)
+                        {
+                            // LUCENENET: The .NET char.ToLower() function works correctly in 
+                            // Turkish as long as the current thread is set to tr-TR (well, technically the 
+                            // culture change is only required for the LATIN_CAPITAL_LETTER_I case). .NET does 
+                            // not split these characters into separate letter/non-spacing mark characters,
+                            // but the user might still input them that way so we still need the above
+                            // block to handle that case.
+                            //
+                            // LUCENENET TODO: Oddly, the Character.ToLowerCase() function below does not work right
+                            // for Turkish. Which begs the question, should this special case be there so Turkish works
+                            // everywhere? Or should we leave it a special case here because that is the way it works in Java?
+                            //
+                            // References:
+                            // http://haacked.com/archive/2012/07/05/turkish-i-problem-and-why-you-should-care.aspx/
+                            // http://www.i18nguy.com/unicode/turkish-i18n.html
+                            case LATIN_CAPITAL_LETTER_I:
+                            case LATIN_CAPITAL_LETTER_DOTTED_I:
+                                i += Character.ToChars(char.ToLower((char)ch), buffer, i);
+                                continue;
+                        }
+                    }
+
                     i += Character.ToChars(Character.ToLowerCase(ch), buffer, i);
                 }
 

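The culture switch matters because of the Turkish dotted/dotless I: under tr-TR, char.ToLower maps I (U+0049) to dotless ı (U+0131) and İ (U+0130) to i, while the invariant culture maps I to i. A standalone illustration (not part of the commit):

    using System;
    using System.Globalization;

    class TurkishIDemo
    {
        static void Main()
        {
            char capitalI = '\u0049';   // I
            char dottedI = '\u0130';    // İ (LATIN CAPITAL LETTER I WITH DOT ABOVE)

            var tr = new CultureInfo("tr-TR");
            var inv = CultureInfo.InvariantCulture;

            Console.WriteLine(char.ToLower(capitalI, inv)); // i
            Console.WriteLine(char.ToLower(capitalI, tr));  // ı (U+0131, dotless)
            Console.WriteLine(char.ToLower(dottedI, tr));   // i
        }
    }
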
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.csproj b/src/Lucene.Net.Core/Lucene.Net.csproj
index 26c8906..9dbcd4c 100644
--- a/src/Lucene.Net.Core/Lucene.Net.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.csproj
@@ -623,6 +623,7 @@
     <Compile Include="Support\Compatibility\Collections.cs" />
     <Compile Include="Support\ConcurrentHashMapWrapper.cs" />
     <Compile Include="Support\ConcurrentHashSet.cs" />
+    <Compile Include="Support\CultureContext.cs" />
     <Compile Include="Support\ErrorHandling.cs" />
     <Compile Include="Support\FileStreamExtensions.cs" />
     <Compile Include="Support\HashCodeMerge.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Core/Support/CultureContext.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/CultureContext.cs b/src/Lucene.Net.Core/Support/CultureContext.cs
new file mode 100644
index 0000000..59f578e
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/CultureContext.cs
@@ -0,0 +1,81 @@
+\ufeffusing System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Allows switching the current thread to a new culture in a using block that will automatically 
+    /// return the culture to its previous state upon completion.
+    /// </summary>
+    public class CultureContext : IDisposable
+    {
+        public CultureContext(int culture)
+            : this(new CultureInfo(culture), Thread.CurrentThread.CurrentUICulture)
+        {
+        }
+
+        public CultureContext(int culture, int uiCulture)
+            : this(new CultureInfo(culture), new CultureInfo(uiCulture))
+        {
+        }
+
+        public CultureContext(string cultureName)
+            : this(new CultureInfo(cultureName), Thread.CurrentThread.CurrentUICulture)
+        {
+        }
+
+        public CultureContext(string cultureName, string uiCultureName)
+            : this(new CultureInfo(cultureName), new CultureInfo(uiCultureName))
+        {
+        }
+
+        public CultureContext(CultureInfo culture)
+            : this(culture, Thread.CurrentThread.CurrentUICulture)
+        {
+        }
+
+        public CultureContext(CultureInfo culture, CultureInfo uiCulture)
+        {
+            if (culture == null)
+                throw new ArgumentNullException("culture");
+            if (uiCulture == null)
+                throw new ArgumentNullException("uiCulture");
+
+            this.currentThread = Thread.CurrentThread;
+
+            // Record the current culture settings so they can be restored later.
+            this.originalCulture = this.currentThread.CurrentCulture;
+            this.originalUICulture = this.currentThread.CurrentUICulture;
+
+            // Set both the culture and UI culture for this context.
+            this.currentThread.CurrentCulture = culture;
+            this.currentThread.CurrentUICulture = uiCulture;
+        }
+
+        private readonly Thread currentThread;
+        private readonly CultureInfo originalCulture;
+        private readonly CultureInfo originalUICulture;
+
+        public CultureInfo OriginalCulture
+        {
+            get { return this.originalCulture; }
+        }
+
+        public CultureInfo OriginalUICulture
+        {
+            get { return this.originalUICulture; }
+        }
+
+        public void RestoreOriginalCulture()
+        {
+            // Restore the culture to the way it was before the constructor was called.
+            this.currentThread.CurrentCulture = this.originalCulture;
+            this.currentThread.CurrentUICulture = this.originalUICulture;
+        }
+        public void Dispose()
+        {
+            RestoreOriginalCulture();
+        }
+    }
+}

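A brief usage sketch of the new CultureContext class added above (grounded in the code in this commit; the printed values assume the thread did not already run under tr-TR):

    using System;
    using System.Threading;
    using Lucene.Net.Support;

    class CultureContextDemo
    {
        static void Main()
        {
            // Inside the block the current thread runs under tr-TR;
            // Dispose() restores the previous culture automatically.
            using (var context = new CultureContext("tr-TR"))
            {
                Console.WriteLine(Thread.CurrentThread.CurrentCulture.Name); // tr-TR
            }
            // Back to whatever the thread started with.
            Console.WriteLine(Thread.CurrentThread.CurrentCulture.Name);
        }
    }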

[10/22] lucenenet git commit: Wrapped ICU4NET BreakIterator with a new class named ThaiWordBreaker to fix the broken behavior of not splitting non-Thai and Thai characters into separate words.

Posted by sy...@apache.org.
Wrapped ICU4NET BreakIterator with a new class named ThaiWordBreaker to fix the broken behavior of not splitting non-Thai and Thai characters into separate words.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/63e3e22d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/63e3e22d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/63e3e22d

Branch: refs/heads/analysis-work
Commit: 63e3e22d8e4768e03295e7bdc07924120f307ad3
Parents: ddd93cb
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Aug 25 15:49:56 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Aug 25 15:49:56 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Th/ThaiTokenizer.cs                | 103 ++++++++++++++++++-
 .../Analysis/Th/ThaiWordFilter.cs               |   2 +-
 2 files changed, 102 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/63e3e22d/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index e11ebf8..d8625d9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -3,7 +3,11 @@ using ICU4NETExtension;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
 using System.IO;
+using System.Linq;
+using System.Text.RegularExpressions;
 
 namespace Lucene.Net.Analysis.Th
 {
@@ -47,7 +51,7 @@ namespace Lucene.Net.Analysis.Th
             DBBI_AVAILABLE = proto.IsBoundary(4);
         }
 
-        private readonly BreakIterator wordBreaker;
+        private readonly ThaiWordBreaker wordBreaker;
         private readonly CharArrayIterator wrapper = CharArrayIterator.NewWordInstance();
 
         internal int sentenceStart;
@@ -72,7 +76,7 @@ namespace Lucene.Net.Analysis.Th
             {
                 throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
             }
-            wordBreaker = BreakIterator.CreateWordInstance(Locale.GetUS());
+            wordBreaker = new ThaiWordBreaker(BreakIterator.CreateWordInstance(Locale.GetUS()));
             termAtt = AddAttribute<ICharTermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
         }
@@ -112,4 +116,99 @@ namespace Lucene.Net.Analysis.Th
             return true;
         }
     }
+
+    /// <summary>
+    /// LUCENENET specific class to patch the behavior of the ICU BreakIterator.
+    /// Corrects the breaking of words by finding transitions between Thai and non-Thai
+    /// characters.
+    /// 
+    /// This logic assumes that the Java BreakIterator also separates Thai numerals from
+    /// Arabic numerals (1, 2, 3, etc.). That is, it assumes the first test below passes
+    /// and the second test fails in Java Lucene (not attempted).
+    /// 
+    /// ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
+    /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53", "456" });
+    /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53456" });
+    /// </summary>
+    internal class ThaiWordBreaker
+    {
+        private readonly BreakIterator wordBreaker;
+        private string text;
+        private readonly IList<int> transitions = new List<int>();
+        private readonly static Regex thaiPattern = new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);
+
+        public ThaiWordBreaker(BreakIterator wordBreaker) 
+        {
+            if (wordBreaker == null)
+            {
+                throw new ArgumentNullException("wordBreaker");
+            }
+            this.wordBreaker = wordBreaker;
+        }
+
+        public void SetText(string text)
+        {
+            this.text = text;
+            wordBreaker.SetText(text);
+        }
+
+        public int Current()
+        {
+            if (transitions.Any())
+            {
+                return transitions.First();
+            }
+            return wordBreaker.Current();
+        }
+
+        public int Next()
+        {
+            if (transitions.Any())
+            {
+                transitions.RemoveAt(0);
+            }
+            if (transitions.Any())
+            {
+                return transitions.First();
+            }
+            return GetNext();
+        }
+
+        private int GetNext()
+        {
+            bool isThai = false, isNonThai = false;
+            bool prevWasThai = false, prevWasNonThai = false;
+            int prev = wordBreaker.Current();
+            int current = wordBreaker.Next();
+
+            if (current != BreakIterator.DONE && current - prev > 0)
+            {
+                // Find all of the transitions between Thai and non-Thai characters and digits
+                for (int i = prev; i < current; i++)
+                {
+                    char c = text[i];
+                    isThai = thaiPattern.IsMatch(c.ToString());
+                    isNonThai = char.IsLetterOrDigit(c) && !isThai;
+
+                    if ((prevWasThai && isNonThai) ||
+                        (prevWasNonThai && isThai))
+                    {
+                        transitions.Add(i);
+                    }
+
+                    // record the values for comparison with the next loop
+                    prevWasThai = isThai;
+                    prevWasNonThai = isNonThai;
+                }
+
+                if (transitions.Any())
+                {
+                    transitions.Add(current);
+                    return transitions.First();
+                }
+            }
+
+            return current;
+        }
+    }
 }
\ No newline at end of file

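The transition scan in ThaiWordBreaker.GetNext() can be read in isolation: walk the span returned by the break iterator and record every index where the text flips between Thai and non-Thai letters or digits. A self-contained sketch of just that scan (a standalone function, not the tokenizer itself):

    using System;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;

    class ScriptTransitionDemo
    {
        private static readonly Regex thaiPattern =
            new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);

        // Record every index in [start, end) where the text flips between
        // Thai and non-Thai letters/digits, as in ThaiWordBreaker.GetNext().
        static IList<int> FindTransitions(string text, int start, int end)
        {
            var transitions = new List<int>();
            bool prevWasThai = false, prevWasNonThai = false;
            for (int i = start; i < end; i++)
            {
                char c = text[i];
                bool isThai = thaiPattern.IsMatch(c.ToString());
                bool isNonThai = char.IsLetterOrDigit(c) && !isThai;
                if ((prevWasThai && isNonThai) || (prevWasNonThai && isThai))
                {
                    transitions.Add(i);
                }
                prevWasThai = isThai;
                prevWasNonThai = isNonThai;
            }
            return transitions;
        }

        static void Main()
        {
            string text = "ภาษาไทย123";
            // Expect a single transition where the Thai run meets the digits.
            foreach (int i in FindTransitions(text, 0, text.Length))
            {
                Console.WriteLine(i); // 7
            }
        }
    }
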
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/63e3e22d/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
index 9864b7c..cbd9b6a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -49,7 +49,7 @@ namespace Lucene.Net.Analysis.Th
         /// If this is false, this filter will not work at all!
         /// </summary>
         public static readonly bool DBBI_AVAILABLE = ThaiTokenizer.DBBI_AVAILABLE;
-        private readonly BreakIterator breaker = BreakIterator.CreateWordInstance(new Locale());
+        private readonly ThaiWordBreaker breaker = new ThaiWordBreaker(BreakIterator.CreateWordInstance(new Locale()));
         private readonly CharArrayIterator charIterator = CharArrayIterator.NewWordInstance();
 
         private readonly bool handlePosIncr;


[12/22] lucenenet git commit: Fixed bug in HTMLStripCharFilter that was not allowing null characters to pass from the reader to the business logic, which was causing the Analysis.CharFilters.HTMLStripCharFilterTest.TestRandom() and Analysis.CharFilters.HTMLStripCharFilterTest.TestRandomHugeStrings() tests to fail.

Posted by sy...@apache.org.
Fixed bug in HTMLStripCharFilter that was not allowing null characters to pass from the reader to the business logic, which was causing the Analysis.CharFilters.HTMLStripCharFilterTest.TestRandom() and Analysis.CharFilters.HTMLStripCharFilterTest.TestRandomHugeStrings() tests to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/91f06088
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/91f06088
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/91f06088

Branch: refs/heads/analysis-work
Commit: 91f0608859f1f92c41aeeef29c7a1e8a98ea051b
Parents: 196ce64
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 11:26:42 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:17:59 2016 +0700

----------------------------------------------------------------------
 .../Analysis/CharFilter/HTMLStripCharFilter.cs                   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/91f06088/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index b0ca65e..cab9206 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -30879,7 +30879,7 @@ namespace Lucene.Net.Analysis.CharFilters
             for (; i < len; ++i)
             {
                 int ch = Read();
-                if (ch <= 0) break;
+                if (ch == -1) break;
                 cbuf[off++] = (char)ch;
             }
             return i > 0 ? i : (len == 0 ? 0 : -1);
@@ -31006,7 +31006,7 @@ namespace Lucene.Net.Analysis.CharFilters
             if (numRead == 0)
             {
                 int c = zzReader.Read();
-                if (c <= 0)
+                if (c == -1)
                 {
                     return true;
                 }

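The bug here is the sentinel: TextReader.Read() returns -1 only at end of stream, while 0 ('\0') is a legitimate character, so testing ch <= 0 silently dropped NUL characters. A minimal illustration of the corrected loop (standalone, not the generated filter code):

    using System;
    using System.IO;

    class ReadLoopDemo
    {
        static void Main()
        {
            // "\0" is data, not end-of-stream; only -1 terminates the loop.
            TextReader reader = new StringReader("a\0b");
            int count = 0;
            int ch;
            while ((ch = reader.Read()) != -1)
            {
                count++;
            }
            Console.WriteLine(count); // 3, including the NUL character
        }
    }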

[08/22] lucenenet git commit: Fixed several bugs that were causing most of the Analysis.Th tests to fail.

Posted by sy...@apache.org.
Fixed several bugs that were causing most of the Analysis.Th tests to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/edde0fba
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/edde0fba
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/edde0fba

Branch: refs/heads/analysis-work
Commit: edde0fba58612e1c82aed16da6d1ffb763798612
Parents: ab40446
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Aug 25 02:00:29 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Aug 25 02:00:29 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Th/ThaiTokenizer.cs                   | 14 +++++---------
 .../Analysis/Th/ThaiWordFilter.cs                  | 14 +++++++-------
 .../Analysis/Util/SegmentingTokenizerBase.cs       | 17 +++++++----------
 3 files changed, 19 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/edde0fba/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index ca41da1..e11ebf8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -1,4 +1,5 @@
 \ufeffusing ICU4NET;
+using ICU4NETExtension;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
@@ -38,18 +39,13 @@ namespace Lucene.Net.Analysis.Th
         /// If this is false, this tokenizer will not work at all!
         /// </summary>
         public static readonly bool DBBI_AVAILABLE;
-        private static readonly BreakIterator proto = BreakIterator.CreateWordInstance(Locale.GetUS());   //GetWordInstance(new Locale("th"));
+        private static readonly BreakIterator proto = BreakIterator.CreateWordInstance(Locale.GetUS());
         static ThaiTokenizer()
         {
             // check that we have a working dictionary-based break iterator for thai
             proto.SetText("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22");
             DBBI_AVAILABLE = proto.IsBoundary(4);
         }
-        
-
-        /// <summary>
-        /// used for breaking the text into sentences </summary>
-        private static readonly BreakIterator sentenceProto = BreakIterator.CreateSentenceInstance(Locale.GetUS());    //GetSentenceInstance(Locale.ROOT);
 
         private readonly BreakIterator wordBreaker;
         private readonly CharArrayIterator wrapper = CharArrayIterator.NewWordInstance();
@@ -70,13 +66,13 @@ namespace Lucene.Net.Analysis.Th
         /// <summary>
         /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
         public ThaiTokenizer(AttributeFactory factory, TextReader reader)
-              : base(factory, reader, (BreakIterator)sentenceProto.Clone())
+              : base(factory, reader, BreakIterator.CreateSentenceInstance(Locale.GetUS()))
         {
             if (!DBBI_AVAILABLE)
             {
                 throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
             }
-            wordBreaker = (BreakIterator)proto.Clone();
+            wordBreaker = BreakIterator.CreateWordInstance(Locale.GetUS());
             termAtt = AddAttribute<ICharTermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
         }
@@ -86,7 +82,7 @@ namespace Lucene.Net.Analysis.Th
             this.sentenceStart = sentenceStart;
             this.sentenceEnd = sentenceEnd;
             wrapper.SetText(buffer, sentenceStart, sentenceEnd - sentenceStart);
-            wordBreaker.SetText(new string(wrapper.Text));
+            wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
         }
 
         protected internal override bool IncrementWord()

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/edde0fba/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
index b0a23a0..9864b7c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -1,9 +1,10 @@
 \ufeffusing ICU4NET;
-using System;
-using Lucene.Net.Analysis.Util;
+using ICU4NETExtension;
+using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using Lucene.Net.Analysis.Core;
+using System;
 using System.Text.RegularExpressions;
 
 namespace Lucene.Net.Analysis.Th
@@ -48,8 +49,7 @@ namespace Lucene.Net.Analysis.Th
         /// If this is false, this filter will not work at all!
         /// </summary>
         public static readonly bool DBBI_AVAILABLE = ThaiTokenizer.DBBI_AVAILABLE;
-        private static readonly BreakIterator proto = BreakIterator.CreateWordInstance(new Locale());    //.getWordInstance(new Locale("th"));
-        private readonly BreakIterator breaker = (BreakIterator)proto.Clone();
+        private readonly BreakIterator breaker = BreakIterator.CreateWordInstance(new Locale());
         private readonly CharArrayIterator charIterator = CharArrayIterator.NewWordInstance();
 
         private readonly bool handlePosIncr;
@@ -111,7 +111,7 @@ namespace Lucene.Net.Analysis.Th
                 return false;
             }
 
-            if (termAtt.Length == 0 || Regex.IsMatch(termAtt.ToString().Substring(0, 1), @"\p{IsThai}"))
+            if (termAtt.Length == 0 || !Regex.IsMatch(termAtt.ToString().Substring(0, 1), @"\p{IsThai}"))
             {
                 return true;
             }
@@ -136,7 +136,7 @@ namespace Lucene.Net.Analysis.Th
 
             // reinit CharacterIterator
             charIterator.SetText(clonedTermAtt.Buffer(), 0, clonedTermAtt.Length);
-            breaker.SetText(new string(charIterator.Text));
+            breaker.SetText(new string(charIterator.Text, charIterator.Start, charIterator.Length));
             int end2 = breaker.Next();
             if (end2 != BreakIterator.DONE)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/edde0fba/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
index c4c4643..73944af 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -1,14 +1,11 @@
-\ufeffusing System;
+\ufeffusing ICU4NET;
+using Lucene.Net.Analysis.Tokenattributes;
+using System;
 using System.Diagnostics;
 using System.IO;
-using ICU4NET;
-using Lucene.Net.Analysis.Tokenattributes;
-using Reader = System.IO.TextReader;
-using Version = Lucene.Net.Util.LuceneVersion;
 
 namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -70,7 +67,7 @@ namespace Lucene.Net.Analysis.Util
         /// be provided to this constructor.
         /// </para>
         /// </summary>
-        protected SegmentingTokenizerBase(Reader reader, BreakIterator iterator)
+        protected SegmentingTokenizerBase(TextReader reader, BreakIterator iterator)
             : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
         {
         }
@@ -78,7 +75,7 @@ namespace Lucene.Net.Analysis.Util
         /// <summary>
         /// Construct a new SegmenterBase, also supplying the AttributeFactory
         /// </summary>
-        protected SegmentingTokenizerBase(AttributeFactory factory, Reader reader, BreakIterator iterator)
+        protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator)
             : base(factory, reader)
         {
             offsetAtt = AddAttribute<IOffsetAttribute>();
@@ -106,7 +103,7 @@ namespace Lucene.Net.Analysis.Util
         {
             base.Reset();
             wrapper.SetText(buffer, 0, 0);
-            iterator.SetText(new string(wrapper.Text));
+            iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
             length = usableLength = offset = 0;
         }
 
@@ -177,7 +174,7 @@ namespace Lucene.Net.Analysis.Util
             }
 
             wrapper.SetText(buffer, 0, Math.Max(0, usableLength));
-            iterator.SetText(new string(wrapper.Text, 0, Math.Max(0, usableLength)));
+            iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
         }
 
         // TODO: refactor to a shared readFully somewhere

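The SetText fixes construct the string from only the valid window of the backing buffer (wrapper.Start, wrapper.Length) rather than the whole array, which may contain stale padding past the current sentence. A small standalone illustration of the difference (hypothetical buffer contents):

    using System;

    class BufferWindowDemo
    {
        static void Main()
        {
            // A tokenizer buffer is usually larger than the text it currently holds.
            char[] buffer = new char[8];
            "thai".CopyTo(0, buffer, 0, 4);

            int start = 0, length = 4;

            // Whole backing array: picks up stale '\0' padding past the text.
            Console.WriteLine(new string(buffer).Length);         // 8
            // Only the valid window, as in the fix:
            Console.WriteLine(new string(buffer, start, length)); // "thai"
        }
    }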

[04/22] lucenenet git commit: Fixed inconsistencies between namespaces and license headers in Analysis.Util.

Posted by sy...@apache.org.
Fixed inconsistencies between namespaces and license headers in Analysis.Util.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/753dd0b6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/753dd0b6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/753dd0b6

Branch: refs/heads/analysis-work
Commit: 753dd0b62c3b870025b3a91446b1adc1fe6f19ba
Parents: e3bbea4
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 18:32:41 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 18:32:41 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Util/TestCharArrayIterator.cs      |  4 +--
 .../Analysis/Util/TestCharArrayMap.cs           | 36 ++++++++++----------
 .../Analysis/Util/TestCharArraySet.cs           |  3 +-
 .../Analysis/Util/TestCharTokenizers.cs         |  4 +--
 .../Analysis/Util/TestCharacterUtils.cs         |  4 +--
 .../Analysis/Util/TestRollingCharBuffer.cs      |  3 +-
 .../Util/TestSegmentingTokenizerBase.cs         |  4 +--
 .../Analysis/Util/TestWordlistLoader.cs         |  2 --
 8 files changed, 28 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayIterator.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayIterator.cs
index 7978d67..1cff55d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayIterator.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayIterator.cs
@@ -5,9 +5,8 @@ using Lucene.Net.Util;
 using NUnit.Framework;
 using CharacterIterator = Lucene.Net.Support.CharacterIterator;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +23,7 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     [TestFixture]
     public class TestCharArrayIterator : LuceneTestCase
     {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayMap.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayMap.cs
index fa14fb9..e2abe3d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayMap.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArrayMap.cs
@@ -5,25 +5,25 @@ using NUnit.Framework;
 using System.Collections.Generic;
 using System.Text;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
     [TestFixture]
     public class TestCharArrayMap_ : LuceneTestCase
     {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
index 97ae480..2615e78 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharArraySet.cs
@@ -7,9 +7,8 @@ using System.Collections.Generic;
 using System.Text;
 using Version = Lucene.Net.Util.LuceneVersion;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
index 40ae0bb..7c68fbc 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
@@ -9,9 +9,8 @@ using Lucene.Net.Support;
 using Lucene.Net.Util;
 using NUnit.Framework;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -28,6 +27,7 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Testcase for <seealso cref="CharTokenizer"/> subclasses
     /// </summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
index 7b94e9d..0f4e14a 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
@@ -5,9 +5,8 @@ using Lucene.Net.Support;
 using Lucene.Net.Util;
 using NUnit.Framework;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +23,7 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// TestCase for the <seealso cref="CharacterUtils"/> class.
     /// </summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
index 2cf089e..95d8b2c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestRollingCharBuffer.cs
@@ -5,9 +5,8 @@ using NUnit.Framework;
 using System;
 using System.IO;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
index d648619..272ebe0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
@@ -7,9 +7,8 @@ using NUnit.Framework;
 using System.IO;
 using System.Text;
 
-namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
+namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -26,6 +25,7 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Basic tests for <seealso cref="SegmentingTokenizerBase"/> </summary>
     [TestFixture]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/753dd0b6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
index b8131ed..9df94b0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestWordlistLoader.cs
@@ -6,7 +6,6 @@ using System.Text;
 
 namespace Lucene.Net.Analysis.Util
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,7 +23,6 @@ namespace Lucene.Net.Analysis.Util
      * limitations under the License.
      */
 
-
     public class TestWordlistLoader : LuceneTestCase
     {
 


[22/22] lucenenet git commit: Ported Analysis.Miscellaneous.PatternAnalyzer + tests

Posted by sy...@apache.org.
Ported Analysis.Miscellaneous.PatternAnalyzer + tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7f877fdf
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7f877fdf
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7f877fdf

Branch: refs/heads/analysis-work
Commit: 7f877fdfc2ba25a7c1b0386795b4f83b46f50767
Parents: 8a05b16
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 28 00:11:18 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 28 11:24:43 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Miscellaneous/PatternAnalyzer.cs   | 159 +++++++---
 .../Lucene.Net.Analysis.Common.csproj           |   1 +
 .../Miscellaneous/PatternAnalyzerTest.cs        | 317 +++++++------------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   1 +
 4 files changed, 236 insertions(+), 242 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7f877fdf/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 6c28927..933e714 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -1,13 +1,14 @@
-\ufeffusing System;
-using System.IO;
-using Lucene.Net.Analysis.Core;
+\ufeffusing Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
+using System;
+using System.IO;
+using System.Text.RegularExpressions;
 
 namespace Lucene.Net.Analysis.Miscellaneous
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +25,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
     /// <seealso cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
@@ -61,19 +63,65 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
         /// <summary>
         /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
-        public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
+        public static readonly Regex NON_WORD_PATTERN = new Regex("\\W+", RegexOptions.Compiled);
 
         /// <summary>
         /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
-        public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
-
-        private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.UnmodifiableSet(new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
+        public static readonly Regex WHITESPACE_PATTERN = new Regex("\\s+", RegexOptions.Compiled);
+
+        private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = 
+            CharArraySet.UnmodifiableSet(new CharArraySet(LuceneVersion.LUCENE_CURRENT, 
+                Arrays.AsList(
+                    "a", "about", "above", "across", "adj", "after", "afterwards",
+                    "again", "against", "albeit", "all", "almost", "alone", "along",
+                    "already", "also", "although", "always", "among", "amongst", "an",
+                    "and", "another", "any", "anyhow", "anyone", "anything",
+                    "anywhere", "are", "around", "as", "at", "be", "became", "because",
+                    "become", "becomes", "becoming", "been", "before", "beforehand",
+                    "behind", "being", "below", "beside", "besides", "between",
+                    "beyond", "both", "but", "by", "can", "cannot", "co", "could",
+                    "down", "during", "each", "eg", "either", "else", "elsewhere",
+                    "enough", "etc", "even", "ever", "every", "everyone", "everything",
+                    "everywhere", "except", "few", "first", "for", "former",
+                    "formerly", "from", "further", "had", "has", "have", "he", "hence",
+                    "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+                    "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
+                    "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
+                    "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
+                    "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
+                    "must", "my", "myself", "namely", "neither", "never",
+                    "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
+                    "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
+                    "once one", "only", "onto", "or", "other", "others", "otherwise",
+                    "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
+                    "rather", "s", "same", "seem", "seemed", "seeming", "seems",
+                    "several", "she", "should", "since", "so", "some", "somehow",
+                    "someone", "something", "sometime", "sometimes", "somewhere",
+                    "still", "such", "t", "than", "that", "the", "their", "them",
+                    "themselves", "then", "thence", "there", "thereafter", "thereby",
+                    "therefor", "therein", "thereupon", "these", "they", "this",
+                    "those", "though", "through", "throughout", "thru", "thus", "to",
+                    "together", "too", "toward", "towards", "under", "until", "up",
+                    "upon", "us", "very", "via", "was", "we", "well", "were", "what",
+                    "whatever", "whatsoever", "when", "whence", "whenever",
+                    "whensoever", "where", "whereafter", "whereas", "whereat",
+                    "whereby", "wherefrom", "wherein", "whereinto", "whereof",
+                    "whereon", "whereto", "whereunto", "whereupon", "wherever",
+                    "wherewith", "whether", "which", "whichever", "whichsoever",
+                    "while", "whilst", "whither", "who", "whoever", "whole", "whom",
+                    "whomever", "whomsoever", "whose", "whosoever", "why", "will",
+                    "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
+                    "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
+                    "yourselves"
+
+                    ), true));
 
         /// <summary>
         /// A lower-casing word analyzer with English stop words (can be shared
         /// freely across threads without harm); global per class loader.
         /// </summary>
-        public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+        public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
+            LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 
         /// <summary>
         /// A lower-casing word analyzer with <b>extended </b> English stop words
@@ -82,9 +130,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// http://thomas.loc.gov/home/stopwords.html, see
         /// http://thomas.loc.gov/home/all.about.inquery.html
         /// </summary>
-        public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+        public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(
+            LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
 
-        private readonly Pattern pattern;
+        private readonly Regex pattern;
         private readonly bool toLowerCase;
         private readonly CharArraySet stopWords;
 
@@ -108,23 +157,23 @@ namespace Lucene.Net.Analysis.Miscellaneous
         ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
         ///            or <a href="http://www.unine.ch/info/clef/">other stop words
         ///            lists </a>. </param>
-        public PatternAnalyzer(LuceneVersion matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
+        public PatternAnalyzer(LuceneVersion matchVersion, Regex pattern, bool toLowerCase, CharArraySet stopWords)
         {
             if (pattern == null)
             {
                 throw new System.ArgumentException("pattern must not be null");
             }
 
-            if (eqPattern(NON_WORD_PATTERN, pattern))
+            if (EqPattern(NON_WORD_PATTERN, pattern))
             {
                 pattern = NON_WORD_PATTERN;
             }
-            else if (eqPattern(WHITESPACE_PATTERN, pattern))
+            else if (EqPattern(WHITESPACE_PATTERN, pattern))
             {
                 pattern = WHITESPACE_PATTERN;
             }
 
-            if (stopWords != null && stopWords.Size == 0)
+            if (stopWords != null && stopWords.Count == 0)
             {
                 stopWords = null;
             }
@@ -146,7 +195,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// <param name="text">
         ///            the string to tokenize </param>
         /// <returns> a new token stream </returns>
-        public TokenStreamComponents createComponents(string fieldName, TextReader reader, string text)
+        public TokenStreamComponents CreateComponents(string fieldName, TextReader reader, string text)
         {
             // Ideally the Analyzer superclass should have a method with the same signature, 
             // with a default impl that simply delegates to the StringReader flavour. 
@@ -165,7 +214,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             }
 
             Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
-            TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+            TokenStream result = (stopWords != null) ? (TokenStream)new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
             return new TokenStreamComponents(tokenizer, result);
         }
 
@@ -181,7 +230,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// <returns> a new token stream </returns>
         public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
-            return createComponents(fieldName, reader, null);
+            return CreateComponents(fieldName, reader, null);
         }
 
         /// <summary>
@@ -208,7 +257,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             var p2 = other as PatternAnalyzer;
             if (p2 != null)
             {
-                return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
+                return toLowerCase == p2.toLowerCase && EqPattern(pattern, p2.pattern) && Eq(stopWords, p2.stopWords);
             }
             return false;
         }
@@ -229,8 +278,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
             }
 
             int h = 1;
-            h = 31 * h + pattern.pattern().GetHashCode();
-            h = 31 * h + pattern.flags();
+            h = 31 * h + pattern.ToString().GetHashCode();
+            h = 31 * h + (int)pattern.Options;
             h = 31 * h + (toLowerCase ? 1231 : 1237);
             h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
             return h;
@@ -238,16 +287,16 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
         /// <summary>
         /// equality where o1 and/or o2 can be null </summary>
-        private static bool eq(object o1, object o2)
+        private static bool Eq(object o1, object o2)
         {
             return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
         }
 
         /// <summary>
         /// assumes p1 and p2 are not null </summary>
-        private static bool eqPattern(Pattern p1, Pattern p2)
+        private static bool EqPattern(Regex p1, Regex p2)
         {
-            return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
+            return p1 == p2 || (p1.Options == p2.Options && p1.ToString().Equals(p2.ToString()));
         }
 
         /// <summary>
@@ -271,7 +320,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
 
                 len = 0;
                 int n;
-                while ((n = input.Read(buffer)) >= 0)
+                while ((n = input.Read(buffer, 0, buffer.Length)) > 0)
                 {
                     if (len + n > output.Length) // grow capacity
                     {
@@ -306,23 +355,23 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// </summary>
         private sealed class PatternTokenizer : Tokenizer
         {
-            private readonly Pattern pattern;
+            private readonly Regex pattern;
             private string str;
             private readonly bool toLowerCase;
-            private Matcher matcher;
+            private Match matcher;
             private int pos = 0;
             private bool initialized = false;
-            private static readonly Locale locale = Locale.Default;
+            private bool isReset = false; // Flag to keep track of the first match vs subsequent matches
             private readonly ICharTermAttribute termAtt;
             private readonly IOffsetAttribute offsetAtt;
 
-            public PatternTokenizer(TextReader input, Pattern pattern, bool toLowerCase)
+            public PatternTokenizer(TextReader input, Regex pattern, bool toLowerCase)
                 : base(input)
             {
                 termAtt = AddAttribute<ICharTermAttribute>();
                 offsetAtt = AddAttribute<IOffsetAttribute>();
                 this.pattern = pattern;
-                this.matcher = pattern.matcher("");
+                this.matcher = pattern.Match("");
                 this.toLowerCase = toLowerCase;
             }
 
@@ -340,28 +389,33 @@ namespace Lucene.Net.Analysis.Miscellaneous
                 while (true) // loop takes care of leading and trailing boundary cases
                 {
                     int start = pos;
-                    int end_Renamed;
-                    bool isMatch = matcher.find();
+                    int end;
+                    if (!isReset)
+                    {
+                        matcher = matcher.NextMatch();
+                    }
+                    isReset = false;
+                    bool isMatch = matcher.Success;
                     if (isMatch)
                     {
-                        end_Renamed = matcher.start();
-                        pos = matcher.end();
+                        end = matcher.Index;
+                        pos = matcher.Index + matcher.Length;
                     }
                     else
                     {
-                        end_Renamed = str.Length;
+                        end = str.Length;
                         matcher = null; // we're finished
                     }
 
-                    if (start != end_Renamed) // non-empty match (header/trailer)
+                    if (start != end) // non-empty match (header/trailer)
                     {
-                        string text = str.Substring(start, end_Renamed - start);
+                        string text = str.Substring(start, end - start);
                         if (toLowerCase)
                         {
-                            text = text.ToLower(locale);
+                            text = text.ToLower();
                         }
                         termAtt.SetEmpty().Append(text);
-                        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end_Renamed));
+                        offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
                         return true;
                     }
                     if (!isMatch)
@@ -389,7 +443,18 @@ namespace Lucene.Net.Analysis.Miscellaneous
             {
                 base.Reset();
                 this.str = PatternAnalyzer.ToString(input);
-                this.matcher = pattern.matcher(this.str);
+
+                // LUCENENET: .NET's Regex has no equivalent of Java's
+                // Matcher.reset(); to restart, a fresh Match must be
+                // obtained via pattern.Match(str), and iteration then
+                // advances via matcher = matcher.NextMatch(). The
+                // "isReset" flag records that we are positioned on the
+                // first match, so IncrementToken() does not advance
+                // past it before it has been consumed. A match can
+                // start at index 0, so the position alone cannot
+                // distinguish this state.
+                this.matcher = pattern.Match(this.str);
+                this.isReset = true;
                 this.pos = 0;
                 this.initialized = true;
             }
@@ -410,7 +475,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
             private readonly bool isLetter;
             private readonly bool toLowerCase;
             private readonly CharArraySet stopWords;
-            private static readonly Locale locale = Locale.Default;
             private readonly ICharTermAttribute termAtt;
             private readonly IOffsetAttribute offsetAtt;
 
@@ -444,7 +508,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                 {
                     // find beginning of token
                     text = null;
-                    while (i < len && !isTokenChar(s[i], letter))
+                    while (i < len && !IsTokenChar(s[i], letter))
                     {
                         i++;
                     }
@@ -452,7 +516,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                     if (i < len) // found beginning; now find end of token
                     {
                         start = i;
-                        while (i < len && isTokenChar(s[i], letter))
+                        while (i < len && IsTokenChar(s[i], letter))
                         {
                             i++;
                         }
@@ -460,7 +524,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                         text = s.Substring(start, i - start);
                         if (toLowerCase)
                         {
-                            text = text.ToLower(locale);
+                            text = text.ToLower();
                         }
                         //          if (toLowerCase) {            
                         ////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
@@ -473,7 +537,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                         //            text = s.substring(start, i);
                         //          }
                     }
-                } while (text != null && isStopWord(text));
+                } while (text != null && IsStopWord(text));
 
                 pos = i;
                 if (text == null)
@@ -493,12 +557,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
                 this.offsetAtt.SetOffset(CorrectOffset(finalOffset), CorrectOffset(finalOffset));
             }
 
-            private bool isTokenChar(char c, bool isLetter)
+            private bool IsTokenChar(char c, bool isLetter)
             {
                 return isLetter ? char.IsLetter(c) : !char.IsWhiteSpace(c);
             }
 
-            private bool isStopWord(string text)
+            private bool IsStopWord(string text)
             {
                 return stopWords != null && stopWords.Contains(text);
             }
@@ -544,6 +608,5 @@ namespace Lucene.Net.Analysis.Miscellaneous
                 }
             }
         }
-
     }
 }
\ No newline at end of file

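A note on the two trickiest translations in this file. Java's java.util.regex.Matcher.find() advances a mutable matcher in place, while .NET's Regex hands back immutable Match objects that advance via NextMatch(); that difference is what the isReset bookkeeping above compensates for. A minimal standalone sketch of that loop shape (illustrative only, not part of the commit; the input string and delimiter pattern are arbitrary):

    using System;
    using System.Text.RegularExpressions;

    public static class NextMatchDemo
    {
        public static void Main()
        {
            string str = "The quick,brown";
            Regex pattern = new Regex(@"\W+", RegexOptions.Compiled);

            Match matcher = pattern.Match(str);  // rough equivalent of matcher.reset(str) + first find()
            bool isReset = true;                 // true until the first match has been consumed
            int pos = 0;

            while (true)
            {
                int start = pos;
                if (!isReset)
                {
                    matcher = matcher.NextMatch(); // equivalent of a subsequent matcher.find()
                }
                isReset = false;

                int end;
                bool isMatch = matcher.Success;
                if (isMatch)
                {
                    end = matcher.Index;                  // token ends where the delimiter starts
                    pos = matcher.Index + matcher.Length; // next token starts after the delimiter
                }
                else
                {
                    end = str.Length;                     // trailing token, no more delimiters
                }

                if (start != end) // non-empty token
                {
                    Console.WriteLine(str.Substring(start, end - start)); // The, quick, brown
                }
                if (!isMatch)
                {
                    break;
                }
            }
        }
    }

The other is the read loop: Java's Reader.read returns -1 at end of stream, while .NET's TextReader.Read(char[], int, int) returns 0, hence the loop condition changing from >= 0 to > 0 in the hunk above.
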
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7f877fdf/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 0679473..475338b 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -256,6 +256,7 @@
     <Compile Include="Analysis\Miscellaneous\LimitTokenPositionFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\LimitTokenPositionFilterFactory.cs" />
     <Compile Include="Analysis\Miscellaneous\Lucene47WordDelimiterFilter.cs" />
+    <Compile Include="Analysis\Miscellaneous\PatternAnalyzer.cs" />
     <Compile Include="Analysis\Miscellaneous\PatternKeywordMarkerFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\PerFieldAnalyzerWrapper.cs" />
     <Compile Include="Analysis\Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7f877fdf/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
index de1db38..85a9632 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzerTest.cs
@@ -1,11 +1,13 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System.IO;
 using System.Text;
-using System.Threading;
+using System.Text.RegularExpressions;
 
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -22,193 +24,120 @@ namespace org.apache.lucene.analysis.miscellaneous
 	 * limitations under the License.
 	 */
 
-	using UncaughtExceptionHandler = Thread.UncaughtExceptionHandler;
-
-	using StopAnalyzer = org.apache.lucene.analysis.core.StopAnalyzer;
-
-	/// <summary>
-	/// Verifies the behavior of PatternAnalyzer.
-	/// </summary>
-	public class PatternAnalyzerTest : BaseTokenStreamTestCase
-	{
-
-	  /// <summary>
-	  /// Test PatternAnalyzer when it is configured with a non-word pattern.
-	  /// Behavior can be similar to SimpleAnalyzer (depending upon options)
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testNonWordPattern() throws java.io.IOException
-	  public virtual void testNonWordPattern()
-	  {
-		// Split on non-letter pattern, do not lowercase, no stopwords
-		PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
-		check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] {"The", "quick", "brown", "Fox", "the", "abcd", "dc"});
-
-		// split on non-letter pattern, lowercase, english stopwords
-		PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-		check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] {"quick", "brown", "fox", "abcd", "dc"});
-	  }
-
-	  /// <summary>
-	  /// Test PatternAnalyzer when it is configured with a whitespace pattern.
-	  /// Behavior can be similar to WhitespaceAnalyzer (depending upon options)
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testWhitespacePattern() throws java.io.IOException
-	  public virtual void testWhitespacePattern()
-	  {
-		// Split on whitespace patterns, do not lowercase, no stopwords
-		PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
-		check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] {"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc."});
-
-		// Split on whitespace patterns, lowercase, english stopwords
-		PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-		check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] {"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc."});
-	  }
-
-	  /// <summary>
-	  /// Test PatternAnalyzer when it is configured with a custom pattern. In this
-	  /// case, text is tokenized on the comma ","
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testCustomPattern() throws java.io.IOException
-	  public virtual void testCustomPattern()
-	  {
-		// Split on comma, do not lowercase, no stopwords
-		PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), false, null);
-		check(a, "Here,Are,some,Comma,separated,words,", new string[] {"Here", "Are", "some", "Comma", "separated", "words"});
-
-		// split on comma, lowercase, english stopwords
-		PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-		check(b, "Here,Are,some,Comma,separated,words,", new string[] {"here", "some", "comma", "separated", "words"});
-	  }
-
-	  /// <summary>
-	  /// Test PatternAnalyzer against a large document.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHugeDocument() throws java.io.IOException
-	  public virtual void testHugeDocument()
-	  {
-		StringBuilder document = new StringBuilder();
-		// 5000 a's
-		char[] largeWord = new char[5000];
-		Arrays.fill(largeWord, 'a');
-		document.Append(largeWord);
-
-		// a space
-		document.Append(' ');
-
-		// 2000 b's
-		char[] largeWord2 = new char[2000];
-		Arrays.fill(largeWord2, 'b');
-		document.Append(largeWord2);
-
-		// Split on whitespace patterns, do not lowercase, no stopwords
-		PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
-		check(a, document.ToString(), new string[]
-		{
-			new string(largeWord),
-			new string(largeWord2)
-		});
-	  }
-
-	  /// <summary>
-	  /// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
-	  /// several methods are verified:
-	  /// <ul>
-	  /// <li>Analysis with a normal Reader
-	  /// <li>Analysis with a FastStringReader
-	  /// <li>Analysis with a String
-	  /// </ul>
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void check(PatternAnalyzer analyzer, String document, String expected[]) throws java.io.IOException
-	  private void check(PatternAnalyzer analyzer, string document, string[] expected)
-	  {
-		// ordinary analysis of a Reader
-		assertAnalyzesTo(analyzer, document, expected);
-
-		// analysis with a "FastStringReader"
-		TokenStream ts = analyzer.tokenStream("dummy", new PatternAnalyzer.FastStringReader(document));
-		assertTokenStreamContents(ts, expected);
-
-		// analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
-		TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
-		assertTokenStreamContents(ts2, expected);
-	  }
-
-	  /// <summary>
-	  /// blast some random strings through the analyzer </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRandomStrings() throws Exception
-	  public virtual void testRandomStrings()
-	  {
-		Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
-
-		// dodge jre bug http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7104012
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final Thread.UncaughtExceptionHandler savedHandler = Thread.getDefaultUncaughtExceptionHandler();
-		UncaughtExceptionHandler savedHandler = Thread.DefaultUncaughtExceptionHandler;
-		Thread.DefaultUncaughtExceptionHandler = new UncaughtExceptionHandlerAnonymousInnerClassHelper(this, savedHandler);
-
-		try
-		{
-		  Thread.DefaultUncaughtExceptionHandler;
-		  checkRandomData(random(), a, 10000 * RANDOM_MULTIPLIER);
-		}
-		catch (System.IndexOutOfRangeException ex)
-		{
-		  assumeTrue("not failing due to jre bug ", !isJREBug7104012(ex));
-		  throw ex; // otherwise rethrow
-		}
-		finally
-		{
-		  Thread.DefaultUncaughtExceptionHandler = savedHandler;
-		}
-	  }
-
-	  private class UncaughtExceptionHandlerAnonymousInnerClassHelper : UncaughtExceptionHandler
-	  {
-		  private readonly PatternAnalyzerTest outerInstance;
-
-		  private UncaughtExceptionHandler savedHandler;
-
-		  public UncaughtExceptionHandlerAnonymousInnerClassHelper(PatternAnalyzerTest outerInstance, UncaughtExceptionHandler savedHandler)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.savedHandler = savedHandler;
-		  }
-
-		  public override void uncaughtException(Thread thread, Exception throwable)
-		  {
-			assumeTrue("not failing due to jre bug ", !isJREBug7104012(throwable));
-			// otherwise its some other bug, pass to default handler
-			savedHandler.uncaughtException(thread, throwable);
-		  }
-	  }
-
-	  internal static bool isJREBug7104012(Exception t)
-	  {
-		if (!(t is System.IndexOutOfRangeException))
-		{
-		  // BaseTokenStreamTestCase now wraps exc in a new RuntimeException:
-		  t = t.InnerException;
-		  if (!(t is System.IndexOutOfRangeException))
-		  {
-			return false;
-		  }
-		}
-		StackTraceElement[] trace = t.StackTrace;
-		foreach (StackTraceElement st in trace)
-		{
-		  if ("java.text.RuleBasedBreakIterator".Equals(st.ClassName) || "sun.util.locale.provider.RuleBasedBreakIterator".Equals(st.ClassName) && "lookupBackwardState".Equals(st.MethodName))
-		  {
-			return true;
-		  }
-		}
-		return false;
-	  }
-	}
-
+    /// <summary>
+    /// Verifies the behavior of PatternAnalyzer.
+    /// </summary>
+    public class PatternAnalyzerTest : BaseTokenStreamTestCase
+    {
+
+        /// <summary>
+        /// Test PatternAnalyzer when it is configured with a non-word pattern.
+        /// Behavior can be similar to SimpleAnalyzer (depending upon options)
+        /// </summary>
+        [Test]
+        public virtual void TestNonWordPattern()
+        {
+            // Split on non-letter pattern, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, false, null);
+            Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] { "The", "quick", "brown", "Fox", "the", "abcd", "dc" });
+
+            // split on non-letter pattern, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] { "quick", "brown", "fox", "abcd", "dc" });
+        }
+
+        /// <summary>
+        /// Test PatternAnalyzer when it is configured with a whitespace pattern.
+        /// Behavior can be similar to WhitespaceAnalyzer (depending upon options)
+        /// </summary>
+        [Test]
+        public virtual void TestWhitespacePattern()
+        {
+            // Split on whitespace patterns, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
+            Check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] { "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
+
+            // Split on whitespace patterns, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new string[] { "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
+        }
+
+        /// <summary>
+        /// Test PatternAnalyzer when it is configured with a custom pattern. In this
+        /// case, text is tokenized on the comma ","
+        /// </summary>
+        [Test]
+        public virtual void TestCustomPattern()
+        {
+            // Split on comma, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), false, null);
+            Check(a, "Here,Are,some,Comma,separated,words,", new string[] { "Here", "Are", "some", "Comma", "separated", "words" });
+
+            // split on comma, lowercase, english stopwords
+            PatternAnalyzer b = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            Check(b, "Here,Are,some,Comma,separated,words,", new string[] { "here", "some", "comma", "separated", "words" });
+        }
+
+        /// <summary>
+        /// Test PatternAnalyzer against a large document.
+        /// </summary>
+        [Test]
+        public virtual void TestHugeDocument()
+        {
+            StringBuilder document = new StringBuilder();
+            // 5000 a's
+            char[] largeWord = new char[5000];
+            Arrays.Fill(largeWord, 'a');
+            document.Append(largeWord);
+
+            // a space
+            document.Append(' ');
+
+            // 2000 b's
+            char[] largeWord2 = new char[2000];
+            Arrays.Fill(largeWord2, 'b');
+            document.Append(largeWord2);
+
+            // Split on whitespace patterns, do not lowercase, no stopwords
+            PatternAnalyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN, false, null);
+            Check(a, document.ToString(), new string[]
+            {
+                new string(largeWord),
+                new string(largeWord2)
+            });
+        }
+
+        /// <summary>
+        /// Verify the analyzer analyzes to the expected contents. For PatternAnalyzer,
+        /// several methods are verified:
+        /// <ul>
+        /// <li>Analysis with a normal Reader
+        /// <li>Analysis with a FastStringReader
+        /// <li>Analysis with a String
+        /// </ul>
+        /// </summary>
+        private void Check(PatternAnalyzer analyzer, string document, string[] expected)
+        {
+            // ordinary analysis of a Reader
+            AssertAnalyzesTo(analyzer, document, expected);
+
+            // analysis with a "FastStringReader"
+            TokenStream ts = analyzer.TokenStream("dummy", new PatternAnalyzer.FastStringReader(document));
+            AssertTokenStreamContents(ts, expected);
+
+            // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
+            TokenStream ts2 = analyzer.TokenStream("dummy", new StringReader(document));
+            AssertTokenStreamContents(ts2, expected);
+        }
+
+        /// <summary>
+        /// blast some random strings through the analyzer </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            Analyzer a = new PatternAnalyzer(TEST_VERSION_CURRENT, new Regex(",", RegexOptions.Compiled), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+            CheckRandomData(Random(), a, 10000 * RANDOM_MULTIPLIER);
+        }
+    }
 }
\ No newline at end of file

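For readers who want to see the analyzer outside the test harness, this is roughly how the TestCustomPattern configuration above would be consumed by hand. It assumes the standard Lucene.Net token-stream contract (Reset/IncrementToken/End); treat it as a sketch, not the canonical API surface of this branch:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Tokenattributes;
    using Lucene.Net.Util;
    using System;
    using System.IO;
    using System.Text.RegularExpressions;

    public static class PatternAnalyzerUsage
    {
        public static void Main()
        {
            // Same configuration TestCustomPattern uses: split on commas,
            // lowercase, remove English stop words.
            var analyzer = new PatternAnalyzer(LuceneVersion.LUCENE_CURRENT,
                new Regex(",", RegexOptions.Compiled), true,
                StopAnalyzer.ENGLISH_STOP_WORDS_SET);

            TokenStream ts = analyzer.TokenStream("dummy",
                new StringReader("Here,Are,some,Comma,separated,words,"));
            var termAtt = ts.AddAttribute<ICharTermAttribute>();

            ts.Reset();
            while (ts.IncrementToken())
            {
                // "Are" is dropped as the stop word "are" after lowercasing:
                Console.WriteLine(termAtt.ToString()); // here, some, comma, separated, words
            }
            ts.End();
        }
    }
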
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7f877fdf/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 6d6c668..029a40f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -194,6 +194,7 @@
     <Compile Include="Analysis\Lv\TestLatvianAnalyzer.cs" />
     <Compile Include="Analysis\Lv\TestLatvianStemFilterFactory.cs" />
     <Compile Include="Analysis\Lv\TestLatvianStemmer.cs" />
+    <Compile Include="Analysis\Miscellaneous\PatternAnalyzerTest.cs" />
     <Compile Include="Analysis\Miscellaneous\TestASCIIFoldingFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestCapitalizationFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestCapitalizationFilterFactory.cs" />


[09/22] lucenenet git commit: Fixed casing of method names in SynonymFilter

Posted by sy...@apache.org.
Fixed casing of method names in SynonymFilter


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ddd93cba
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ddd93cba
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ddd93cba

Branch: refs/heads/analysis-work
Commit: ddd93cbadc184a6979d09be79f80309d3b0cf6d7
Parents: edde0fb
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Aug 25 10:05:08 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Aug 25 10:05:08 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Synonym/SynonymFilter.cs           | 42 ++++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ddd93cba/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
index ced9330..d2fb3c8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -168,13 +168,13 @@ namespace Lucene.Net.Analysis.Synonym
                 posLengths = new int[1];
             }
 
-            public virtual void reset()
+            public virtual void Reset()
             {
                 upto = count = 0;
                 posIncr = 1;
             }
 
-            public virtual CharsRef pullNext()
+            public virtual CharsRef PullNext()
             {
                 Debug.Assert(upto < count);
                 lastEndOffset = endOffsets[upto];
@@ -183,7 +183,7 @@ namespace Lucene.Net.Analysis.Synonym
                 posIncr = 0;
                 if (upto == count)
                 {
-                    reset();
+                    Reset();
                 }
                 return result;
             }
@@ -204,7 +204,7 @@ namespace Lucene.Net.Analysis.Synonym
                 }
             }
 
-            public virtual void add(char[] output, int offset, int len, int endOffset, int posLength)
+            public virtual void Add(char[] output, int offset, int len, int endOffset, int posLength)
             {
                 if (count == outputs.Length)
                 {
@@ -303,7 +303,7 @@ namespace Lucene.Net.Analysis.Synonym
             scratchArc = new FST.Arc<BytesRef>();
         }
 
-        private void capture()
+        private void Capture()
         {
             captureCount++;
             //System.out.println("  capture slot=" + nextWrite);
@@ -313,7 +313,7 @@ namespace Lucene.Net.Analysis.Synonym
             input.consumed = false;
             input.term.CopyChars(termAtt.Buffer(), 0, termAtt.Length);
 
-            nextWrite = rollIncr(nextWrite);
+            nextWrite = RollIncr(nextWrite);
 
             // Buffer head should never catch up to tail:
             Debug.Assert(nextWrite != nextRead);
@@ -390,7 +390,7 @@ namespace Lucene.Net.Analysis.Synonym
                             //System.out.println("  new token=" + new String(buffer, 0, bufferLen));
                             if (nextRead != nextWrite)
                             {
-                                capture();
+                                Capture();
                             }
                             else
                             {
@@ -461,18 +461,18 @@ namespace Lucene.Net.Analysis.Synonym
                     pendingOutput = fst.Outputs.Add(pendingOutput, scratchArc.Output);
                     if (nextRead == nextWrite)
                     {
-                        capture();
+                        Capture();
                     }
                 }
 
-                curNextRead = rollIncr(curNextRead);
+                curNextRead = RollIncr(curNextRead);
             }
             byTokenBreak:
 
             if (nextRead == nextWrite && !finished)
             {
                 //System.out.println("  skip write slot=" + nextWrite);
-                nextWrite = rollIncr(nextWrite);
+                nextWrite = RollIncr(nextWrite);
             }
 
             if (matchOutput != null)
@@ -541,11 +541,11 @@ namespace Lucene.Net.Analysis.Synonym
                             endOffset = -1;
                             posLen = 1;
                         }
-                        futureOutputs[outputUpto].add(scratchChars.Chars, lastStart, outputLen, endOffset, posLen);
+                        futureOutputs[outputUpto].Add(scratchChars.Chars, lastStart, outputLen, endOffset, posLen);
                         //System.out.println("      " + new String(scratchChars.chars, lastStart, outputLen) + " outputUpto=" + outputUpto);
                         lastStart = 1 + chIDX;
                         //System.out.println("  slot=" + outputUpto + " keepOrig=" + keepOrig);
-                        outputUpto = rollIncr(outputUpto);
+                        outputUpto = RollIncr(outputUpto);
                         Debug.Assert(futureOutputs[outputUpto].posIncr == 1, "outputUpto=" + outputUpto + " vs nextWrite=" + nextWrite);
                     }
                 }
@@ -556,12 +556,12 @@ namespace Lucene.Net.Analysis.Synonym
             {
                 futureInputs[upto].keepOrig |= keepOrig;
                 futureInputs[upto].matched = true;
-                upto = rollIncr(upto);
+                upto = RollIncr(upto);
             }
         }
 
         // ++ mod rollBufferSize
-        private int rollIncr(int count)
+        private int RollIncr(int count)
         {
             count++;
             if (count == rollBufferSize)
@@ -627,7 +627,7 @@ namespace Lucene.Net.Analysis.Synonym
                         }
                         else
                         {
-                            nextRead = rollIncr(nextRead);
+                            nextRead = RollIncr(nextRead);
                             inputSkipCount--;
                         }
                         //System.out.println("  return token=" + termAtt.toString());
@@ -639,7 +639,7 @@ namespace Lucene.Net.Analysis.Synonym
                         // position
                         input.Reset();
                         int posIncr = outputs.posIncr;
-                        CharsRef output = outputs.pullNext();
+                        CharsRef output = outputs.PullNext();
                         ClearAttributes();
                         termAtt.CopyBuffer(output.Chars, output.Offset, output.Length);
                         typeAtt.Type = TYPE_SYNONYM;
@@ -655,7 +655,7 @@ namespace Lucene.Net.Analysis.Synonym
                         {
                             // Done with the buffered input and all outputs at
                             // this position
-                            nextRead = rollIncr(nextRead);
+                            nextRead = RollIncr(nextRead);
                             inputSkipCount--;
                         }
                         //System.out.println("  return token=" + termAtt.toString());
@@ -666,7 +666,7 @@ namespace Lucene.Net.Analysis.Synonym
                         // Done with the buffered input and all outputs at
                         // this position
                         input.Reset();
-                        nextRead = rollIncr(nextRead);
+                        nextRead = RollIncr(nextRead);
                         inputSkipCount--;
                     }
                 }
@@ -679,11 +679,11 @@ namespace Lucene.Net.Analysis.Synonym
                     if (outputs.upto < outputs.count)
                     {
                         int posIncr = outputs.posIncr;
-                        CharsRef output = outputs.pullNext();
+                        CharsRef output = outputs.PullNext();
                         futureInputs[nextRead].Reset();
                         if (outputs.count == 0)
                         {
-                            nextWrite = nextRead = rollIncr(nextRead);
+                            nextWrite = nextRead = RollIncr(nextRead);
                         }
                         ClearAttributes();
                         // Keep offset from last input token:
@@ -725,7 +725,7 @@ namespace Lucene.Net.Analysis.Synonym
             }
             foreach (PendingOutputs output in futureOutputs)
             {
-                output.reset();
+                output.Reset();
             }
         }
     }

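For context, RollIncr (one of the renamed methods) is the ring-buffer increment for the filter's captured input/output slots: advance by one and wrap back to slot 0 at rollBufferSize. A tiny standalone illustration of the "++ mod rollBufferSize" pattern (the buffer size here is an arbitrary example value):

    using System;

    public static class RollIncrDemo
    {
        private const int rollBufferSize = 4; // example value only

        // Advance one slot, wrapping to 0 when the end of the buffer is reached.
        private static int RollIncr(int count)
        {
            count++;
            return count == rollBufferSize ? 0 : count;
        }

        public static void Main()
        {
            int slot = 0;
            for (int i = 0; i < 8; i++)
            {
                Console.Write(slot + " "); // 0 1 2 3 0 1 2 3
                slot = RollIncr(slot);
            }
            Console.WriteLine();
        }
    }
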

[05/22] lucenenet git commit: Ported Analysis.Util.TestAnalysisSPILoader and fixed a bug in AnalysisSPILoader.

Posted by sy...@apache.org.
Ported Analysis.Util.TestAnalysisSPILoader and fixed a bug in AnalysisSPILoader.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0549bf11
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0549bf11
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0549bf11

Branch: refs/heads/analysis-work
Commit: 0549bf1111831fe8589d554960e763bb197c34ab
Parents: 753dd0b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Aug 24 19:27:49 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 24 19:27:49 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Util/AnalysisSPILoader.cs          |   2 +-
 .../Analysis/Util/TestAnalysisSPILoader.cs      | 488 ++++++++++---------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   1 +
 3 files changed, 252 insertions(+), 239 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0549bf11/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
index 56c7f5a..e92bf3f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -106,7 +106,7 @@ namespace Lucene.Net.Analysis.Util
             Type service = LookupClass(name);
             try
             {
-                return (S)Activator.CreateInstance(clazz,
+                return (S)Activator.CreateInstance(service,
                     BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
                     null, new object[] { args }, CultureInfo.InvariantCulture);
             }

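The AnalysisSPILoader fix is small but real: LookupClass(name) stores the requested factory type in service, yet instantiation still referenced a leftover clazz variable, so the wrong type could be constructed. A standalone sketch of the corrected reflection call (DemoFactory and NewInstance are hypothetical names used only for illustration):

    using System;
    using System.Collections.Generic;
    using System.Globalization;
    using System.Reflection;

    public static class SpiLoaderSketch
    {
        // Hypothetical stand-in for a factory type being looked up; Lucene
        // factories take their configuration as an args dictionary.
        public class DemoFactory
        {
            public DemoFactory(IDictionary<string, string> args) { }
        }

        // Mirrors the fixed call: instantiate the *looked-up* type 'service',
        // passing the args dictionary to its (possibly non-public) constructor.
        public static S NewInstance<S>(Type service, IDictionary<string, string> args)
        {
            return (S)Activator.CreateInstance(service,
                BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
                null, new object[] { args }, CultureInfo.InvariantCulture);
        }

        public static void Main()
        {
            var factory = NewInstance<DemoFactory>(typeof(DemoFactory),
                new Dictionary<string, string>());
            Console.WriteLine(factory.GetType().Name); // DemoFactory
        }
    }
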
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0549bf11/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestAnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestAnalysisSPILoader.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestAnalysisSPILoader.cs
index edfad89..736457e 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestAnalysisSPILoader.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestAnalysisSPILoader.cs
@@ -1,9 +1,14 @@
-\ufeffusing System.Collections.Generic;
-
-namespace org.apache.lucene.analysis.util
+\ufeffusing Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.Util
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,237 +25,244 @@ namespace org.apache.lucene.analysis.util
 	 * limitations under the License.
 	 */
 
-
-	using HTMLStripCharFilterFactory = org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
-	using LowerCaseFilterFactory = org.apache.lucene.analysis.core.LowerCaseFilterFactory;
-	using WhitespaceTokenizerFactory = org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
-	using RemoveDuplicatesTokenFilterFactory = org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-
-	public class TestAnalysisSPILoader : LuceneTestCase
-	{
-
-	  private IDictionary<string, string> versionArgOnly()
-	  {
-		return new HashMapAnonymousInnerClassHelper(this);
-	  }
-
-	  private class HashMapAnonymousInnerClassHelper : Dictionary<string, string>
-	  {
-		  private readonly TestAnalysisSPILoader outerInstance;
-
-		  public HashMapAnonymousInnerClassHelper(TestAnalysisSPILoader outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-
-			  this.put("luceneMatchVersion", TEST_VERSION_CURRENT.ToString());
-		  }
-
-	  }
-
-	  public virtual void testLookupTokenizer()
-	  {
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.forName("Whitespace", versionArgOnly()).GetType());
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.forName("WHITESPACE", versionArgOnly()).GetType());
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.forName("whitespace", versionArgOnly()).GetType());
-	  }
-
-	  public virtual void testBogusLookupTokenizer()
-	  {
-		try
-		{
-		  TokenizerFactory.forName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  TokenizerFactory.forName("!(**#$U*#$*", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testLookupTokenizerClass()
-	  {
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.lookupClass("Whitespace"));
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.lookupClass("WHITESPACE"));
-		assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.lookupClass("whitespace"));
-	  }
-
-	  public virtual void testBogusLookupTokenizerClass()
-	  {
-		try
-		{
-		  TokenizerFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  TokenizerFactory.lookupClass("!(**#$U*#$*");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testAvailableTokenizers()
-	  {
-		assertTrue(TokenizerFactory.availableTokenizers().contains("whitespace"));
-	  }
-
-	  public virtual void testLookupTokenFilter()
-	  {
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.forName("Lowercase", versionArgOnly()).GetType());
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.forName("LOWERCASE", versionArgOnly()).GetType());
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.forName("lowercase", versionArgOnly()).GetType());
-
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.forName("RemoveDuplicates", versionArgOnly()).GetType());
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.forName("REMOVEDUPLICATES", versionArgOnly()).GetType());
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.forName("removeduplicates", versionArgOnly()).GetType());
-	  }
-
-	  public virtual void testBogusLookupTokenFilter()
-	  {
-		try
-		{
-		  TokenFilterFactory.forName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  TokenFilterFactory.forName("!(**#$U*#$*", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testLookupTokenFilterClass()
-	  {
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.lookupClass("Lowercase"));
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.lookupClass("LOWERCASE"));
-		assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.lookupClass("lowercase"));
-
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.lookupClass("RemoveDuplicates"));
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.lookupClass("REMOVEDUPLICATES"));
-		assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.lookupClass("removeduplicates"));
-	  }
-
-	  public virtual void testBogusLookupTokenFilterClass()
-	  {
-		try
-		{
-		  TokenFilterFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  TokenFilterFactory.lookupClass("!(**#$U*#$*");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testAvailableTokenFilters()
-	  {
-		assertTrue(TokenFilterFactory.availableTokenFilters().contains("lowercase"));
-		assertTrue(TokenFilterFactory.availableTokenFilters().contains("removeduplicates"));
-	  }
-
-	  public virtual void testLookupCharFilter()
-	  {
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.forName("HTMLStrip", versionArgOnly()).GetType());
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.forName("HTMLSTRIP", versionArgOnly()).GetType());
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.forName("htmlstrip", versionArgOnly()).GetType());
-	  }
-
-	  public virtual void testBogusLookupCharFilter()
-	  {
-		try
-		{
-		  CharFilterFactory.forName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  CharFilterFactory.forName("!(**#$U*#$*", new Dictionary<string, string>());
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testLookupCharFilterClass()
-	  {
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.lookupClass("HTMLStrip"));
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.lookupClass("HTMLSTRIP"));
-		assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.lookupClass("htmlstrip"));
-	  }
-
-	  public virtual void testBogusLookupCharFilterClass()
-	  {
-		try
-		{
-		  CharFilterFactory.lookupClass("sdfsdfsdfdsfsdfsdf");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-
-		try
-		{
-		  CharFilterFactory.lookupClass("!(**#$U*#$*");
-		  fail();
-		}
-		catch (System.ArgumentException)
-		{
-		  //
-		}
-	  }
-
-	  public virtual void testAvailableCharFilters()
-	  {
-		assertTrue(CharFilterFactory.availableCharFilters().contains("htmlstrip"));
-	  }
-	}
-
+    public class TestAnalysisSPILoader : LuceneTestCase
+    {
+
+        private IDictionary<string, string> VersionArgOnly()
+        {
+            return new HashMapAnonymousInnerClassHelper(this);
+        }
+
+        private class HashMapAnonymousInnerClassHelper : Dictionary<string, string>
+        {
+            private readonly TestAnalysisSPILoader outerInstance;
+
+            public HashMapAnonymousInnerClassHelper(TestAnalysisSPILoader outerInstance)
+            {
+                this.outerInstance = outerInstance;
+
+                this["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
+            }
+        }
+
+        [Test]
+        public virtual void TestLookupTokenizer()
+        {
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.ForName("Whitespace", VersionArgOnly()).GetType());
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.ForName("WHITESPACE", VersionArgOnly()).GetType());
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.ForName("whitespace", VersionArgOnly()).GetType());
+        }
+
+        [Test]
+        public virtual void TestBogusLookupTokenizer()
+        {
+            try
+            {
+                TokenizerFactory.ForName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                TokenizerFactory.ForName("!(**#$U*#$*", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestLookupTokenizerClass()
+        {
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.LookupClass("Whitespace"));
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.LookupClass("WHITESPACE"));
+            assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.LookupClass("whitespace"));
+        }
+
+        [Test]
+        public virtual void TestBogusLookupTokenizerClass()
+        {
+            try
+            {
+                TokenizerFactory.LookupClass("sdfsdfsdfdsfsdfsdf");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                TokenizerFactory.LookupClass("!(**#$U*#$*");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestAvailableTokenizers()
+        {
+            assertTrue(TokenizerFactory.AvailableTokenizers.Contains("whitespace"));
+        }
+
+        [Test]
+        public virtual void TestLookupTokenFilter()
+        {
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.ForName("Lowercase", VersionArgOnly()).GetType());
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.ForName("LOWERCASE", VersionArgOnly()).GetType());
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.ForName("lowercase", VersionArgOnly()).GetType());
+
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.ForName("RemoveDuplicates", VersionArgOnly()).GetType());
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.ForName("REMOVEDUPLICATES", VersionArgOnly()).GetType());
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.ForName("removeduplicates", VersionArgOnly()).GetType());
+        }
+
+        [Test]
+        public virtual void TestBogusLookupTokenFilter()
+        {
+            try
+            {
+                TokenFilterFactory.ForName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                TokenFilterFactory.ForName("!(**#$U*#$*", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestLookupTokenFilterClass()
+        {
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.LookupClass("Lowercase"));
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.LookupClass("LOWERCASE"));
+            assertSame(typeof(LowerCaseFilterFactory), TokenFilterFactory.LookupClass("lowercase"));
+
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.LookupClass("RemoveDuplicates"));
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.LookupClass("REMOVEDUPLICATES"));
+            assertSame(typeof(RemoveDuplicatesTokenFilterFactory), TokenFilterFactory.LookupClass("removeduplicates"));
+        }
+
+        [Test]
+        public virtual void TestBogusLookupTokenFilterClass()
+        {
+            try
+            {
+                TokenFilterFactory.LookupClass("sdfsdfsdfdsfsdfsdf");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                TokenFilterFactory.LookupClass("!(**#$U*#$*");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestAvailableTokenFilters()
+        {
+            assertTrue(TokenFilterFactory.AvailableTokenFilters.Contains("lowercase"));
+            assertTrue(TokenFilterFactory.AvailableTokenFilters.Contains("removeduplicates"));
+        }
+
+        [Test]
+        public virtual void TestLookupCharFilter()
+        {
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.ForName("HTMLStrip", VersionArgOnly()).GetType());
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.ForName("HTMLSTRIP", VersionArgOnly()).GetType());
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.ForName("htmlstrip", VersionArgOnly()).GetType());
+        }
+
+        [Test]
+        public virtual void TestBogusLookupCharFilter()
+        {
+            try
+            {
+                CharFilterFactory.ForName("sdfsdfsdfdsfsdfsdf", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                CharFilterFactory.ForName("!(**#$U*#$*", new Dictionary<string, string>());
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestLookupCharFilterClass()
+        {
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.LookupClass("HTMLStrip"));
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.LookupClass("HTMLSTRIP"));
+            assertSame(typeof(HTMLStripCharFilterFactory), CharFilterFactory.LookupClass("htmlstrip"));
+        }
+
+        [Test]
+        public virtual void TestBogusLookupCharFilterClass()
+        {
+            try
+            {
+                CharFilterFactory.LookupClass("sdfsdfsdfdsfsdfsdf");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+
+            try
+            {
+                CharFilterFactory.LookupClass("!(**#$U*#$*");
+                fail();
+            }
+            catch (System.ArgumentException)
+            {
+                // expected
+            }
+        }
+
+        [Test]
+        public virtual void TestAvailableCharFilters()
+        {
+            assertTrue(CharFilterFactory.AvailableCharFilters.Contains("htmlstrip"));
+        }
+    }
 }
\ No newline at end of file

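An aside on the port above: it carries over two Java-isms verbatim, the HashMapAnonymousInnerClassHelper subclass (a leftover of Java's anonymous HashMap initializer) and the try/fail()/catch blocks for expected exceptions. Below is a minimal sketch of the more idiomatic C# equivalents, assuming NUnit (implied by the [Test] attributes); the SpiLoaderSketch class name and the version string parameter are hypothetical, while the factory types and the ForName/LookupClass calls mirror the API shown in the diff.

    using System;
    using System.Collections.Generic;
    using Lucene.Net.Analysis.Util;
    using NUnit.Framework;

    // Hypothetical sketch, not part of the commit: idiomatic equivalents of
    // the patterns ported in TestAnalysisSPILoader above.
    public class SpiLoaderSketch
    {
        // A plain collection initializer replaces the generated
        // anonymous-inner-class emulation; the version string is passed in so
        // the sketch does not depend on LuceneTestCase.TEST_VERSION_CURRENT.
        private static IDictionary<string, string> VersionArgOnly(string version)
        {
            return new Dictionary<string, string> { { "luceneMatchVersion", version } };
        }

        [Test]
        public void BogusLookupThrows()
        {
            // NUnit's Assert.Throws collapses each try/fail()/catch block into
            // a single assertion and reports a clear message when nothing throws.
            Assert.Throws<ArgumentException>(
                () => TokenFilterFactory.LookupClass("sdfsdfsdfdsfsdfsdf"));
        }
    }

Keeping the literal try/fail()/catch port in the commit itself is defensible, since it stays line-for-line comparable with the upstream Java tests.
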
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0549bf11/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 532ee86..048bac7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -303,6 +303,7 @@
     <Compile Include="Analysis\Tr\TestTurkishLowerCaseFilterFactory.cs" />
     <Compile Include="Analysis\Util\BaseTokenStreamFactoryTestCase.cs" />
     <Compile Include="Analysis\Util\StringMockResourceLoader.cs" />
+    <Compile Include="Analysis\Util\TestAnalysisSPILoader.cs" />
     <Compile Include="Analysis\Util\TestCharacterUtils.cs" />
     <Compile Include="Analysis\Util\TestCharArrayIterator.cs" />
     <Compile Include="Analysis\Util\TestCharArrayMap.cs" />