You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/09/01 14:36:35 UTC
[15/22] lucenenet git commit: Ported Analysis.Core.TestRandomChains
and moved the CheckThatYouDidntReadAnythingReaderWrapper back into that class
from TestBugInSomething.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0a5198ec/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
index a1e8438..8da141f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
@@ -1,11 +1,36 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Cjk;
+using Lucene.Net.Analysis.CommonGrams;
+using Lucene.Net.Analysis.Compound;
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Hunspell;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.Path;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Analysis.Snowball;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Synonym;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Analysis.Wikipedia;
+using Lucene.Net.Attributes;
+using Lucene.Net.Support;
+using Lucene.Net.Tartarus.Snowball;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using NUnit.Framework;
+using System;
using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using System.Text.RegularExpressions;
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
{
-
- /*
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,1566 +47,1086 @@ namespace org.apache.lucene.analysis.core
* limitations under the License.
*/
+ /// <summary>
+ /// tests random analysis chains </summary>
+ public class TestRandomChains : BaseTokenStreamTestCase
+ {
+
+ internal static List<ConstructorInfo> tokenizers;
+ internal static List<ConstructorInfo> tokenfilters;
+ internal static List<ConstructorInfo> charfilters;
+
+ /// <summary>
+ /// Minimal predicate abstraction: answers yes/no for a value of type <typeparamref name="T"/>.
+ /// </summary>
+ private interface IPredicate<T>
+ {
+ /// <summary>Returns this predicate's verdict for <paramref name="o"/>.</summary>
+ bool Apply(T o);
+ }
+
+ private static readonly IPredicate<object[]> ALWAYS = new PredicateAnonymousInnerClassHelper();
+
+ /// <summary>
+ /// Predicate that accepts every argument list; this is the implementation behind <c>ALWAYS</c>.
+ /// </summary>
+ private class PredicateAnonymousInnerClassHelper : IPredicate<object[]>
+ {
+     // No explicit constructor: the compiler-supplied public parameterless one is equivalent.
+
+     /// <summary>Ignores <paramref name="args"/> and always answers <c>true</c>.</summary>
+     public virtual bool Apply(object[] args)
+     {
+         const bool matchesEverything = true;
+         return matchesEverything;
+     }
+ }
+
+ private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
+ // TODO: also fix these and remove (maybe):
+ // Classes/options that don't produce consistent graph offsets:
+ private static readonly IDictionary<ConstructorInfo, IPredicate<object[]>> brokenOffsetsConstructors = new Dictionary<ConstructorInfo, IPredicate<object[]>>();
+
+ internal static readonly ISet<Type> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
+ /// <summary>
+ /// Registers the constructors that must be skipped (always, or for particular arguments),
+ /// the constructors whose offsets cannot be trusted, and the parameter types each kind of
+ /// component is allowed to declare.
+ /// </summary>
+ static TestRandomChains()
+ {
+     try
+     {
+         Type[] streamAndInt = new Type[] { typeof(TokenStream), typeof(int) };
+         Type[] streamIntAndBool = new Type[] { typeof(TokenStream), typeof(int), typeof(bool) };
+
+         brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(streamAndInt)] = ALWAYS;
+         brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(streamIntAndBool)] = new PredicateAnonymousInnerClassHelper2();
+         brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(streamAndInt)] = ALWAYS;
+         brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(streamIntAndBool)] = new PredicateAnonymousInnerClassHelper3();
+
+         Type[] alwaysBrokenTypes = new Type[]
+         {
+             // TODO: can we promote some of these to be only
+             // offsets offenders?
+             // doesn't actually reset itself:
+             typeof(CachingTokenFilter),
+             // Not broken: we forcefully add this, so we shouldn't
+             // also randomly pick it:
+             typeof(ValidatingTokenFilter)
+         };
+         foreach (Type brokenType in alwaysBrokenTypes)
+         {
+             foreach (ConstructorInfo brokenCtor in brokenType.GetConstructors())
+             {
+                 brokenConstructors[brokenCtor] = ALWAYS;
+             }
+         }
+     }
+     catch (Exception e)
+     {
+         throw new Exception(e.Message, e);
+     }
+
+     // TODO: also fix these and remove (maybe):
+     // classes/options that don't produce consistent graph offsets.
+     try
+     {
+         Type[] offsetOffenders = new Type[]
+         {
+             typeof(ReversePathHierarchyTokenizer),
+             typeof(PathHierarchyTokenizer),
+             // TODO: it seems to mess up offsets!?
+             typeof(WikipediaTokenizer),
+             // TODO: doesn't handle graph inputs
+             typeof(CJKBigramFilter),
+             // TODO: doesn't handle graph inputs (or even look at positionIncrement)
+             typeof(HyphenatedWordsFilter),
+             // TODO: LUCENE-4983
+             typeof(CommonGramsFilter),
+             // TODO: doesn't handle graph inputs
+             typeof(CommonGramsQueryFilter),
+             // TODO: probably doesn't handle graph inputs, too afraid to try
+             typeof(WordDelimiterFilter)
+         };
+         foreach (Type offender in offsetOffenders)
+         {
+             foreach (ConstructorInfo offenderCtor in offender.GetConstructors())
+             {
+                 brokenOffsetsConstructors[offenderCtor] = ALWAYS;
+             }
+         }
+     }
+     catch (Exception e)
+     {
+         throw new Exception(e.Message, e);
+     }
+
+     // Java original: Collections.newSetFromMap(new IdentityHashMap<...>()) for each of the sets below.
+     allowedTokenizerArgs = new HashSet<Type>(argProducers.Keys);
+     allowedTokenizerArgs.Add(typeof(TextReader));
+     allowedTokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
+     allowedTokenizerArgs.Add(typeof(AttributeSource));
+
+     allowedTokenFilterArgs = new HashSet<Type>(argProducers.Keys);
+     allowedTokenFilterArgs.Add(typeof(TokenStream));
+     // TODO: fix this one, that's broken:
+     allowedTokenFilterArgs.Add(typeof(CommonGramsFilter));
+
+     allowedCharFilterArgs = new HashSet<Type>(argProducers.Keys);
+     allowedCharFilterArgs.Add(typeof(TextReader));
+ }
+
+ /// <summary>
+ /// Predicate registered for the three-argument <c>LimitTokenCountFilter</c> constructor:
+ /// the argument list counts as broken when its third element is <c>false</c>.
+ /// </summary>
+ private class PredicateAnonymousInnerClassHelper2 : IPredicate<object[]>
+ {
+     /// <summary>Returns <c>true</c> when <c>args[2]</c> (consumeAllTokens) is <c>false</c>.</summary>
+     public virtual bool Apply(object[] args)
+     {
+         Debug.Assert(args.Length == 3);
+         bool consumeAllTokens = (bool)args[2];
+         // args are broken if consumeAllTokens is false
+         return !consumeAllTokens;
+     }
+ }
+
+ /// <summary>
+ /// Predicate registered for the three-argument <c>LimitTokenPositionFilter</c> constructor:
+ /// the argument list counts as broken when its third element is <c>false</c>.
+ /// </summary>
+ private class PredicateAnonymousInnerClassHelper3 : IPredicate<object[]>
+ {
+     /// <summary>Returns <c>true</c> when <c>args[2]</c> (consumeAllTokens) is <c>false</c>.</summary>
+     public virtual bool Apply(object[] args)
+     {
+         Debug.Assert(args.Length == 3);
+         bool consumeAllTokens = (bool)args[2];
+         // args are broken if consumeAllTokens is false
+         return !consumeAllTokens;
+     }
+ }
+
+ /// <summary>
+ /// Collects the public, non-obsolete constructors of every concrete Tokenizer, TokenFilter and
+ /// CharFilter in the assembly that contains <c>StandardAnalyzer</c>, skipping constructors
+ /// registered as always broken, and sorts each list so random picks are repeatable.
+ /// Fails if a collected constructor declares a parameter type that has no argument producer.
+ /// </summary>
+ [TestFixtureSetUp]
+ public static void BeforeClass()
+ {
+     IEnumerable<Type> analysisClasses = typeof(StandardAnalyzer).Assembly.GetTypes()
+         .Where(c => !c.IsAbstract && c.IsPublic && !c.IsInterface && c.IsClass && (c.GetCustomAttribute<ObsoleteAttribute>() == null)
+             && (c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter)))).ToArray();
+     tokenizers = new List<ConstructorInfo>();
+     tokenfilters = new List<ConstructorInfo>();
+     charfilters = new List<ConstructorInfo>();
+     foreach (Type c in analysisClasses)
+     {
+         foreach (ConstructorInfo ctor in c.GetConstructors())
+         {
+             // Single lookup instead of ContainsKey followed by the indexer.
+             IPredicate<object[]> brokenWhen;
+             bool alwaysBroken = brokenConstructors.TryGetValue(ctor, out brokenWhen) && brokenWhen == ALWAYS;
+             if (alwaysBroken || ctor.GetCustomAttribute<ObsoleteAttribute>() != null)
+             {
+                 continue;
+             }
+
+             // Computed once; every branch below validates the same parameter-type list.
+             var parameterTypes = Arrays.AsList(ctor.GetParameters().Select(p => p.ParameterType).ToArray());
+             string unsupportedMessage = ctor.ToString() + " has unsupported parameter types";
+
+             if (c.IsSubclassOf(typeof(Tokenizer)))
+             {
+                 assertTrue(unsupportedMessage, allowedTokenizerArgs.containsAll(parameterTypes));
+                 tokenizers.Add(ctor);
+             }
+             else if (c.IsSubclassOf(typeof(TokenFilter)))
+             {
+                 assertTrue(unsupportedMessage, allowedTokenFilterArgs.containsAll(parameterTypes));
+                 tokenfilters.Add(ctor);
+             }
+             else if (c.IsSubclassOf(typeof(CharFilter)))
+             {
+                 assertTrue(unsupportedMessage, allowedCharFilterArgs.containsAll(parameterTypes));
+                 charfilters.Add(ctor);
+             }
+             else
+             {
+                 fail("Cannot get here");
+             }
+         }
+     }
+
+     IComparer<ConstructorInfo> ctorComp = new ComparatorAnonymousInnerClassHelper();
+     tokenizers.Sort(ctorComp);
+     tokenfilters.Sort(ctorComp);
+     charfilters.Sort(ctorComp);
+     if (VERBOSE)
+     {
+         // Concatenating a List<T> prints only its type name, so join the elements explicitly.
+         // The declaring type is added because ConstructorInfo.ToString() does not include it.
+         Func<ConstructorInfo, string> describe = ci => ci.DeclaringType.Name + ": " + ci;
+         Console.WriteLine("tokenizers = " + string.Join(", ", tokenizers.Select(describe)));
+         Console.WriteLine("tokenfilters = " + string.Join(", ", tokenfilters.Select(describe)));
+         Console.WriteLine("charfilters = " + string.Join(", ", charfilters.Select(describe)));
+     }
+ }
+
+ /// <summary>
+ /// Orders constructors by declaring type name and then by signature, using ordinal
+ /// comparison so the order does not depend on the current culture.
+ /// </summary>
+ private class ComparatorAnonymousInnerClassHelper : IComparer<ConstructorInfo>
+ {
+     public ComparatorAnonymousInnerClassHelper()
+     {
+     }
+
+     /// <summary>
+     /// Compares the sort keys of <paramref name="arg0"/> and <paramref name="arg1"/>.
+     /// </summary>
+     /// <returns>Negative, zero or positive, per the <see cref="IComparer{T}"/> contract.</returns>
+     public virtual int Compare(ConstructorInfo arg0, ConstructorInfo arg1)
+     {
+         // original: arg0.toGenericString().compareTo(arg1.toGenericString());
+         // Java's toGenericString() includes the declaring class and compareTo is ordinal.
+         // ConstructorInfo.ToString() omits the declaring type and string.CompareTo is
+         // culture-sensitive, so build an explicit key and compare it ordinally.
+         return string.CompareOrdinal(SortKey(arg0), SortKey(arg1));
+     }
+
+     // Declaring type name followed by the constructor signature.
+     private static string SortKey(ConstructorInfo ctor)
+     {
+         return ctor.DeclaringType.FullName + " " + ctor.ToString();
+     }
+ }
+
+ /// <summary>Drops the constructor lists built by <c>BeforeClass</c>.</summary>
+ [TestFixtureTearDown]
+ public static void AfterClass()
+ {
+     charfilters = null;
+     tokenfilters = null;
+     tokenizers = null;
+ }
+
+
+ /// <summary>
+ /// Factory for one constructor argument; implementations are registered per parameter type in <c>argProducers</c>.
+ /// </summary>
+ private interface IArgProducer
+ {
+ /// <summary>Creates an argument value, drawing any randomness from <paramref name="random"/>.</summary>
+ object Create(Random random);
+ }
+
+ /// <summary>
+ /// Maps a constructor parameter type to the producer that creates values for it.
+ /// Its keys also seed the allowed-argument sets built in the static constructor.
+ /// </summary>
+ private static readonly IDictionary<Type, IArgProducer> argProducers = new IdentityHashMap<Type, IArgProducer>()
+ {
+ { typeof(int), new IntArgProducer() },
+ { typeof(char), new CharArgProducer() },
+ { typeof(float), new FloatArgProducer() },
+ { typeof(bool), new BooleanArgProducer() },
+ { typeof(byte), new ByteArgProducer() },
+ { typeof(byte[]), new ByteArrayArgProducer() },
+ { typeof(sbyte[]), new SByteArrayArgProducer() },
+ { typeof(Random), new RandomArgProducer() },
+ { typeof(LuceneVersion), new VersionArgProducer() },
+ { typeof(IEnumerable<string>), new StringEnumerableArgProducer() },
+ { typeof(ICollection<char[]>), new CharArrayCollectionArgProducer() },// CapitalizationFilter
+ { typeof(CharArraySet), new CharArraySetArgProducer() },
+ { typeof(Regex), new RegexArgProducer() },
+ { typeof(Regex[]), new RegexArrayArgProducer() },
+ { typeof(IPayloadEncoder), new PayloadEncoderArgProducer() },
+ { typeof(Dictionary), new DictionaryArgProducer() },
+ { typeof(Lucene43EdgeNGramTokenizer.Side), new Lucene43SideArgProducer() },
+ { typeof(EdgeNGramTokenFilter.Side), new SideArgProducer() },
+ { typeof(HyphenationTree), new HyphenationTreeArgProducer() },
+ { typeof(SnowballProgram), new SnowballProgramArgProducer() },
+ { typeof(string), new StringArgProducer() },
+ { typeof(NormalizeCharMap), new NormalizeCharMapArgProducer() },
+ { typeof(CharacterRunAutomaton), new CharacterRunAutomatonArgProducer() },
+ { typeof(CharArrayMap<string>), new StringCharArrayMapArgProducer() },
+ { typeof(StemmerOverrideFilter.StemmerOverrideMap), new StemmerOverrideMapArgProducer() },
+ { typeof(SynonymMap), new SynonymMapArgProducer() },
+ };
+
+ /// <summary>Produces integer arguments via <c>TestUtil.NextInt(random, -100, 100)</c>.</summary>
+ private class IntArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: could cause huge ram usage to use full int range for some filters
+         // (e.g. allocate enormous arrays)
+         // return Integer.valueOf(random.nextInt());
+         const int lowerBound = -100;
+         const int upperBound = 100;
+         object value = TestUtil.NextInt(random, lowerBound, upperBound);
+         return value;
+     }
+ }
+
+ /// <summary>Produces a random <see cref="char"/> that lies outside the surrogate range U+D800..U+DFFF.</summary>
+ private class CharArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: fix any filters that care to throw IAE instead.
+         // also add a unicode validating filter to validate termAtt?
+         // return Character.valueOf((char)random.nextInt(65536));
+         char candidate;
+         do
+         {
+             candidate = (char)random.nextInt(65536);
+         }
+         while (candidate >= '\uD800' && candidate <= '\uDFFF'); // redraw surrogates
+         return candidate;
+     }
+ }
+
+ /// <summary>Produces a <see cref="float"/> by narrowing <c>random.NextDouble()</c>.</summary>
+ private class FloatArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         double sample = random.NextDouble();
+         return (float)sample;
+     }
+ }
+
+ /// <summary>Produces the result of <c>random.nextBoolean()</c>.</summary>
+ private class BooleanArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         object flag = random.nextBoolean();
+         return flag;
+     }
+ }
+
+ /// <summary>Produces a <see cref="byte"/> from <c>random.nextInt(256)</c>.</summary>
+ private class ByteArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // C# byte is unsigned, so unlike the signed byte of the Java original
+         // the cast result is never negative.
+         int raw = random.nextInt(256);
+         return (byte)raw;
+     }
+ }
+
+ /// <summary>Produces a byte array of length <c>random.nextInt(256)</c> filled by <c>random.NextBytes</c>.</summary>
+ private class ByteArrayArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         int length = random.nextInt(256);
+         byte[] buffer = new byte[length];
+         random.NextBytes(buffer);
+         return buffer;
+     }
+ }
+
+ /// <summary>
+ /// Produces random bytes exposed as <c>sbyte[]</c>; the array is filled as <c>byte[]</c>
+ /// and then cast through <see cref="Array"/>.
+ /// </summary>
+ private class SByteArrayArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         int length = random.nextInt(256);
+         byte[] buffer = new byte[length];
+         random.NextBytes(buffer);
+         Array untyped = buffer;
+         return (sbyte[])untyped;
+     }
+ }
+
+ /// <summary>Produces a new <see cref="Random"/> seeded with <c>random.Next()</c>.</summary>
+ private class RandomArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         int childSeed = random.Next();
+         return new Random(childSeed);
+     }
+ }
+
+ /// <summary>Always produces <c>TEST_VERSION_CURRENT</c>; the random source is not consulted.</summary>
+ private class VersionArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // we expect bugs in emulating old versions
+         object currentVersion = TEST_VERSION_CURRENT;
+         return currentVersion;
+     }
+ }
+
+ /// <summary>
+ /// Produces a set of token type names drawn from <c>StandardTokenizer.TOKEN_TYPES</c>
+ /// (argument for TypeTokenFilter).
+ /// </summary>
+ private class StringEnumerableArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TypeTokenFilter
+         ISet<string> chosen = new HashSet<string>();
+         for (int remaining = random.nextInt(5); remaining > 0; remaining--)
+         {
+             int pick = random.nextInt(StandardTokenizer.TOKEN_TYPES.Length);
+             chosen.Add(StandardTokenizer.TOKEN_TYPES[pick]);
+         }
+         return chosen;
+     }
+ }
+ /// <summary>
+ /// Produces a list of char arrays built from random simple strings
+ /// (argument for CapitalizationFilter).
+ /// </summary>
+ private class CharArrayCollectionArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // CapitalizationFilter
+         ICollection<char[]> words = new List<char[]>();
+         for (int remaining = random.nextInt(5); remaining > 0; remaining--)
+         {
+             string word = TestUtil.RandomSimpleString(random);
+             words.Add(word.toCharArray());
+         }
+         return words;
+     }
+ }
+
+ /// <summary>Produces a <c>CharArraySet</c> populated with random simple strings.</summary>
+ private class CharArraySetArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // Draw order matters for reproducibility: size first, then the boolean flag.
+         int size = random.nextInt(10);
+         bool flag = random.nextBoolean();
+         CharArraySet result = new CharArraySet(TEST_VERSION_CURRENT, size, flag);
+         for (int added = 0; added < size; added++)
+         {
+             // TODO: make nastier
+             result.add(TestUtil.RandomSimpleString(random));
+         }
+         return result;
+     }
+ }
+
+ /// <summary>Produces the fixed compiled pattern <c>a</c>; the random source is not consulted.</summary>
+ private class RegexArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: don't want to make the exponentially slow ones Dawid documents
+         // in TestPatternReplaceFilter, so don't use truly random patterns (for now)
+         const string pattern = "a";
+         return new Regex(pattern, RegexOptions.Compiled);
+     }
+ }
+
+ /// <summary>Produces two fixed compiled patterns: one for letter runs, one for digit runs.</summary>
+ private class RegexArrayArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         Regex letters = new Regex("([a-z]+)", RegexOptions.Compiled);
+         Regex digits = new Regex("([0-9]+)", RegexOptions.Compiled);
+         return new Regex[] { letters, digits };
+     }
+ }
+
+ /// <summary>Always produces an <c>IdentityEncoder</c>.</summary>
+ private class PayloadEncoderArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // the other encoders will throw exceptions if tokens aren't numbers?
+         var encoder = new IdentityEncoder();
+         return encoder;
+     }
+ }
+
+ /// <summary>
+ /// Produces a Hunspell <c>Dictionary</c> loaded from the <c>simple.aff</c> / <c>simple.dic</c>
+ /// resources of <c>TestHunspellStemFilter</c>. Any load failure propagates to the caller.
+ /// </summary>
+ private class DictionaryArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: make nastier
+         // No try/catch: the previous "catch { throw ex; }" only reset the stack trace.
+         using (Stream affixStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.aff"))
+         using (Stream dictStream = typeof(TestHunspellStemFilter).getResourceAsStream("simple.dic"))
+         {
+             return new Dictionary(affixStream, dictStream);
+         }
+     }
+ }
+
+ /// <summary>Produces <c>FRONT</c> or <c>BACK</c> of <c>Lucene43EdgeNGramTokenizer.Side</c>, chosen by <c>random.nextBoolean()</c>.</summary>
+ private class Lucene43SideArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         if (random.nextBoolean())
+         {
+             return Lucene43EdgeNGramTokenizer.Side.FRONT;
+         }
+         return Lucene43EdgeNGramTokenizer.Side.BACK;
+     }
+ }
+
+ /// <summary>Produces <c>FRONT</c> or <c>BACK</c> of <c>EdgeNGramTokenFilter.Side</c>, chosen by <c>random.nextBoolean()</c>.</summary>
+ private class SideArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         if (random.nextBoolean())
+         {
+             return EdgeNGramTokenFilter.Side.FRONT;
+         }
+         return EdgeNGramTokenFilter.Side.BACK;
+     }
+ }
+
+ /// <summary>
+ /// Produces a <c>HyphenationTree</c> loaded from the <c>da_UTF8.xml</c> resource of
+ /// <c>TestCompoundWordTokenFilter</c>. Any load failure propagates to the caller.
+ /// </summary>
+ private class HyphenationTreeArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: make nastier
+         // No try/catch: the previous "catch { throw ex; }" only reset the stack trace.
+         using (Stream @is = typeof(TestCompoundWordTokenFilter).getResourceAsStream("da_UTF8.xml"))
+         {
+             HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+             return hyphenator;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Produces an instance of a randomly chosen Snowball stemmer, resolved by name from
+ /// <c>TestSnowball.SNOWBALL_LANGS</c> and created through its parameterless constructor.
+ /// </summary>
+ private class SnowballProgramArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         string lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.Length)];
+         string typeName = "Lucene.Net.Tartarus.Snowball.Ext." + lang + "Stemmer, Lucene.Net.Analysis.Common";
+         // throwOnError: a missing stemmer type fails here with its name in the exception,
+         // instead of a NullReferenceException on the next line.
+         Type clazz = Type.GetType(typeName, true);
+         // No try/catch: the previous "catch { throw ex; }" only reset the stack trace.
+         return clazz.GetConstructor(new Type[0]).Invoke(new object[0]);
+     }
+ }
+
+ /// <summary>
+ /// Produces either a token type name from <c>StandardTokenizer.TOKEN_TYPES</c> or a random
+ /// simple string, chosen by <c>random.nextBoolean()</c>.
+ /// </summary>
+ private class StringArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: make nastier
+         bool useTokenType = random.nextBoolean();
+         if (!useTokenType)
+         {
+             return TestUtil.RandomSimpleString(random);
+         }
+
+         // a token type
+         int pick = random.nextInt(StandardTokenizer.TOKEN_TYPES.Length);
+         return StandardTokenizer.TOKEN_TYPES[pick];
+     }
+ }
+
+ /// <summary>
+ /// Produces a <c>NormalizeCharMap</c> with random mappings; empty keys and repeated keys
+ /// are skipped.
+ /// </summary>
+ private class NormalizeCharMapArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
+         // we can't add duplicate keys, or NormalizeCharMap gets angry
+         ISet<string> seenKeys = new HashSet<string>();
+         int attempts = random.nextInt(5);
+         for (int attempt = 0; attempt < attempts; attempt++)
+         {
+             string key = TestUtil.RandomSimpleString(random);
+             if (key.Length == 0 || seenKeys.contains(key))
+             {
+                 // Skipped keys draw no value, which keeps the random sequence unchanged.
+                 continue;
+             }
+
+             string value = TestUtil.RandomSimpleString(random);
+             mapBuilder.Add(key, value);
+             seenKeys.add(key);
+         }
+         return mapBuilder.Build();
+     }
+ }
+
+ /// <summary>
+ /// Produces one of five predefined automata from <c>MockTokenizer</c> / <c>MockTokenFilter</c>,
+ /// selected by <c>random.nextInt(5)</c>.
+ /// </summary>
+ private class CharacterRunAutomatonArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // TODO: could probably use a purely random automaton
+         object chosen;
+         int selector = random.nextInt(5);
+         switch (selector)
+         {
+             case 0:
+                 chosen = MockTokenizer.KEYWORD;
+                 break;
+             case 1:
+                 chosen = MockTokenizer.SIMPLE;
+                 break;
+             case 2:
+                 chosen = MockTokenizer.WHITESPACE;
+                 break;
+             case 3:
+                 chosen = MockTokenFilter.EMPTY_STOPSET;
+                 break;
+             default:
+                 chosen = MockTokenFilter.ENGLISH_STOPSET;
+                 break;
+         }
+         return chosen;
+     }
+ }
+
+ /// <summary>Produces a <c>CharArrayMap&lt;string&gt;</c> filled with random simple key/value strings.</summary>
+ private class StringCharArrayMapArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         // Draw order matters for reproducibility: size first, then the boolean flag.
+         int size = random.nextInt(10);
+         bool flag = random.nextBoolean();
+         CharArrayMap<string> result = new CharArrayMap<string>(TEST_VERSION_CURRENT, size, flag);
+         for (int added = 0; added < size; added++)
+         {
+             // TODO: make nastier
+             string key = TestUtil.RandomSimpleString(random);
+             string value = TestUtil.RandomSimpleString(random);
+             result.Put(key, value);
+         }
+         return result;
+     }
+ }
+
+ /// <summary>
+ /// Produces a <c>StemmerOverrideMap</c> built from random non-empty input/output pairs.
+ /// Any failure while building propagates to the caller.
+ /// </summary>
+ private class StemmerOverrideMapArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         int num = random.nextInt(10);
+         StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
+         for (int i = 0; i < num; i++)
+         {
+             string input;
+             do
+             {
+                 input = TestUtil.RandomRealisticUnicodeString(random);
+             } while (input == string.Empty);
+
+             // The result of this call is discarded. It is kept on purpose: removing it
+             // would change the sequence of values drawn from 'random' for a given seed.
+             TestUtil.RandomSimpleString(random);
+
+             string @out;
+             do
+             {
+                 @out = TestUtil.RandomRealisticUnicodeString(random);
+             } while (@out == string.Empty);
+
+             builder.Add(input, @out);
+         }
+
+         // No try/catch: the previous "catch { throw ex; }" only reset the stack trace.
+         return builder.Build();
+     }
+ }
+
+ /// <summary>
+ /// Produces a <c>SynonymMap</c> with at least ten random entries (count from <c>AtLeast(10)</c>).
+ /// Any failure while building propagates to the caller.
+ /// </summary>
+ private class SynonymMapArgProducer : IArgProducer
+ {
+     public object Create(Random random)
+     {
+         SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
+         int numEntries = AtLeast(10);
+         for (int j = 0; j < numEntries; j++)
+         {
+             AddSyn(b, RandomNonEmptyString(random), RandomNonEmptyString(random), random.nextBoolean());
+         }
+
+         // No try/catch: the previous "catch { throw ex; }" only reset the stack trace.
+         return b.Build();
+     }
+
+     /// <summary>
+     /// Adds one rule, replacing every run of spaces in <paramref name="input"/> and
+     /// <paramref name="output"/> with a single U+0000 character.
+     /// </summary>
+     private void AddSyn(SynonymMap.Builder b, string input, string output, bool keepOrig)
+     {
+         // The Java original used String.replaceAll(" +", ...), which is a regex replace.
+         // string.Replace would only match the literal two-character text " +".
+         b.Add(new CharsRef(Regex.Replace(input, " +", "\u0000")),
+             new CharsRef(Regex.Replace(output, " +", "\u0000")),
+             keepOrig);
+     }
+
+     /// <summary>
+     /// Draws trimmed random unicode strings until one is non-empty and contains no U+0000.
+     /// </summary>
+     private string RandomNonEmptyString(Random random)
+     {
+         while (true)
+         {
+             string s = TestUtil.RandomUnicodeString(random).Trim();
+             if (s.Length != 0 && s.IndexOf('\u0000') == -1)
+             {
+                 return s;
+             }
+         }
+     }
+ }
+
+
+
+ /// <summary>
+ /// Creates a random argument for a parameter of type <paramref name="paramType"/> using the
+ /// producer registered in <c>argProducers</c>, and casts it to <typeparamref name="T"/>.
+ /// Fails the assertion with a descriptive message when no producer is registered.
+ /// </summary>
+ internal static T NewRandomArg<T>(Random random, Type paramType)
+ {
+     // TryGetValue leaves 'producer' null for an unknown type, so the assertion below
+     // reports the problem. An indexer lookup may throw before the message is reached.
+     IArgProducer producer;
+     argProducers.TryGetValue(paramType, out producer);
+     assertNotNull("No producer for arguments of type " + paramType + " found", producer);
+     return (T)producer.Create(random);
+ }
+
+ /// <summary>
+ /// Builds the argument list for a tokenizer constructor: <paramref name="reader"/> for
+ /// TextReader parameters, the default attribute factory for AttributeFactory parameters,
+ /// <c>null</c> for AttributeSource parameters, and a random value for anything else.
+ /// </summary>
+ internal static object[] NewTokenizerArgs(Random random, TextReader reader, Type[] paramTypes)
+ {
+     object[] args = new object[paramTypes.Length];
+     int slot = 0;
+     foreach (Type paramType in paramTypes)
+     {
+         object value;
+         if (paramType == typeof(TextReader))
+         {
+             value = reader;
+         }
+         else if (paramType == typeof(AttributeSource.AttributeFactory))
+         {
+             // TODO: maybe the collator one...???
+             value = AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+         }
+         else if (paramType == typeof(AttributeSource))
+         {
+             // TODO: args[i] = new AttributeSource();
+             // this is currently too scary to deal with!
+             value = null; // force IAE
+         }
+         else
+         {
+             value = NewRandomArg<object>(random, paramType);
+         }
+         args[slot++] = value;
+     }
+     return args;
+ }
+
+ /// <summary>
+ /// Builds the argument list for a char filter constructor: <paramref name="reader"/> for
+ /// TextReader parameters and a random value for anything else.
+ /// </summary>
+ internal static object[] NewCharFilterArgs(Random random, TextReader reader, Type[] paramTypes)
+ {
+     object[] args = new object[paramTypes.Length];
+     int slot = 0;
+     foreach (Type paramType in paramTypes)
+     {
+         args[slot++] = paramType == typeof(TextReader)
+             ? reader
+             : NewRandomArg<object>(random, paramType);
+     }
+     return args;
+ }
+
+ /// <summary>
+ /// Builds the argument list for a token filter constructor: <paramref name="stream"/> for
+ /// TokenStream parameters, a freshly built CommonGramsFilter over <paramref name="stream"/>
+ /// for CommonGramsFilter parameters, and a random value for anything else.
+ /// </summary>
+ static object[] NewFilterArgs(Random random, TokenStream stream, Type[] paramTypes)
+ {
+     object[] args = new object[paramTypes.Length];
+     int slot = 0;
+     foreach (Type paramType in paramTypes)
+     {
+         object value;
+         if (paramType == typeof(TokenStream))
+         {
+             value = stream;
+         }
+         else if (paramType == typeof(CommonGramsFilter))
+         {
+             // TODO: fix this one, that's broken: CommonGramsQueryFilter takes this one explicitly
+             CharArraySet commonWords = NewRandomArg<CharArraySet>(random, typeof(CharArraySet));
+             value = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, commonWords);
+         }
+         else
+         {
+             value = NewRandomArg<object>(random, paramType);
+         }
+         args[slot++] = value;
+     }
+     return args;
+ }
+
+ private class MockRandomAnalyzer : Analyzer
+ {
+ internal readonly int seed;
+
+
+ /// <summary>
+ /// Stores <paramref name="seed"/>; each member that builds part of the chain creates a new Random from it.
+ /// </summary>
+ public MockRandomAnalyzer(int seed)
+ {
+ this.seed = seed;
+ }
+
+ /// <summary>
+ /// Rebuilds the tokenizer and filter chain from the seed over an empty reader and reports
+ /// whether the resulting chain's offsets are considered correct.
+ /// </summary>
+ public bool OffsetsAreCorrect
+ {
+     get
+     {
+         // TODO: can we not do the full chain here!?
+         Random rnd = new Random(seed);
+         TextReader emptyInput = new StringReader("");
+         TokenizerSpec tokSpec = NewTokenizer(rnd, emptyInput);
+         TokenFilterSpec chainSpec = NewFilterChain(rnd, tokSpec.tokenizer, tokSpec.offsetsAreCorrect);
+         bool chainOffsetsOk = chainSpec.offsetsAreCorrect;
+         return chainOffsetsOk;
+     }
+ }
+
+ /// <summary>
+ /// Builds the random tokenizer and filter chain from the seed and wraps them as the
+ /// analyzer's components.
+ /// </summary>
+ public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+     Random rnd = new Random(seed);
+     TokenizerSpec tokSpec = NewTokenizer(rnd, reader);
+     Tokenizer source = tokSpec.tokenizer;
+     TokenFilterSpec chainSpec = NewFilterChain(rnd, source, tokSpec.offsetsAreCorrect);
+     // Debug aid: tokSpec.toString / chainSpec.toString describe what was built for this seed.
+     return new TokenStreamComponents(source, chainSpec.stream);
+ }
+
+ /// <summary>
+ /// Wraps <paramref name="reader"/> in the random char filter chain derived from the seed.
+ /// </summary>
+ public override TextReader InitReader(string fieldName, TextReader reader)
+ {
+     Random rnd = new Random(seed);
+     CharFilterSpec chain = NewCharFilterChain(rnd, reader);
+     TextReader wrapped = chain.reader;
+     return wrapped;
+ }
+
+
+ /// <summary>
+ /// Rebuilds the whole chain from the seed over an empty reader and returns a multi-line
+ /// description of its char filters, tokenizer, token filters and offsets flag.
+ /// </summary>
+ public override string ToString()
+ {
+     Random rnd = new Random(seed);
+     CharFilterSpec charSpec = NewCharFilterChain(rnd, new StringReader(""));
+
+     // intentional: initReader gets its own separate random
+     rnd = new Random(seed);
+     TokenizerSpec tokSpec = NewTokenizer(rnd, charSpec.reader);
+     TokenFilterSpec chainSpec = NewFilterChain(rnd, tokSpec.tokenizer, tokSpec.offsetsAreCorrect);
+
+     StringBuilder text = new StringBuilder();
+     text.Append("\ncharfilters=").Append(charSpec.toString);
+     text.Append("\n").Append("tokenizer=").Append(tokSpec.toString);
+     text.Append("\n").Append("filters=").Append(chainSpec.toString);
+     text.Append("\n").Append("offsetsAreCorrect=" + chainSpec.offsetsAreCorrect);
+     return text.ToString();
+ }
+
+ /// <summary>
+ /// Invokes <paramref name="ctor"/> with <paramref name="args"/> and, on success, appends a
+ /// description of the created component to <paramref name="descr"/>.
+ /// </summary>
+ /// <returns>
+ /// The new instance, or <c>default(T)</c> when the constructor rejected the arguments with
+ /// exactly ArgumentException, ArgumentOutOfRangeException or NotSupportedException.
+ /// </returns>
+ /// <exception cref="TargetInvocationException">
+ /// Rethrown unchanged when the constructor failed for any other reason.
+ /// </exception>
+ private T CreateComponent<T>(ConstructorInfo ctor, object[] args, StringBuilder descr)
+ {
+     try
+     {
+         T instance = (T)ctor.Invoke(args);
+         descr.Append("\n ");
+         descr.Append(ctor.DeclaringType.Name);
+         // LUCENENET: the brackets are not stripped from the Arrays.ToString result,
+         // because doing so truncated the type names.
+         string @params = Arrays.ToString(args);
+         descr.Append("(").Append(@params).Append(")");
+         return instance;
+     }
+     catch (TargetInvocationException ite)
+     {
+         // Exact type matches only: subclasses other than those listed are not ignored.
+         Type causeType = ite.InnerException == null ? null : ite.InnerException.GetType();
+         bool ignorable = causeType == typeof(ArgumentException)
+             || causeType == typeof(ArgumentOutOfRangeException)
+             || causeType == typeof(NotSupportedException);
+         if (!ignorable)
+         {
+             // "throw;" keeps the original stack trace ("throw ite;" would reset it).
+             throw;
+         }
+
+         // that's ok, ignore
+         if (VERBOSE)
+         {
+             Console.WriteLine("Ignoring IAE/UOE from ctor:");
+         }
+     }
+     return default(T); // no success
+ }
+
+ /// <summary>
+ /// Returns <c>true</c> when <paramref name="ctor"/> is registered in <c>brokenConstructors</c>
+ /// and its predicate accepts <paramref name="args"/>.
+ /// </summary>
+ private bool Broken(ConstructorInfo ctor, object[] args)
+ {
+     IPredicate<object[]> brokenWhen;
+     if (!brokenConstructors.TryGetValue(ctor, out brokenWhen) || brokenWhen == null)
+     {
+         return false;
+     }
+     return brokenWhen.Apply(args);
+ }
+
+ /// <summary>
+ /// Returns <c>true</c> when <paramref name="ctor"/> is registered in
+ /// <c>brokenOffsetsConstructors</c> and its predicate accepts <paramref name="args"/>.
+ /// </summary>
+ private bool BrokenOffsets(ConstructorInfo ctor, object[] args)
+ {
+     IPredicate<object[]> brokenWhen;
+     if (!brokenOffsetsConstructors.TryGetValue(ctor, out brokenWhen) || brokenWhen == null)
+     {
+         return false;
+     }
+     return brokenWhen.Apply(args);
+ }
+
+ // create a new random tokenizer from classpath
+ /// <summary>
+ /// Keeps picking random tokenizer constructors until one can be instantiated, and returns
+ /// the tokenizer with its description and offsets flag. Asserts that a constructor which
+ /// was rejected did not read from the reader first.
+ /// </summary>
+ private TokenizerSpec NewTokenizer(Random random, TextReader reader)
+ {
+     TokenizerSpec result = new TokenizerSpec();
+     do
+     {
+         ConstructorInfo ctor = tokenizers[random.nextInt(tokenizers.Count)];
+         StringBuilder description = new StringBuilder();
+         CheckThatYouDidntReadAnythingReaderWrapper guard = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
+         Type[] paramTypes = ctor.GetParameters().Select(p => p.ParameterType).ToArray();
+         object[] args = NewTokenizerArgs(random, guard, paramTypes);
+         if (Broken(ctor, args))
+         {
+             continue;
+         }
+
+         result.tokenizer = CreateComponent<Tokenizer>(ctor, args, description);
+         if (result.tokenizer == null)
+         {
+             assertFalse(ctor.DeclaringType.Name + " has read something in ctor but failed with UOE/IAE", guard.readSomething);
+             continue;
+         }
+
+         if (BrokenOffsets(ctor, args))
+         {
+             result.offsetsAreCorrect = false;
+         }
+         result.toString = description.ToString();
+     }
+     while (result.tokenizer == null);
+     return result;
+ }
+
+ /// <summary>
+ /// Wraps <paramref name="reader"/> in <c>random.nextInt(3)</c> randomly chosen char filters
+ /// and returns the outermost reader together with a description of the chain.
+ /// </summary>
+ private CharFilterSpec NewCharFilterChain(Random random, TextReader reader)
+ {
+     CharFilterSpec result = new CharFilterSpec();
+     result.reader = reader;
+     StringBuilder description = new StringBuilder();
+     int remaining = random.nextInt(3);
+     while (remaining-- > 0)
+     {
+         // Retry until a non-broken constructor accepts its random arguments.
+         TextReader wrapped = null;
+         while (wrapped == null)
+         {
+             ConstructorInfo ctor = charfilters[random.nextInt(charfilters.Count)];
+             Type[] paramTypes = ctor.GetParameters().Select(p => p.ParameterType).ToArray();
+             object[] args = NewCharFilterArgs(random, result.reader, paramTypes);
+             if (Broken(ctor, args))
+             {
+                 continue;
+             }
+             wrapped = CreateComponent<TextReader>(ctor, args, description);
+         }
+         result.reader = wrapped;
+     }
+     result.toString = description.ToString();
+     return result;
+ }
+
+ /// <summary>
+ /// Stacks <c>random.nextInt(5)</c> randomly chosen token filters on top of
+ /// <paramref name="tokenizer"/>, inserting a ValidatingTokenFilter before every stage and
+ /// after the last one.
+ /// </summary>
+ /// <param name="random">Source of all random choices.</param>
+ /// <param name="tokenizer">The tokenizer at the bottom of the chain.</param>
+ /// <param name="offsetsAreCorrect">Whether the tokenizer's offsets are considered correct.</param>
+ /// <returns>The final stream, its description and the accumulated offsets flag.</returns>
+ private TokenFilterSpec NewFilterChain(Random random, Tokenizer tokenizer, bool offsetsAreCorrect)
+ {
+     TokenFilterSpec spec = new TokenFilterSpec();
+     spec.offsetsAreCorrect = offsetsAreCorrect;
+     spec.stream = tokenizer;
+     StringBuilder descr = new StringBuilder();
+     int numFilters = random.nextInt(5);
+     for (int i = 0; i < numFilters; i++)
+     {
+         // Insert ValidatingTF after each stage so we can
+         // catch problems right after the TF that "caused"
+         // them:
+         spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
+
+         while (true)
+         {
+             ConstructorInfo ctor = tokenfilters[random.nextInt(tokenfilters.size())];
+
+             // hack: MockGraph/MockLookahead has assertions that will trip if they follow
+             // an offsets violator. so we can't use them after e.g. wikipediatokenizer
+             // Both type checks are grouped under the offsets test; a misplaced parenthesis
+             // previously caused MockRandomLookaheadTokenFilter to be skipped unconditionally.
+             if (!spec.offsetsAreCorrect &&
+                 (ctor.DeclaringType.Equals(typeof(MockGraphTokenFilter))
+                  || ctor.DeclaringType.Equals(typeof(MockRandomLookaheadTokenFilter))))
+             {
+                 continue;
+             }
+
+             object[] args = NewFilterArgs(random, spec.stream, ctor.GetParameters().Select(p => p.ParameterType).ToArray());
+             if (Broken(ctor, args))
+             {
+                 continue;
+             }
+             TokenFilter flt = CreateComponent<TokenFilter>(ctor, args, descr);
+             if (flt != null)
+             {
+                 spec.offsetsAreCorrect &= !BrokenOffsets(ctor, args);
+                 spec.stream = flt;
+                 break;
+             }
+         }
+     }
+
+     // Insert ValidatingTF after each stage so we can
+     // catch problems right after the TF that "caused"
+     // them:
+     spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
+
+     spec.toString = descr.toString();
+     return spec;
+ }
+ }
+
+
+ /// <summary>
+ /// Reader wrapper that records, in <c>readSomething</c>, whether any read or skip was
+ /// attempted on it. NewTokenizer uses it to assert that a tokenizer constructor which
+ /// rejects its arguments has not consumed input first.
+ /// </summary>
+ internal class CheckThatYouDidntReadAnythingReaderWrapper : CharFilter
+ {
+ // Set to true by Read(...) and Skip(...); never reset by this class.
+ internal bool readSomething;
+
+ public CheckThatYouDidntReadAnythingReaderWrapper(TextReader @in)
+ : base(@in)
+ { }
+
+ // NOTE(review): this cast assumes the wrapped reader is itself a CharFilter. The
+ // constructor accepts any TextReader (callers in this file pass a StringReader), in which
+ // case Skip/Mark/IsMarkSupported/Ready/Reset would presumably throw InvalidCastException — confirm.
+ private CharFilter Input
+ {
+ get { return (CharFilter)this.input; }
+ }
+
+ protected override int Correct(int currentOff)
+ {
+ return currentOff; // we don't change any offsets
+ }
+
+ public override int Read(char[] cbuf, int off, int len)
+ {
+ readSomething = true;
+ return input.Read(cbuf, off, len);
+ }
+
+ public override int Read()
+ {
+ readSomething = true;
+ return input.Read();
+ }
+
+ // LUCENENET: TextReader doesn't support this overload
+ //public int read(char[] cbuf)
+ //{
+ // readSomething = true;
+ // return input.read(cbuf);
+ //}
+
+ public override long Skip(int n)
+ {
+ readSomething = true;
+ return Input.Skip(n);
+ }
+
+ // Mark, IsMarkSupported, Ready and Reset delegate to the wrapped reader without
+ // setting readSomething.
+ public override void Mark(int readAheadLimit)
+ {
+ Input.Mark(readAheadLimit);
+ }
+
+ public override bool IsMarkSupported
+ {
+ get
+ {
+ return Input.IsMarkSupported;
+ }
+ }
+
+ public override bool Ready()
+ {
+ return Input.Ready();
+ }
+
+ public override void Reset()
+ {
+ Input.Reset();
+ }
+ }
+
+ /// <summary>
+ /// Result of building a random tokenizer: the instance, its textual description, and
+ /// whether its offsets are considered correct (initially true).
+ /// </summary>
+ internal class TokenizerSpec
+ {
+ internal Tokenizer tokenizer;
+ internal string toString;
+ internal bool offsetsAreCorrect = true;
+ }
+
+ /// <summary>
+ /// Result of building a random filter chain: the outermost stream, its textual
+ /// description, and whether the chain's offsets are considered correct (initially true).
+ /// </summary>
+ internal class TokenFilterSpec
+ {
+ internal TokenStream stream;
+ internal string toString;
+ internal bool offsetsAreCorrect = true;
+ }
+
+ /// <summary>
+ /// Result of building a random char filter chain: the outermost reader and its textual description.
+ /// </summary>
+ internal class CharFilterSpec
+ {
+ internal TextReader reader;
+ internal string toString;
+ }
+
+ /// <summary>
+ /// Builds <c>AtLeast(20)</c> random analyzers and runs <c>CheckRandomData</c> on each one.
+ /// On failure the offending analyzer is printed before the exception is rethrown.
+ /// </summary>
+ [Test, LongRunningTest]
+ public void TestRandomChains_()
+ {
+     int numIterations = AtLeast(20);
+     Random random = Random();
+     for (int i = 0; i < numIterations; i++)
+     {
+         MockRandomAnalyzer a = new MockRandomAnalyzer(random.Next());
+         if (VERBOSE)
+         {
+             Console.WriteLine("Creating random analyzer:" + a);
+         }
+         try
+         {
+             CheckRandomData(random, a, 500 * RANDOM_MULTIPLIER, 20, false,
+                 false /* We already validate our own offsets... */);
+         }
+         catch (Exception)
+         {
+             Console.WriteLine("Exception from random analyzer: " + a);
+             // "throw;" keeps the original stack trace ("throw e;" would reset it).
+             throw;
+         }
+     }
+ }
- using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
- using CJKBigramFilter = org.apache.lucene.analysis.cjk.CJKBigramFilter;
- using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter;
- using CommonGramsQueryFilter = org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
- using HyphenationCompoundWordTokenFilter = org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
- using TestCompoundWordTokenFilter = org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
- using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
- using Dictionary = org.apache.lucene.analysis.hunspell.Dictionary;
- using TestHunspellStemFilter = org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
- using HyphenatedWordsFilter = org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
- using LimitTokenCountFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
- using LimitTokenPositionFilter = org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
- using StemmerOverrideFilter = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
- using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
- using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
- using EdgeNGramTokenFilter = org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
- using Lucene43EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
- using PathHierarchyTokenizer = org.apache.lucene.analysis.path.PathHierarchyTokenizer;
- using ReversePathHierarchyTokenizer = org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
- using IdentityEncoder = org.apache.lucene.analysis.payloads.IdentityEncoder;
- using PayloadEncoder = org.apache.lucene.analysis.payloads.PayloadEncoder;
- using TestSnowball = org.apache.lucene.analysis.snowball.TestSnowball;
- using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
- using SynonymMap = org.apache.lucene.analysis.synonym.SynonymMap;
- using CharArrayMap = org.apache.lucene.analysis.util.CharArrayMap;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
- using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
- using CharsRef = org.apache.lucene.util.CharsRef;
- using Rethrow = org.apache.lucene.util.Rethrow;
- using TestUtil = org.apache.lucene.util.TestUtil;
- using Version = org.apache.lucene.util.Version;
- using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
- using AfterClass = org.junit.AfterClass;
- using BeforeClass = org.junit.BeforeClass;
- using SnowballProgram = org.tartarus.snowball.SnowballProgram;
- using InputSource = org.xml.sax.InputSource;
-
- /// <summary>
- /// tests random analysis chains </summary>
- public class TestRandomChains : BaseTokenStreamTestCase
- {
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.Tokenizer>> tokenizers;
- internal static IList<Constructor<?>> tokenizers;
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.TokenFilter>> tokenfilters;
- internal static IList<Constructor<?>> tokenfilters;
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: static java.util.List<Constructor<? extends org.apache.lucene.analysis.CharFilter>> charfilters;
- internal static IList<Constructor<?>> charfilters;
-
- private interface Predicate<T> // Boolean test over a value of type T; instantiated in this file with T = object[].
- {
- bool apply(T o); // Returns the verdict for o.
- }
-
- private static readonly Predicate<object[]> ALWAYS = new PredicateAnonymousInnerClassHelper();
-
- /// <summary>
- /// Predicate over constructor-argument arrays that accepts every input.
- /// </summary>
- private class PredicateAnonymousInnerClassHelper : Predicate<object[]>
- {
-     public PredicateAnonymousInnerClassHelper()
-     {
-     }
-
-     /// <summary>Returns <c>true</c> regardless of <paramref name="args"/>.</summary>
-     /// <param name="args">Ignored.</param>
-     public virtual bool apply(object[] args)
-     {
-         return true;
-     }
- }
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new java.util.HashMap<>();
- private static readonly IDictionary<Constructor<?>, Predicate<object[]>> brokenConstructors = new Dictionary<Constructor<?>, Predicate<object[]>>();
- static TestRandomChains() // Static initializer: fills brokenConstructors and brokenOffsetsConstructors, then builds the three allowed-argument-type sets.
- {
- try
- {
- brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(typeof(TokenStream), typeof(int))] = ALWAYS;
- brokenConstructors[typeof(LimitTokenCountFilter).GetConstructor(typeof(TokenStream), typeof(int), typeof(bool))] = new PredicateAnonymousInnerClassHelper2();
- brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(typeof(TokenStream), typeof(int))] = ALWAYS;
- brokenConstructors[typeof(LimitTokenPositionFilter).GetConstructor(typeof(TokenStream), typeof(int), typeof(bool))] = new PredicateAnonymousInnerClassHelper3();
- foreach (Type c in Arrays.asList<Type>(typeof(CachingTokenFilter), typeof(CrankyTokenFilter), typeof(ValidatingTokenFilter))) // every constructor of these types is registered with ALWAYS
- // TODO: can we promote some of these to be only
- // offsets offenders?
- // doesn't actually reset itself! (presumably refers to CachingTokenFilter -- TODO confirm against the Java original)
- // Not broken, simulates brokenness: (presumably CrankyTokenFilter -- TODO confirm)
- // Not broken: we forcefully add this, so we shouldn't
- // also randomly pick it: (presumably ValidatingTokenFilter -- TODO confirm)
- {
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (Constructor<?> ctor : c.getConstructors())
- foreach (Constructor<?> ctor in c.GetConstructors())
- {
- brokenConstructors[ctor] = ALWAYS;
- }
- }
- }
- catch (Exception e)
- {
- throw new Exception(e); // NOTE(review): System.Exception has no constructor taking only an Exception; looks like converter residue.
- }
- try
- {
- foreach (Type c in Arrays.asList<Type>(typeof(ReversePathHierarchyTokenizer), typeof(PathHierarchyTokenizer), typeof(WikipediaTokenizer), typeof(CJKBigramFilter), typeof(HyphenatedWordsFilter), typeof(CommonGramsFilter), typeof(CommonGramsQueryFilter), typeof(WordDelimiterFilter))) // every constructor of these types is registered in brokenOffsetsConstructors
- // NOTE(review): the notes below appear to have been attached to individual list entries before conversion; which type each one refers to must be checked against the Java original.
- // TODO: it seems to mess up offsets!?
- // TODO: doesn't handle graph inputs
- // TODO: doesn't handle graph inputs (or even look at positionIncrement)
- // TODO: LUCENE-4983
- // TODO: doesn't handle graph inputs
- // TODO: probably doesn't handle graph inputs, too afraid to try
- {
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (Constructor<?> ctor : c.getConstructors())
- foreach (Constructor<?> ctor in c.GetConstructors())
- {
- brokenOffsetsConstructors[ctor] = ALWAYS;
- }
- }
- }
- catch (Exception e)
- {
- throw new Exception(e); // NOTE(review): same invalid wrap as in the first catch block.
- }
- allowedTokenizerArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>()); // starts empty; filled on the next lines
- allowedTokenizerArgs.addAll(argProducers.Keys); // every type that has an ArgProducer is allowed
- allowedTokenizerArgs.Add(typeof(Reader));
- allowedTokenizerArgs.Add(typeof(AttributeSource.AttributeFactory));
- allowedTokenizerArgs.Add(typeof(AttributeSource));
-
- allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
- allowedTokenFilterArgs.addAll(argProducers.Keys);
- allowedTokenFilterArgs.Add(typeof(TokenStream));
- // TODO: fix this one, that's broken:
- allowedTokenFilterArgs.Add(typeof(CommonGramsFilter));
-
- allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Type, bool?>());
- allowedCharFilterArgs.addAll(argProducers.Keys);
- allowedCharFilterArgs.Add(typeof(Reader));
- }
-
- /// <summary>
- /// Predicate over a three-element constructor-argument array that reports the
- /// arguments as broken when the third element (consumeAllTokens) is <c>false</c>.
- /// </summary>
- private class PredicateAnonymousInnerClassHelper2 : Predicate<object[]>
- {
-     public PredicateAnonymousInnerClassHelper2()
-     {
-     }
-
-     /// <summary>Returns the negation of the boolean stored in <c>args[2]</c>.</summary>
-     /// <param name="args">Constructor arguments; asserted to have exactly three elements.</param>
-     public virtual bool apply(object[] args)
-     {
-         Debug.Assert(args.Length == 3);
-         // Unbox to a plain bool: negating a bool? produces a bool?, which cannot be
-         // returned from a method declared to return bool.
-         return !(bool) args[2]; // args are broken if consumeAllTokens is false
-     }
- }
-
- /// <summary>
- /// Predicate over a three-element constructor-argument array that reports the
- /// arguments as broken when the third element (consumeAllTokens) is <c>false</c>.
- /// </summary>
- private class PredicateAnonymousInnerClassHelper3 : Predicate<object[]>
- {
-     public PredicateAnonymousInnerClassHelper3()
-     {
-     }
-
-     /// <summary>Returns the negation of the boolean stored in <c>args[2]</c>.</summary>
-     /// <param name="args">Constructor arguments; asserted to have exactly three elements.</param>
-     public virtual bool apply(object[] args)
-     {
-         Debug.Assert(args.Length == 3);
-         // Unbox to a plain bool: negating a bool? produces a bool?, which cannot be
-         // returned from a method declared to return bool.
-         return !(bool) args[2]; // args are broken if consumeAllTokens is false
-     }
- }
-
- // TODO: also fix these and remove (maybe):
- // Classes/options that don't produce consistent graph offsets:
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: private static final java.util.Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new java.util.HashMap<>();
- private static readonly IDictionary<Constructor<?>, Predicate<object[]>> brokenOffsetsConstructors = new Dictionary<Constructor<?>, Predicate<object[]>>();
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
- public static void beforeClass() // Collects analysis classes, sorts their eligible constructors into tokenizers / tokenfilters / charfilters, then orders each list.
- {
- IList<Type> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
- tokenizers = new List<>();
- tokenfilters = new List<>();
- charfilters = new List<>();
- foreach (Class c in analysisClasses)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int modifiers = c.getModifiers();
- int modifiers = c.Modifiers;
- if (Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.Synthetic || c.AnonymousClass || c.MemberClass || c.Interface || c.isAnnotationPresent(typeof(Deprecated)) || !(c.IsSubclassOf(typeof(Tokenizer)) || c.IsSubclassOf(typeof(TokenFilter)) || c.IsSubclassOf(typeof(CharFilter))))
- {
- // don't waste time with abstract classes or deprecated known-buggy ones
- continue;
- }
-
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
-//ORIGINAL LINE: for (final Constructor<?> ctor : c.getConstructors())
- foreach (Constructor<?> ctor in c.Constructors)
- {
- // don't test synthetic or deprecated ctors, they likely have known bugs:
- if (ctor.Synthetic || ctor.isAnnotationPresent(typeof(Deprecated)) || brokenConstructors[ctor] == ALWAYS) // NOTE(review): an IDictionary indexer throws KeyNotFoundException for an absent key, unlike Java's Map.get which returns null.
- {
- continue;
- }
- if (c.IsSubclassOf(typeof(Tokenizer)))
- {
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
- assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenizerArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
- tokenizers.Add(castConstructor(typeof(Tokenizer), ctor));
- }
- else if (c.IsSubclassOf(typeof(TokenFilter)))
- {
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
- assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
- tokenfilters.Add(castConstructor(typeof(TokenFilter), ctor));
- }
- else if (c.IsSubclassOf(typeof(CharFilter)))
- {
-//JAVA TO C# CONVERTER TODO TASK: There is no .NET equivalent to the java.util.Collection 'containsAll' method:
- assertTrue(ctor.toGenericString() + " has unsupported parameter types", allowedCharFilterArgs.containsAll(Arrays.asList(ctor.ParameterTypes)));
- charfilters.Add(castConstructor(typeof(CharFilter), ctor));
- }
- else
- {
- fail("Cannot get here"); // the class filter above only lets the three subclasses through
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.util.Comparator<Constructor<?>> ctorComp = new java.util.Comparator<Constructor<?>>()
-//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
- IComparer<Constructor<?>> ctorComp = new ComparatorAnonymousInnerClassHelper();
- tokenizers.Sort(ctorComp); // NOTE(review): the fields are declared as IList<>, which has no Sort method in .NET.
- tokenfilters.Sort(ctorComp);
- charfilters.Sort(ctorComp);
- if (VERBOSE)
- {
- Console.WriteLine("tokenizers = " + tokenizers); // NOTE(review): concatenating a .NET list prints its type name, not its elements.
- Console.WriteLine("tokenfilters = " + tokenfilters);
- Console.WriteLine("charfilters = " + charfilters);
- }
- }
-
- private class ComparatorAnonymousInnerClassHelper : IComparer<Constructor<JavaToDotNetGenericWildcard>> // Orders constructors by the text of toGenericString().
- {
- public ComparatorAnonymousInnerClassHelper()
- {
- }
-
- public virtual int compare<T1, T2>(Constructor<T1> arg0, Constructor<T2> arg1) // NOTE(review): IComparer<T> declares Compare(T, T); this lower-case generic method does not satisfy that member.
- {
- return arg0.toGenericString().compareTo(arg1.toGenericString()); // string comparison of the two generic signatures
- }
- }
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @AfterClass public static void afterClass()
- /// <summary>
- /// Clears the three static constructor lists by setting each of them to <c>null</c>.
- /// </summary>
- public static void afterClass()
- {
-     tokenizers = tokenfilters = charfilters = null;
- }
-
- /// <summary>
- /// Hack to work around the strictness of Oracle's Java backwards compatibility:
- /// <c>Class&lt;T&gt;#getConstructors()</c> should return an unmodifiable <c>List&lt;Constructor&lt;T&gt;&gt;</c>, not an array!
- /// </summary>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("unchecked") private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor)
- private static Constructor<T> castConstructor<T, T1>(Type<T> instanceClazz, Constructor<T1> ctor) // instanceClazz is not read in the body.
- {
- return (Constructor<T>) ctor; // plain cast of ctor to the requested constructor type
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static java.util.List<Class> getClassesForPackage(String pckgname) throws Exception
- /// <summary>
- /// Gathers the types found for <paramref name="pckgname"/> via <c>collectClassesForPackage</c>
- /// and asserts that at least one was found.
- /// </summary>
- /// <param name="pckgname">Package name handed to <c>collectClassesForPackage</c>.</param>
- /// <returns>The list populated by <c>collectClassesForPackage</c>.</returns>
- public static IList<Type> getClassesForPackage(string pckgname)
- {
-     var found = new List<Type>();
-     collectClassesForPackage(pckgname, found);
-     bool nothingFound = found.Count == 0;
-     assertFalse("No classes found in package '" + pckgname + "'; maybe your test classes are packaged as JAR file?", nothingFound);
-     return found;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private static void collectClassesForPackage(String pckgname, java.util.List<Class> classes) throws Exception
- /// <summary>
- /// Walks the file-system directories that back <paramref name="pckgname"/>, recursing into
- /// sub-directories, and appends every non-test class found there to <paramref name="classes"/>.
- /// </summary>
- /// <param name="pckgname">Dotted package name; dots are replaced by '/' to form the resource path.</param>
- /// <param name="classes">List that receives the discovered types.</param>
- private static void collectClassesForPackage(string pckgname, IList<Type> classes)
- {
-     ClassLoader cld = typeof(TestRandomChains).ClassLoader;
-     string path = pckgname.Replace('.', '/');
-     IEnumerator<URL> resources = cld.getResources(path);
-     while (resources.MoveNext())
-     {
-         URI uri = resources.Current.toURI();
-         // Only "file" resources are scanned. A URI scheme is a protocol identifier, not
-         // linguistic text, so it is compared ordinally: a culture-sensitive comparison can
-         // fail to match "file" against "FILE" under cultures with special casing rules
-         // (e.g. the Turkish dotted/dotless I).
-         if (!"file".Equals(uri.Scheme, StringComparison.OrdinalIgnoreCase))
-         {
-             continue;
-         }
-         File directory = new File(uri);
-         if (directory.exists())
-         {
-             string[] files = directory.list();
-             foreach (string file in files)
-             {
-                 if ((new File(directory, file)).Directory)
-                 {
-                     // recurse
-                     string subPackage = pckgname + "." + file;
-                     collectClassesForPackage(subPackage, classes);
-                 }
-                 if (file.EndsWith(".class", StringComparison.Ordinal))
-                 {
-                     // Strip the 6-character ".class" suffix to get the simple class name.
-                     string clazzName = file.Substring(0, file.Length - 6);
-                     // exclude Test classes that happen to be in these packages.
-                     // class.ForName'ing some of them can cause trouble.
-                     if (!clazzName.EndsWith("Test", StringComparison.Ordinal) && !clazzName.StartsWith("Test", StringComparison.Ordinal))
-                     {
-                         // Don't run static initializers, as we won't use most of them.
-                         // Java will do that automatically once accessed/instantiated.
-                         classes.Add(Type.GetType(pckgname + '.' + clazzName, false, cld));
-                     }
-                 }
-             }
-         }
-     }
- }
-
- private interface ArgProducer // Factory for one constructor-argument value; instances are stored per Type in argProducers.
- {
- object create(Random random); // Produces a value using the supplied random source.
- }
-
- private static readonly IDictionary<Type, ArgProducer> argProducers = new IdentityHashMapAnonymousInnerClassHelper();
-
- private class IdentityHashMapAnonymousInnerClassHelper : IdentityHashMap<Type, ArgProducer>
- {
- public IdentityHashMapAnonymousInnerClassHelper()
- {
- }
-
- // {
- // put(int.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: could cause huge ram usage to use full int range for some filters
- // // (e.g. allocate enormous arrays)
- // // return Integer.valueOf(random.nextInt());
- // return Integer.valueOf(TestUtil.nextInt(random, -100, 100));
- // }
- // }
- // );
- // put(char.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: fix any filters that care to throw IAE instead.
- // // also add a unicode validating filter to validate termAtt?
- // // return Character.valueOf((char)random.nextInt(65536));
- // while(true)
- // {
- // char c = (char)random.nextInt(65536);
- // if (c < '\uD800' || c > '\uDFFF')
- // {
- // return Character.valueOf(c);
- // }
- // }
- // }
- // }
- // );
- // put(float.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return Float.valueOf(random.nextFloat());
- // }
- // }
- // );
- // put(boolean.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return Boolean.valueOf(random.nextBoolean());
- // }
- // }
- // );
- // put(byte.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // this wraps to negative when casting to byte
- // return Byte.valueOf((byte) random.nextInt(256));
- // }
- // }
- // );
- // put(byte[].class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // byte bytes[] = new byte[random.nextInt(256)];
- // random.nextBytes(bytes);
- // return bytes;
- // }
- // }
- // );
- // put(Random.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return new Random(random.nextLong());
- // }
- // }
- // );
- // put(Version.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // we expect bugs in emulating old versions
- // return TEST_VERSION_CURRENT;
- // }
- // }
- // );
- // put(Set.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TypeTokenFilter
- // Set<String> set = new HashSet<>();
- // int num = random.nextInt(5);
- // for (int i = 0; i < num; i++)
- // {
- // set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
- // }
- // return set;
- // }
- // }
- // );
- // put(Collection.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // CapitalizationFilter
- // Collection<char[]> col = new ArrayList<>();
- // int num = random.nextInt(5);
- // for (int i = 0; i < num; i++)
- // {
- // col.add(TestUtil.randomSimpleString(random).toCharArray());
- // }
- // return col;
- // }
- // }
- // );
- // put(CharArraySet.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // int num = random.nextInt(10);
- // CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, num, random.nextBoolean());
- // for (int i = 0; i < num; i++)
- // {
- // // TODO: make nastier
- // set.add(TestUtil.randomSimpleString(random));
- // }
- // return set;
- // }
- // }
- // );
- // put(Pattern.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: don't want to make the exponentially slow ones Dawid documents
- // // in TestPatternReplaceFilter, so dont use truly random patterns (for now)
- // return Pattern.compile("a");
- // }
- // }
- // );
- //
- // put(Pattern[].class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
- // }
- // }
- // );
- // put(PayloadEncoder.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return new IdentityEncoder(); // the other encoders will throw exceptions if tokens arent numbers?
- // }
- // }
- // );
- // put(Dictionary.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: make nastier
- // InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
- // InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
- // try
- // {
- // return new Dictionary(affixStream, dictStream);
- // }
- // catch (Exception ex)
- // {
- // Rethrow.rethrow(ex);
- // return null; // unreachable code
- // }
- // }
- // }
- // );
- // put(Lucene43EdgeNGramTokenizer.Side.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return random.nextBoolean() ? Lucene43EdgeNGramTokenizer.Side.FRONT : Lucene43EdgeNGramTokenizer.Side.BACK;
- // }
- // }
- // );
- // put(EdgeNGramTokenFilter.Side.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // return random.nextBoolean() ? EdgeNGramTokenFilter.Side.FRONT : EdgeNGramTokenFilter.Side.BACK;
- // }
- // }
- // );
- // put(HyphenationTree.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: make nastier
- // try
- // {
- // InputSource @is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
- // HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
- // return hyphenator;
- // }
- // catch (Exception ex)
- // {
- // Rethrow.rethrow(ex);
- // return null; // unreachable code
- // }
- // }
- // }
- // );
- // put(SnowballProgram.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // try
- // {
- // String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
- // Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
- // return clazz.newInstance();
- // }
- // catch (Exception ex)
- // {
- // Rethrow.rethrow(ex);
- // return null; // unreachable code
- // }
- // }
- // }
- // );
- // put(String.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: make nastier
- // if (random.nextBoolean())
- // {
- // // a token type
- // return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)];
- // }
- // else
- // {
- // return TestUtil.randomSimpleString(random);
- // }
- // }
- // }
- // );
- // put(NormalizeCharMap.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
- // // we can't add duplicate keys, or NormalizeCharMap gets angry
- // Set<String> keys = new HashSet<>();
- // int num = random.nextInt(5);
- // //System.out.println("NormalizeCharMap=");
- // for (int i = 0; i < num; i++)
- // {
- // String key = TestUtil.randomSimpleString(random);
- // if (!keys.contains(key) && key.length() > 0)
- // {
- // String value = TestUtil.randomSimpleString(random);
- // builder.add(key, value);
- // keys.add(key);
- // //System.out.println("mapping: '" + key + "' => '" + value + "'");
- // }
- // }
- // return builder.build();
- // }
- // }
- // );
- // put(CharacterRunAutomaton.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // // TODO: could probably use a purely random automaton
- // switch(random.nextInt(5))
- // {
- // case 0:
- // return MockTokenizer.KEYWORD;
- // case 1:
- // return MockTokenizer.SIMPLE;
- // case 2:
- // return MockTokenizer.WHITESPACE;
- // case 3:
- // return MockTokenFilter.EMPTY_STOPSET;
- // default:
- // return MockTokenFilter.ENGLISH_STOPSET;
- // }
- // }
- // }
- // );
- // put(CharArrayMap.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // int num = random.nextInt(10);
- // CharArrayMap<String> map = new CharArrayMap<>(TEST_VERSION_CURRENT, num, random.nextBoolean());
- // for (int i = 0; i < num; i++)
- // {
- // // TODO: make nastier
- // map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
- // }
- // return map;
- // }
- // }
- // );
- // put(StemmerOverrideMap.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // int num = random.nextInt(10);
- // StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
- // for (int i = 0; i < num; i++)
- // {
- // String input = "";
- // do
- // {
- // input = TestUtil.randomRealisticUnicodeString(random);
- // } while(input.isEmpty());
- // String @out = "";
- // TestUtil.randomSimpleString(random);
- // do
- // {
- // @out = TestUtil.randomRealisticUnicodeString(random);
- // } while(@out.isEmpty());
- // builder.add(input, @out);
- // }
- // try
- // {
- // return builder.build();
- // }
- // catch (Exception ex)
- // {
- // Rethrow.rethrow(ex);
- // return null; // unreachable code
- // }
- // }
- // }
- // );
- // put(SynonymMap.class, new ArgProducer()
- // {
- // @@Override public Object create(Random random)
- // {
- // SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
- // final int numEntries = atLeast(10);
- // for (int j = 0; j < numEntries; j++)
- // {
- // addSyn(b, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
- // }
- // try
- // {
- // return b.build();
- // }
- // catch (Exception ex)
- // {
- // Rethrow.rethrow(ex);
- // return null; // unreachable code
- // }
- // }
- //
- // private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig)
- // {
- // b.add(new CharsRef(input.replaceAll(" +", "\u0000")), new CharsRef(output.replaceAll(" +", "\u0000")), keepOrig);
- // }
- //
- // private String randomNonEmptyString(Random random)
- // {
- // while(true)
- // {
- // final String s = TestUtil.randomUnicodeString(random).trim();
- // if (s.length() != 0 && s.indexOf('\u0000') == -1)
- // {
- // return s;
- // }
- // }
- // }
- // }
- // );
- // }
- // }
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //
- // static final Set<Class> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //ignore
- //
- // @@SuppressWarnings("unchecked") static <T> T newRandomArg(Random random, Class<T> paramType)
- // {
- // final ArgProducer producer = argProducers.get(paramType);
- // assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
- // return (T) producer.create(random);
- // }
- //
- // static Object[] newTokenizerArgs(Random random, Reader reader, Class[] paramTypes)
- // {
- // Object[] args = new Object[paramTypes.length];
- // for (int i = 0; i < args.length; i++)
- // {
- // Class paramType = paramTypes[i];
- // if (paramType == Reader.class)
- // {
- // args[i] = reader;
- // }
- // else if (paramType == AttributeFactory.class)
- // {
- // // TODO: maybe the collator one...???
- // args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
- // }
- // else if (paramType == AttributeSource.class)
- // {
- // // TODO: args[i] = new AttributeSource();
- // // this is currently too scary to deal with!
- // args[i] = null; // force IAE
- // }
- // else
- // {
- // args[i] = newRandomArg(random, paramType);
- // }
- // }
- // return args;
- // }
- //
- // static Object[] newCharFilterArgs(Random random, Reader reader, Class[] paramTypes)
- // {
- // Object[] args = new Object[paramTypes.length];
- // for (int i = 0; i < args.length; i++)
- // {
- // Class paramType = paramTypes[i];
- // if (paramType == Reader.class)
- // {
- // args[i] = reader;
- // }
- // else
- // {
- // args[i] = newRandomArg(random, paramType);
- // }
- // }
- // return args;
- // }
- //
- // static Object[] newFilterArgs(Random random, TokenStream stream, Class[] paramTypes)
- // {
- // Object[] args = new Object[paramTypes.length];
- // for (int i = 0; i < args.length; i++)
- // {
- // Class paramType = paramTypes[i];
- // if (paramType == TokenStream.class)
- // {
- // args[i] = stream;
- // }
- // else if (paramType == CommonGramsFilter.class)
- // {
- // // TODO: fix this one, thats broken: CommonGramsQueryFilter takes this one explicitly
- // args[i] = new CommonGramsFilter(TEST_VERSION_CURRENT, stream, newRandomArg(random, CharArraySet.class));
- // }
- // else
- // {
- // args[i] = newRandomArg(random, paramType);
- // }
- // }
- // return args;
- // }
- //
- // static class MockRandomAnalyzer extends Analyzer
- // {
- // final long seed;
- //
- // MockRandomAnalyzer(long seed)
- // {
- // this.seed = seed;
- // }
- //
- // public boolean offsetsAreCorrect()
- // {
- // // TODO: can we not do the full chain here!?
- // Random random = new Random(seed);
- // TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
- // TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
- // return filterSpec.offsetsAreCorrect;
- // }
- //
- // @@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader)
- // {
- // Random random = new Random(seed);
- // TokenizerSpec tokenizerSpec = newTokenizer(random, reader);
- // //System.out.println("seed=" + seed + ",create tokenizer=" + tokenizerSpec.toString);
- // TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
- // //System.out.println("seed=" + seed + ",create filter=" + filterSpec.toString);
- // return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream);
- // }
- //
- // @@Override protected Reader initReader(String fieldName, Reader reader)
- // {
- // Random random = new Random(seed);
- // CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
- // return charfilterspec.reader;
- // }
- //
- // @@Override public String toString()
- // {
- // Random random = new Random(seed);
- // StringBuilder sb = new StringBuilder();
- // CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader(""));
- // sb.append("\ncharfilters=");
- // sb.append(charFilterSpec.toString);
- // // intentional: initReader gets its own separate random
- // random = new Random(seed);
- // TokenizerSpec tokenizerSpec = newTokenizer(random, charFilterSpec.reader);
- // sb.append("\n");
- // sb.append("tokenizer=");
- // sb.append(tokenizerSpec.toString);
- // TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
- // sb.append("\n");
- // sb.append("filters=");
- // sb.append(tokenFilterSpec.toString);
- // sb.append("\n");
- // sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect);
- // return sb.toString();
- // }
- //
- // private <T> T createComponent(Constructor<T> ctor, Object[] args, StringBuilder descr)
- // {
- // try
- // {
- // final T instance = ctor.newInstance(args);
- // /*
- // if (descr.length() > 0) {
- // descr.append(",");
- // }
- // */
- // descr.append("\n ");
- // descr.append(ctor.getDeclaringClass().getName());
- // String @params = Arrays.deepToString(args);
- // @params = @params.substring(1, (@params.length()-1) - 1);
- // descr.append("(").append(@params).append(")");
- // return instance;
- // }
- // catch (InvocationTargetException ite)
- // {
- // final Throwable cause = ite.getCause();
- // if (cause instanceof IllegalArgumentException || cause instanceof UnsupportedOperationException)
- // {
- // // thats ok, ignore
- // if (VERBOSE)
- // {
- // System.err.println("Ignoring IAE/UOE from ctor:");
- // cause.printStackTrace(System.err);
- // }
- // }
- // else
- // {
- // Rethrow.rethrow(cause);
- // }
- // }
- // catch (IllegalAccessException iae)
- // {
- // Rethrow.rethrow(iae);
- // }
- // catch (InstantiationException ie)
- // {
- // Rethrow.rethrow(ie);
- // }
- // return null; // no success
- // }
- //
- // private boolean broken(Constructor<?> ctor, Object[] args)
- // {
- // final Predicate<Object[]> pred = brokenConstructors.get(ctor);
- // return pred != null && pred.apply(args);
- // }
- //
- // private boolean brokenOffsets(Constructor<?> ctor, Object[] args)
- // {
- // final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
- // return pred != null && pred.apply(args);
- // }
- //
- // // create a new random tokenizer from classpath
- // private TokenizerSpec newTokenizer(Random random, Reader reader)
- // {
- // TokenizerSpec spec = new TokenizerSpec();
- // while (spec.tokenizer == null)
- // {
- // final Constructor<? extends Tokenizer> ctor = tokenizers.get(random.nextInt(tokenizers.size()));
- // final StringBuilder descr = new StringBuilder();
- // final CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
- // final Object args[] = newTokenizerArgs(random, wrapper, ctor.getParameterTypes());
- // if (broken(ctor, args))
- // {
- // continue;
- // }
- // spec.tokenizer = createComponent(ctor, args, descr);
- // if (spec.tokenizer != null)
- // {
- // spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
- // spec.toString = descr.toString();
- // }
- // else
- // {
- // assertFalse(ctor.getDeclaringClass().getName() + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
- // }
- // }
- // return spec;
- // }
- //
- // private CharFilterSpec newCharFilterChain(Random random, Reader reader)
- // {
- // CharFilterSpec spec = new CharFilterSpec();
- // spec.reader = reader;
- // StringBuilder descr = new StringBuilder();
- // int numFilters = random.nextInt(3);
- // for (int i = 0; i < numFilters; i++)
- // {
- // while (true)
- // {
- // final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
- // final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
- // if (broken(ctor, args))
- // {
- // continue;
- // }
- // reader = createComponent(ctor, args, descr);
- // if (reader != null)
- // {
- // spec.reader = reader;
- // break;
- // }
- // }
- // }
- // spec.toString = descr.toString();
- // return spec;
- // }
- //
- // private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer, boolean offsetsAreCorrect)
- // {
- // TokenFilterSpec spec = new TokenFilterSpec();
- // spec.offsetsAreCorrect = offsetsAreCorrect;
- // spec.stream = tokenizer;
- // StringBuilder descr = new StringBuilder();
- // int numFilters = random.nextInt(5);
- // for (int i = 0; i < numFilters; i++)
- // {
- //
- // // Insert ValidatingTF after each stage so we can
- // // catch problems right after the TF that "caused"
- // // them:
- // spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);
- //
- // while (true)
- // {
- // final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
- //
- // // hack: MockGraph/MockLookahead has assertions that will trip if they follow
- // // an offsets violator. so we cant use them after e.g. wikipediatokenizer
- // if (!spec.offsetsAreCorrect && (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class) || ctor.getDeclaringClass().equals(MockRandomLookaheadTokenFilter.class)))
- // {
- // continue;
- // }
- //
- // final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
- // if (broken(ctor, args))
- // {
- // continue;
- // }
- // final TokenFilter flt = createComponent(ctor, args, descr);
- // if (flt != null)
- // {
- // spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
- // spec.stream = flt;
- // break;
- // }
- // }
- // }
- //
- // // Insert ValidatingTF after each stage so we can
- // // catch problems right after the TF that "caused"
- // // them:
- // spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);
- //
- // spec.toString = descr.toString();
- // return spec;
- // }
- // }
- //
- // static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter
- // {
- // boolean readSomething;
- //
- // CheckThatYouDidntReadAnythingReaderWrapper(Reader @in)
- // {
- // base(@in);
- // }
- //
- // @@Override public int correct(int currentOff)
- // {
- // return currentOff; // we don't change any offsets
- // }
- //
- // @@Override public int read(char[] cbuf, int off, int len) throws IOException
- // {
- // readSomething = true;
- // return input.read(cbuf, off, len);
- // }
- //
- // @@Override public int read() throws IOException
- // {
- // readSomething = true;
- // return input.read();
- // }
- //
- // @@Override public int read(CharBuffer target) throws IOException
- // {
- // readSomething = true;
- // return input.read(target);
- // }
- //
- // @@Override public int read(char[] cbuf) throws IOException
- // {
- // readSomething = true;
- // return input.read(cbuf);
- // }
- //
- // @@Override public long skip(long n) throws IOException
- // {
- // readSomething = true;
- // return input.skip(n);
- // }
- //
- // @@Override public void mark(int readAheadLimit) throws IOException
- // {
- // input.mark(readAheadLimit);
- // }
- //
- // @@Override public boolean markSupported()
- // {
- // return input.markSupported();
- // }
- //
- // @@Override public boolean ready() throws IOException
- // {
- // return input.ready();
- // }
- //
- // @@Override public void reset() throws IOException
- // {
- // input.reset();
- // }
- // }
- //
- // static class TokenizerSpec
- // {
- // Tokenizer tokenizer;
- // String toString;
- // boolean offsetsAreCorrect = true;
- // }
- //
- // static class TokenFilterSpec
- // {
- // TokenStream stream;
- // String toString;
- // boolean offsetsAreCorrect = true;
- // }
- //
- // static class CharFilterSpec
- // {
- // Reader reader;
- // String toString;
- // }
- //
- // public void testRandomChains() throws Throwable
- // {
- // int numIterations = atLeast(20);
- // Random random = random();
- // for (int i = 0; i < numIterations; i++)
- // {
- // MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong());
- // if (VERBOSE)
- // {
- // System.out.println("Creating random analyzer:" + a);
- // }
- // try
- // {
- // checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false, false); // We already validate our own offsets...
- // }
- // catch (Throwable e)
- // {
- // System.err.println("Exception from random analyzer: " + a);
- // throw e;
- // }
- // }
- // }
-
<TRUNCATED>