You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/28 12:29:54 UTC
[03/20] lucenenet git commit: Lucene.Net.TestFramework: Implemented
MockRandomPostingsFormat
Lucene.Net.TestFramework: Implemented MockRandomPostingsFormat
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/28efcab9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/28efcab9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/28efcab9
Branch: refs/heads/api-work
Commit: 28efcab967597f1cc79e88bfbb5736254adb1679
Parents: 5107574
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Feb 27 23:14:22 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Mon Feb 27 23:20:00 2017 +0700
----------------------------------------------------------------------
.../MockRandom/MockRandomPostingsFormat.cs | 577 +++++++++++++++++++
.../Lucene.Net.TestFramework.csproj | 1 +
.../Util/TestRuleSetupAndRestoreClassEnv.cs | 10 +-
3 files changed, 583 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/28efcab9/src/Lucene.Net.TestFramework/Codecs/MockRandom/MockRandomPostingsFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/MockRandom/MockRandomPostingsFormat.cs b/src/Lucene.Net.TestFramework/Codecs/MockRandom/MockRandomPostingsFormat.cs
new file mode 100644
index 0000000..d2f39c5
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/MockRandom/MockRandomPostingsFormat.cs
@@ -0,0 +1,577 @@
+using Lucene.Net.Codecs.BlockTerms;
+using Lucene.Net.Codecs.Lucene41;
+using Lucene.Net.Codecs.Memory;
+using Lucene.Net.Codecs.MockIntBlock;
+using Lucene.Net.Codecs.MockSep;
+using Lucene.Net.Codecs.Pulsing;
+using Lucene.Net.Codecs.Sep;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Lucene.Net.Codecs.MockRandom
+{
+ /// <summary>
+ /// Randomly combines terms index implementations with postings implementations.
+ /// </summary>
+ [PostingsFormatName("MockRandom")]
+ public sealed class MockRandomPostingsFormat : PostingsFormat
+ {
+ /// <summary>
+ /// Generator that supplies the skip interval and the per-segment seed when writing.
+ /// It is assigned once, in the constructor.
+ /// </summary>
+ private readonly Random seedRandom;
+
+ /// <summary>
+ /// Extension of the per-segment file in which the seed is stored.
+ /// Declared <c>const</c>: the value never varies per instance.
+ /// </summary>
+ private const string SEED_EXT = "sd";
+
+ /// <summary>
+ /// Guard <see cref="Random"/> installed when the format is created without a caller-supplied
+ /// <see cref="Random"/>. Every value-producing member throws
+ /// <see cref="InvalidOperationException"/>, so the guard fails no matter which overload is used.
+ /// </summary>
+ /// <remarks>
+ /// The Java original overrides <c>Random.next(int)</c>, the single method all other generators
+ /// route through. <see cref="Random"/> in .NET has no such single choke point, so each virtual
+ /// member is overridden individually.
+ /// </remarks>
+ private class RandomAnonymousClassHelper : Random
+ {
+     private const string Message = "Please use MockRandomPostingsFormat(Random)";
+
+     public RandomAnonymousClassHelper()
+         : base(0)
+     {
+     }
+
+     // Builds the exception thrown by every overridden member.
+     private static InvalidOperationException Misuse()
+     {
+         return new InvalidOperationException(Message);
+     }
+
+     public override int Next()
+     {
+         throw Misuse();
+     }
+
+     public override int Next(int maxValue)
+     {
+         throw Misuse();
+     }
+
+     public override int Next(int minValue, int maxValue)
+     {
+         throw Misuse();
+     }
+
+     public override double NextDouble()
+     {
+         throw Misuse();
+     }
+
+     public override void NextBytes(byte[] buffer)
+     {
+         throw Misuse();
+     }
+
+     protected override double Sample()
+     {
+         throw Misuse();
+     }
+ }
+
+ /// <summary>
+ /// Creates a format that has no seed source. Chains to the <see cref="Random"/>-taking
+ /// constructor with <c>null</c>, which installs a guard <see cref="Random"/> instead of a
+ /// real one.
+ /// </summary>
+ public MockRandomPostingsFormat()
+ : this(null)
+ {
+ // This ctor should *only* be used at read-time: the guard Random is meant to throw
+ // InvalidOperationException ("Please use MockRandomPostingsFormat(Random)") if it is
+ // ever asked for a value while writing.
+ }
+
+ /// <summary>
+ /// Creates a format whose write-time choices derive from <paramref name="random"/>.
+ /// </summary>
+ /// <param name="random">
+ /// Seed source. One value is drawn from it to seed a private generator. When <c>null</c>,
+ /// a <c>RandomAnonymousClassHelper</c> guard is stored instead.
+ /// </param>
+ public MockRandomPostingsFormat(Random random)
+     : base()
+ {
+     // Either take a single draw from the caller's generator to seed our own,
+     // or, when no generator was supplied, fall back to the guard instance.
+     this.seedRandom = (random != null)
+         ? new Random(random.Next())
+         : new RandomAnonymousClassHelper();
+ }
+
+ /// <summary>
+ /// Chooses one of several <see cref="Int32StreamFactory"/> delegates per file, keyed on the
+ /// file's extension combined with a random salt.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): the choice depends on <c>string.GetHashCode()</c>, which .NET does not
+ /// guarantee to be stable across processes or runtime versions. Confirm that an index written
+ /// with this factory is never read by a different process.
+ /// </remarks>
+ private class MockInt32StreamFactory : Int32StreamFactory
+ {
+     private readonly int salt;
+     private readonly IList<Int32StreamFactory> delegates = new List<Int32StreamFactory>();
+
+     /// <summary>
+     /// Draws the salt and the block sizes of the delegates from <paramref name="random"/>.
+     /// </summary>
+     /// <param name="random">Generator the salt and block sizes are drawn from.</param>
+     public MockInt32StreamFactory(Random random)
+     {
+         // FieldsConsumer and FieldsProducer both build this factory from a Random created
+         // from the same stored seed, so the order of the draws below must not change.
+         salt = random.nextInt();
+         delegates.Add(new MockSingleIntFactory());
+         int blockSize = TestUtil.NextInt(random, 1, 2000);
+         delegates.Add(new MockFixedIntBlockPostingsFormat.MockIntFactory(blockSize));
+         int baseBlockSize = TestUtil.NextInt(random, 1, 127);
+         delegates.Add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize));
+         // TODO: others
+     }
+
+     /// <summary>
+     /// Returns the part of <paramref name="fileName"/> starting at its first '.'.
+     /// </summary>
+     private static string GetExtension(string fileName)
+     {
+         int idx = fileName.IndexOf('.');
+         Debug.Assert(idx != -1);
+         return fileName.Substring(idx);
+     }
+
+     /// <summary>
+     /// Picks the delegate for <paramref name="fileName"/>. Shared by
+     /// <see cref="OpenInput"/> and <see cref="CreateOutput"/> so both always agree.
+     /// </summary>
+     private Int32StreamFactory SelectFactory(string fileName)
+     {
+         // Must only use extension, because IW.addIndexes can
+         // rename segment!
+         int mixed = salt ^ GetExtension(fileName).GetHashCode();
+
+         // Math.Abs(int.MinValue) throws OverflowException, so that single value is mapped
+         // to 0. Every other value keeps the original Math.Abs(...) % count mapping.
+         int nonNegative = (mixed == int.MinValue) ? 0 : Math.Abs(mixed);
+         return delegates[nonNegative % delegates.Count];
+     }
+
+     public override Int32IndexInput OpenInput(Directory dir, string fileName, IOContext context)
+     {
+         Int32StreamFactory f = SelectFactory(fileName);
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: read using int factory " + f + " from fileName=" + fileName);
+         }
+         return f.OpenInput(dir, fileName, context);
+     }
+
+     public override Int32IndexOutput CreateOutput(Directory dir, string fileName, IOContext context)
+     {
+         Int32StreamFactory f = SelectFactory(fileName);
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: write using int factory " + f + " to fileName=" + fileName);
+         }
+         return f.CreateOutput(dir, fileName, context);
+     }
+ }
+
+ /// <summary>
+ /// Index term selector that decides pseudo-randomly: a term is an index term when a draw
+ /// from <c>Next(gap)</c> equals <c>gap / 2</c>.
+ /// </summary>
+ private class IndexTermSelectorAnonymousHelper : VariableGapTermsIndexWriter.IndexTermSelector
+ {
+     private readonly int gap;
+     private readonly Random rand;
+
+     /// <param name="seed">Seed for this selector's private generator.</param>
+     /// <param name="gap">Exclusive upper bound passed to each draw.</param>
+     public IndexTermSelectorAnonymousHelper(int seed, int gap)
+     {
+         this.gap = gap;
+         this.rand = new Random(seed);
+     }
+
+     public override void NewField(FieldInfo fieldInfo)
+     {
+         // No per-field state to reset.
+     }
+
+     public override bool IsIndexTerm(BytesRef term, TermStats stats)
+     {
+         int target = gap / 2;
+         int draw = rand.Next(gap);
+         return draw == target;
+     }
+ }
+
+ /// <summary>
+ /// Builds the write-side fields consumer for a segment out of randomly chosen components.
+ /// <para/>
+ /// A seed is drawn from <c>seedRandom</c> and written to a per-segment file with the
+ /// <c>SEED_EXT</c> extension. The postings writer, the optional pulsing wrapper, the terms
+ /// dictionary and the terms index are then all chosen from a <see cref="Random"/> created
+ /// from that seed. The read side reads the seed back from the same file and makes its own
+ /// choices from a <see cref="Random"/> created the same way, so the order of draws below
+ /// must not be changed independently of the reader.
+ /// </summary>
+ /// <param name="state">
+ /// Segment write state. Supplies the directory, segment name, segment suffix and IO context.
+ /// Its <c>TermIndexInterval</c> is overwritten when the fixed-gap terms index is chosen.
+ /// </param>
+ /// <returns>The selected terms writer, wrapping the selected postings writer.</returns>
+ public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+ {
+ int minSkipInterval;
+ if (state.SegmentInfo.DocCount > 1000000)
+ {
+ // Test2BPostings can OOME otherwise:
+ minSkipInterval = 3;
+ }
+ else
+ {
+ minSkipInterval = 2;
+ }
+
+ // we pull this before the seed intentionally: because it's not consumed at runtime
+ // (the skipInterval is written into postings header)
+ int skipInterval = TestUtil.NextInt(seedRandom, minSkipInterval, 10);
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval);
+ }
+
+ long seed = seedRandom.nextLong();
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
+ }
+
+ // Persist the seed next to the segment so the reader can recover it.
+ string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
+ IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context);
+ try
+ {
+ @out.WriteInt64(seed);
+ }
+ finally
+ {
+ @out.Dispose();
+ }
+
+ // The 64-bit seed is cast to int here; the reader applies the same cast to the stored value.
+ Random random = new Random((int)seed);
+
+ random.nextInt(); // consume a random for buffersize
+
+ // Choice 1: Sep postings (with a random int stream factory) or Lucene41 postings.
+ PostingsWriterBase postingsWriter;
+ if (random.nextBoolean())
+ {
+ postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
+ }
+ else
+ {
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: writing Standard postings");
+ }
+ // TODO: randomize variables like acceptibleOverHead?!
+ postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
+ }
+
+ // Choice 2: optionally wrap the postings writer in a pulsing writer.
+ if (random.nextBoolean())
+ {
+ int totTFCutoff = TestUtil.NextInt(random, 1, 20);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
+ }
+ postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
+ }
+
+ // Choice 3: terms dictionary. 0 = FST, 1 = FSTOrd, 2 = BlockTree, otherwise BlockTerms.
+ // In every branch the postings writer is disposed if constructing the terms writer fails.
+ FieldsConsumer fields;
+ int t1 = random.nextInt(4);
+
+ if (t1 == 0)
+ {
+ bool success = false;
+ try
+ {
+ fields = new FSTTermsWriter(state, postingsWriter);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ postingsWriter.Dispose();
+ }
+ }
+ }
+ else if (t1 == 1)
+ {
+ bool success = false;
+ try
+ {
+ fields = new FSTOrdTermsWriter(state, postingsWriter);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ postingsWriter.Dispose();
+ }
+ }
+ }
+ else if (t1 == 2)
+ {
+ // Use BlockTree terms dict
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: writing BlockTree terms dict");
+ }
+
+ // TODO: would be nice to allow 1 but this is very
+ // slow to write
+ int minTermsInBlock = TestUtil.NextInt(random, 2, 100);
+ int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));
+
+ bool success = false;
+ try
+ {
+ fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ postingsWriter.Dispose();
+ }
+ }
+ }
+ else
+ {
+
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: writing Block terms dict");
+ }
+
+ bool success = false;
+
+ // Choice 4 (BlockTerms only): fixed-gap or variable-gap terms index.
+ TermsIndexWriterBase indexWriter;
+ try
+ {
+ if (random.nextBoolean())
+ {
+ state.TermIndexInterval = TestUtil.NextInt(random, 1, 100);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")");
+ }
+ indexWriter = new FixedGapTermsIndexWriter(state);
+ }
+ else
+ {
+ // Variable gap: pick one of three index term selectors.
+ VariableGapTermsIndexWriter.IndexTermSelector selector;
+ int n2 = random.nextInt(3);
+ if (n2 == 0)
+ {
+ int tii = TestUtil.NextInt(random, 1, 100);
+ selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
+ }
+ }
+ else if (n2 == 1)
+ {
+ int docFreqThresh = TestUtil.NextInt(random, 2, 100);
+ int tii = TestUtil.NextInt(random, 1, 100);
+ selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
+ }
+ else
+ {
+ int seed2 = random.Next();
+ int gap = TestUtil.NextInt(random, 2, 40);
+ if (LuceneTestCase.VERBOSE)
+ {
+ Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
+ }
+ selector = new IndexTermSelectorAnonymousHelper(seed2, gap);
+
+ // The Java original used an anonymous IndexTermSelector subclass here;
+ // it is ported as the named IndexTermSelectorAnonymousHelper class.
+ }
+ indexWriter = new VariableGapTermsIndexWriter(state, selector);
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ postingsWriter.Dispose();
+ }
+ }
+
+ success = false;
+ try
+ {
+ fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Dispose both writers; the nested finally ensures indexWriter is disposed
+ // even if disposing postingsWriter throws.
+ try
+ {
+ postingsWriter.Dispose();
+ }
+ finally
+ {
+ indexWriter.Dispose();
+ }
+ }
+ }
+ }
+
+ return fields;
+ }
+
+ /// <summary>
+ /// Builds the read-side fields producer for a segment.
+ /// <para/>
+ /// Reads the seed from the per-segment file with the <c>SEED_EXT</c> extension, creates a
+ /// <see cref="Random"/> from it (using the same <c>(int)</c> cast as the writer) and uses that
+ /// generator to pick the postings reader, the optional pulsing wrapper, the terms dictionary
+ /// and the terms index. The order of the draws must not be changed independently of the writer.
+ /// </summary>
+ /// <param name="state">
+ /// Segment read state. Supplies the directory, field infos, segment info, suffix and IO
+ /// context. When the BlockTerms dictionary is chosen and <c>TermsIndexDivisor</c> is not -1,
+ /// <c>TermsIndexDivisor</c> is overwritten with a random value.
+ /// </param>
+ /// <returns>The selected terms reader, wrapping the selected postings reader.</returns>
+ public override FieldsProducer FieldsProducer(SegmentReadState state)
+ {
+     string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
+
+     // Read the seed inside try/finally so the input is released even when the read fails
+     // (this mirrors how the writer handles the matching output).
+     long seed;
+     IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
+     try
+     {
+         seed = @in.ReadInt64();
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed);
+         }
+     }
+     finally
+     {
+         @in.Dispose();
+     }
+
+     Random random = new Random((int)seed);
+
+     // This value is only logged; the draw sits where the writer discards one value
+     // "for buffersize".
+     int readBufferSize = TestUtil.NextInt(random, 1, 4096);
+     if (LuceneTestCase.VERBOSE)
+     {
+         Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize);
+     }
+
+     // Choice 1: Sep postings (with a random int stream factory) or Lucene41 postings.
+     PostingsReaderBase postingsReader;
+
+     if (random.nextBoolean())
+     {
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading Sep postings");
+         }
+         postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo,
+             state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
+     }
+     else
+     {
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading Standard postings");
+         }
+         postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
+     }
+
+     // Choice 2: optionally wrap the postings reader in a pulsing reader.
+     if (random.nextBoolean())
+     {
+         // The cutoff is not passed to the reader; it is drawn (and logged) only to keep
+         // the generator in step with the writer.
+         int totTFCutoff = TestUtil.NextInt(random, 1, 20);
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
+         }
+         postingsReader = new PulsingPostingsReader(state, postingsReader);
+     }
+
+     // Choice 3: terms dictionary. 0 = FST, 1 = FSTOrd, 2 = BlockTree, otherwise BlockTerms.
+     // In every branch the postings reader is disposed if constructing the terms reader fails.
+     FieldsProducer fields;
+     int t1 = random.nextInt(4);
+     if (t1 == 0)
+     {
+         bool success = false;
+         try
+         {
+             fields = new FSTTermsReader(state, postingsReader);
+             success = true;
+         }
+         finally
+         {
+             if (!success)
+             {
+                 postingsReader.Dispose();
+             }
+         }
+     }
+     else if (t1 == 1)
+     {
+         bool success = false;
+         try
+         {
+             fields = new FSTOrdTermsReader(state, postingsReader);
+             success = true;
+         }
+         finally
+         {
+             if (!success)
+             {
+                 postingsReader.Dispose();
+             }
+         }
+     }
+     else if (t1 == 2)
+     {
+         // Use BlockTree terms dict
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading BlockTree terms dict");
+         }
+
+         bool success = false;
+         try
+         {
+             fields = new BlockTreeTermsReader(state.Directory,
+                                               state.FieldInfos,
+                                               state.SegmentInfo,
+                                               postingsReader,
+                                               state.Context,
+                                               state.SegmentSuffix,
+                                               state.TermsIndexDivisor);
+             success = true;
+         }
+         finally
+         {
+             if (!success)
+             {
+                 postingsReader.Dispose();
+             }
+         }
+     }
+     else
+     {
+
+         if (LuceneTestCase.VERBOSE)
+         {
+             Console.WriteLine("MockRandomCodec: reading Block terms dict");
+         }
+         TermsIndexReaderBase indexReader;
+         bool success = false;
+         try
+         {
+             bool doFixedGap = random.nextBoolean();
+
+             // randomness diverges from writer, here:
+             // if termsIndexDivisor is set to -1, we should not touch it. It means a
+             // test explicitly instructed not to load the terms index.
+             if (state.TermsIndexDivisor != -1)
+             {
+                 state.TermsIndexDivisor = TestUtil.NextInt(random, 1, 10);
+             }
+
+             if (doFixedGap)
+             {
+                 if (LuceneTestCase.VERBOSE)
+                 {
+                     Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
+                 }
+                 indexReader = new FixedGapTermsIndexReader(state.Directory,
+                                                            state.FieldInfos,
+                                                            state.SegmentInfo.Name,
+                                                            state.TermsIndexDivisor,
+                                                            BytesRef.UTF8SortedAsUnicodeComparer,
+                                                            state.SegmentSuffix, state.Context);
+             }
+             else
+             {
+                 int n2 = random.nextInt(3);
+                 if (n2 == 1)
+                 {
+                     random.nextInt();
+                 }
+                 else if (n2 == 2)
+                 {
+                     random.nextLong();
+                 }
+                 if (LuceneTestCase.VERBOSE)
+                 {
+                     Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")");
+                 }
+                 indexReader = new VariableGapTermsIndexReader(state.Directory,
+                                                               state.FieldInfos,
+                                                               state.SegmentInfo.Name,
+                                                               state.TermsIndexDivisor,
+                                                               state.SegmentSuffix, state.Context);
+
+             }
+
+             success = true;
+         }
+         finally
+         {
+             if (!success)
+             {
+                 postingsReader.Dispose();
+             }
+         }
+
+         success = false;
+         try
+         {
+             fields = new BlockTermsReader(indexReader,
+                                           state.Directory,
+                                           state.FieldInfos,
+                                           state.SegmentInfo,
+                                           postingsReader,
+                                           state.Context,
+                                           state.SegmentSuffix);
+             success = true;
+         }
+         finally
+         {
+             if (!success)
+             {
+                 // Dispose both readers; the nested finally ensures indexReader is disposed
+                 // even if disposing postingsReader throws.
+                 try
+                 {
+                     postingsReader.Dispose();
+                 }
+                 finally
+                 {
+                     indexReader.Dispose();
+                 }
+             }
+         }
+     }
+
+     return fields;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/28efcab9/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index f008111..c972f03 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -252,6 +252,7 @@
</Compile>
<Compile Include="Codecs\MockIntBlock\MockFixedIntBlockPostingsFormat.cs" />
<Compile Include="Codecs\MockIntBlock\MockVariableIntBlockPostingsFormat.cs" />
+ <Compile Include="Codecs\MockRandom\MockRandomPostingsFormat.cs" />
<Compile Include="Codecs\MockSep\MockSepPostingsFormat.cs" />
<Compile Include="Codecs\MockSep\MockSingleIntFactory.cs" />
<Compile Include="Codecs\MockSep\MockSingleIntIndexInput.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/28efcab9/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs b/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs
index 797d974..0a34665 100644
--- a/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs
+++ b/src/Lucene.Net.TestFramework/Util/TestRuleSetupAndRestoreClassEnv.cs
@@ -14,6 +14,7 @@ namespace Lucene.Net.Util
using Codecs.Lucene41;
using Codecs.Lucene42;
using Codecs.Lucene45;
+ using Codecs.MockRandom;
using Codecs.SimpleText;
using JavaCompatibility;
//using AssumptionViolatedException = org.junit.@internal.AssumptionViolatedException;
@@ -195,11 +196,10 @@ namespace Lucene.Net.Util
{
format = PostingsFormat.ForName("Lucene41");
}
- // LUCENENET TODO: Implement MockRandomPostingsFormat
- //else if ("MockRandom".Equals(LuceneTestCase.TEST_POSTINGSFORMAT))
- //{
- // format = new MockRandomPostingsFormat(new Random(random.Next()));
- //}
+ else if ("MockRandom".Equals(LuceneTestCase.TEST_POSTINGSFORMAT))
+ {
+ format = new MockRandomPostingsFormat(new Random(random.Next()));
+ }
else
{
format = PostingsFormat.ForName(LuceneTestCase.TEST_POSTINGSFORMAT);