You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/04/08 00:08:28 UTC
[1/3] git commit: Various
Repository: lucenenet
Updated Branches:
refs/heads/branch_4x da25f85ac -> f1fbbd9f1
Various
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5ecbe926
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5ecbe926
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5ecbe926
Branch: refs/heads/branch_4x
Commit: 5ecbe9260500686e9ccf53849e1a930076007b44
Parents: da25f85
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:07:23 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:07:23 2014 +0300
----------------------------------------------------------------------
src/core/Support/Character.cs | 19 +++++
test/core/Document/TestDocument.cs | 2 +-
test/test-framework/LuceneTestCase.cs | 76 ++++++++++++--------
.../Randomized/RandomizedContext.cs | 4 +-
4 files changed, 66 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/src/core/Support/Character.cs
----------------------------------------------------------------------
diff --git a/src/core/Support/Character.cs b/src/core/Support/Character.cs
index 3575b7f..d870f38 100644
--- a/src/core/Support/Character.cs
+++ b/src/core/Support/Character.cs
@@ -45,6 +45,8 @@ namespace Lucene.Net.Support
public const char MIN_HIGH_SURROGATE = '\uD800';
public const char MAX_HIGH_SURROGATE = '\uDBFF';
+ /// <summary>
+ /// The smallest Unicode code point that needs a surrogate pair in UTF-16 (U+10000).
+ /// Declared as a constant, like the surrogate bounds above, so it cannot be reassigned.
+ /// </summary>
+ public const int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
+
/// <summary>
///
/// </summary>
@@ -80,6 +82,23 @@ namespace Lucene.Net.Support
return 1; // always 1 char written in .NET
}
+ /// <summary>
+ /// Converts a Unicode code point to its UTF-16 representation.
+ /// </summary>
+ /// <param name="codePoint">The code point to convert, in the range 0 to 0x10FFFF.</param>
+ /// <returns>
+ /// A one-element array for code points below <see cref="MIN_SUPPLEMENTARY_CODE_POINT"/>
+ /// (lone surrogate values included), otherwise a two-element array holding the high
+ /// surrogate followed by the low surrogate.
+ /// </returns>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// <paramref name="codePoint"/> is negative or greater than 0x10FFFF.
+ /// </exception>
+ public static char[] ToChars(int codePoint)
+ {
+     if (codePoint < 0 || codePoint > 0x10FFFF)
+     {
+         throw new System.ArgumentOutOfRangeException("codePoint", codePoint,
+             "Not a valid Unicode code point.");
+     }
+
+     if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT)
+     {
+         // Fits in a single UTF-16 code unit. Cast directly so that unpaired
+         // surrogate values pass through unchanged instead of being rejected.
+         return new[] {(char) codePoint};
+     }
+
+     // A plain (char) cast would drop the upper bits here, so split the value
+     // into a surrogate pair: the inverse of ToCodePoint(high, low).
+     int offset = codePoint - MIN_SUPPLEMENTARY_CODE_POINT;
+     return new[]
+     {
+         (char) (MIN_HIGH_SURROGATE + (offset >> 10)),
+         (char) (MIN_LOW_SURROGATE + (offset & 0x3FF))
+     };
+ }
+
+ /// <summary>
+ /// Combines a high and a low surrogate into the code point they encode.
+ /// </summary>
+ /// <param name="high">The high (leading) surrogate.</param>
+ /// <param name="low">The low (trailing) surrogate.</param>
+ /// <returns>The combined code point; the inputs are not validated.</returns>
+ public static int ToCodePoint(char high, char low)
+ {
+     // Straightforward form of the arithmetic: rebase each surrogate onto zero,
+     // place the high part above the low 10 bits, then add the supplementary base.
+     int highPart = (high - MIN_HIGH_SURROGATE) << 10;
+     int lowPart = low - MIN_LOW_SURROGATE;
+     return highPart + lowPart + MIN_SUPPLEMENTARY_CODE_POINT;
+ }
+
public static int ToLowerCase(int codePoint)
{
// .NET Port: chars are always UTF-16 in .NET
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/core/Document/TestDocument.cs
----------------------------------------------------------------------
diff --git a/test/core/Document/TestDocument.cs b/test/core/Document/TestDocument.cs
index f232b9a..bf99159 100644
--- a/test/core/Document/TestDocument.cs
+++ b/test/core/Document/TestDocument.cs
@@ -148,7 +148,7 @@ namespace Lucene.Net.Documents
/// </summary>
/// <throws> Exception on error </throws>
[Test]
- public virtual void TestGetValuesForNewDocument()
+ public virtual void testGetValuesForNewDocument()
{
doAssert(makeDocumentWithFields(), false);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/test-framework/LuceneTestCase.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/LuceneTestCase.cs b/test/test-framework/LuceneTestCase.cs
index 9d14d52..dc0bcd4 100644
--- a/test/test-framework/LuceneTestCase.cs
+++ b/test/test-framework/LuceneTestCase.cs
@@ -16,6 +16,9 @@
*/
using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Randomized;
using Lucene.Net.Util;
using NUnit.Framework;
@@ -27,6 +30,7 @@ using System.Collections.Generic;
using Lucene.Net.Search;
using Lucene.Net.TestFramework;
+using Version = Lucene.Net.Util.Version;
namespace Lucene.Net
{
@@ -69,7 +73,7 @@ namespace Lucene.Net
private const string SYSPROP_FAILFAST = "tests.failfast";
-
+
public static readonly Util.Version TEST_VERSION_CURRENT = Util.Version.LUCENE_43;
@@ -85,17 +89,20 @@ namespace Lucene.Net
public static readonly string TEST_CODEC = SystemProperties.GetProperty("tests.codec", "random");
- public static readonly string TEST_DOCVALUESFORMAT = SystemProperties.GetProperty("tests.docvaluesformat", "random");
+ public static readonly string TEST_DOCVALUESFORMAT = SystemProperties.GetProperty("tests.docvaluesformat",
+ "random");
public static readonly string TEST_DIRECTORY = SystemProperties.GetProperty("tests.directory", "random");
- public static readonly string TEST_LINE_DOCS_FILE = SystemProperties.GetProperty("tests.linedocsfile", DEFAULT_LINE_DOCS_FILE);
+ public static readonly string TEST_LINE_DOCS_FILE = SystemProperties.GetProperty("tests.linedocsfile",
+ DEFAULT_LINE_DOCS_FILE);
public static readonly bool TEST_NIGHTLY = RandomizedTest.SystemPropertyAsBoolean(NightlyAttribute.KEY, false);
public static readonly bool TEST_WEEKLY = RandomizedTest.SystemPropertyAsBoolean(WeeklyAttribute.KEY, false);
- public static readonly bool TEST_AWAITSFIX = RandomizedTest.SystemPropertyAsBoolean(AwaitsFixAttribute.KEY, false);
+ public static readonly bool TEST_AWAITSFIX = RandomizedTest.SystemPropertyAsBoolean(AwaitsFixAttribute.KEY,
+ false);
public static readonly bool TEST_SLOW = RandomizedTest.SystemPropertyAsBoolean(SlowAttribute.KEY, false);
@@ -107,7 +114,8 @@ namespace Lucene.Net
{
String s = SystemProperties.GetProperty("tempDir", System.IO.Path.GetTempPath());
if (s == null)
- throw new SystemException("To run tests, you need to define system property 'tempDir' or 'java.io.tmpdir'.");
+ throw new SystemException(
+ "To run tests, you need to define system property 'tempDir' or 'java.io.tmpdir'.");
TEMP_DIR = new System.IO.DirectoryInfo(s);
if (!TEMP_DIR.Exists) TEMP_DIR.Create();
@@ -115,18 +123,20 @@ namespace Lucene.Net
CORE_DIRECTORIES = new List<string>(FS_DIRECTORIES);
CORE_DIRECTORIES.Add("RAMDirectory");
-
+
}
- private static readonly string[] IGNORED_INVARIANT_PROPERTIES = {
+ private static readonly string[] IGNORED_INVARIANT_PROPERTIES =
+ {
"user.timezone", "java.rmi.server.randomIDs"
};
- private static readonly IList<String> FS_DIRECTORIES = new[] {
- "SimpleFSDirectory",
- "NIOFSDirectory",
- "MMapDirectory"
- };
+ private static readonly IList<String> FS_DIRECTORIES = new[]
+ {
+ "SimpleFSDirectory",
+ "NIOFSDirectory",
+ "MMapDirectory"
+ };
private static readonly IList<String> CORE_DIRECTORIES;
@@ -136,17 +146,18 @@ namespace Lucene.Net
// CORE_DIRECTORIES.add("RAMDirectory");
//};
- protected static readonly ISet<String> doesntSupportOffsets = new HashSet<String>(new[] {
- "Lucene3x",
- "MockFixedIntBlock",
- "MockVariableIntBlock",
- "MockSep",
- "MockRandom"
- });
+ protected static readonly ISet<String> doesntSupportOffsets = new HashSet<String>(new[]
+ {
+ "Lucene3x",
+ "MockFixedIntBlock",
+ "MockVariableIntBlock",
+ "MockSep",
+ "MockRandom"
+ });
public void Test()
{
-
+
}
public static bool PREFLEX_IMPERSONATION_IS_ACTIVE;
@@ -160,7 +171,7 @@ namespace Lucene.Net
//internal static readonly TestRuleIgnoreAfterMaxFailures ignoreAfterMaxFailures;
- private const long STATIC_LEAK_THRESHOLD = 10 * 1024 * 1024;
+ private const long STATIC_LEAK_THRESHOLD = 10*1024*1024;
//private static readonly ISet<String> STATIC_LEAK_IGNORED_TYPES =
// new HashSet<String>(new[] {
@@ -223,7 +234,7 @@ namespace Lucene.Net
{
}
-
+
public LuceneTestCase(System.String name)
{
@@ -317,7 +328,8 @@ namespace Lucene.Net
catch (System.SystemException e)
{
System.IO.StreamWriter temp_writer;
- temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
+ temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardError(),
+ System.Console.Error.Encoding);
temp_writer.AutoFlush = true;
DumpArray(msg + ": FieldCache", entries, temp_writer);
throw e;
@@ -334,7 +346,8 @@ namespace Lucene.Net
if (null != insanity)
{
System.IO.StreamWriter temp_writer2;
- temp_writer2 = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
+ temp_writer2 = new System.IO.StreamWriter(System.Console.OpenStandardError(),
+ System.Console.Error.Encoding);
temp_writer2.AutoFlush = true;
DumpArray(msg + ": Insane FieldCache usage(s)", insanity, temp_writer2);
}
@@ -348,7 +361,8 @@ namespace Lucene.Net
/// </param>
/// <param name="stream">Stream to log messages to.
/// </param>
- public static void DumpIterator(System.String label, System.Collections.IEnumerator iter, System.IO.StreamWriter stream)
+ public static void DumpIterator(System.String label, System.Collections.IEnumerator iter,
+ System.IO.StreamWriter stream)
{
stream.WriteLine("*** BEGIN " + label + " ***");
if (null == iter)
@@ -370,7 +384,9 @@ namespace Lucene.Net
/// </seealso>
public static void DumpArray(System.String label, System.Object[] objs, System.IO.StreamWriter stream)
{
- System.Collections.IEnumerator iter = (null == objs) ? null : new System.Collections.ArrayList(objs).GetEnumerator();
+ System.Collections.IEnumerator iter = (null == objs)
+ ? null
+ : new System.Collections.ArrayList(objs).GetEnumerator();
DumpIterator(label, iter, stream);
}
@@ -403,14 +419,12 @@ namespace Lucene.Net
}
// recorded seed
- [NonSerialized]
- protected internal int? seed = null;
+ [NonSerialized] protected internal int? seed = null;
//protected internal bool seed_init = false;
// static members
- [NonSerialized]
- private static readonly System.Random seedRnd = new System.Random();
-
+ [NonSerialized] private static readonly System.Random seedRnd = new System.Random();
+
protected static void Ok(bool condition, string message = null)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/test-framework/Randomized/RandomizedContext.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Randomized/RandomizedContext.cs b/test/test-framework/Randomized/RandomizedContext.cs
index 1eb6365..6823229 100644
--- a/test/test-framework/Randomized/RandomizedContext.cs
+++ b/test/test-framework/Randomized/RandomizedContext.cs
@@ -17,8 +17,6 @@
using System;
using System.Collections.Generic;
-using System.Linq;
-using System.Text;
using System.Threading;
using Lucene.Net.Support;
@@ -74,7 +72,7 @@ namespace Lucene.Net.Randomized
this.runner = runner;
}
-
+ /// <summary>
+ /// Gets the context that <c>Context(Thread)</c> returns for the thread reading this property.
+ /// </summary>
+ public static RandomizedContext Current
+ {
+     get
+     {
+         Thread callingThread = Thread.CurrentThread;
+         return Context(callingThread);
+     }
+ }
private static RandomizedContext Context(Thread thread)
{
[2/3] git commit: Ported tests/MockTokenizer
Posted by sy...@apache.org.
Ported tests/MockTokenizer
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6e9d73f4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6e9d73f4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6e9d73f4
Branch: refs/heads/branch_4x
Commit: 6e9d73f4ac8bcbc1b0ae23dc4c32e5ca249c5be8
Parents: 5ecbe92
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:07:44 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:07:44 2014 +0300
----------------------------------------------------------------------
test/test-framework/Analysis/MockTokenizer.cs | 285 +++++++++++++++++++
.../Lucene.Net.TestFramework.csproj | 3 +-
2 files changed, 287 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6e9d73f4/test/test-framework/Analysis/MockTokenizer.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockTokenizer.cs b/test/test-framework/Analysis/MockTokenizer.cs
new file mode 100644
index 0000000..0cd2942
--- /dev/null
+++ b/test/test-framework/Analysis/MockTokenizer.cs
@@ -0,0 +1,285 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Randomized;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+ /**
+ * Tokenizer for testing.
+ * <p>
+ * This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
+     * tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
+ * it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
+ * <ul>
+ * <li>An internal state-machine is used for checking consumer consistency. These checks can
+ * be disabled with {@link #setEnableChecks(boolean)}.
+ * <li>For convenience, optionally lowercases terms that it outputs.
+ * </ul>
+ */
+ public class MockTokenizer : Tokenizer
+ {
+        /// <summary>Acts similar to WhitespaceTokenizer.</summary>
+        public static readonly CharacterRunAutomaton WHITESPACE =
+            new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").ToAutomaton());
+
+        /// <summary>
+        /// Acts similar to KeywordTokenizer.
+        /// TODO: Keyword returns an "empty" token for an empty reader...
+        /// </summary>
+        public static readonly CharacterRunAutomaton KEYWORD =
+            new CharacterRunAutomaton(new RegExp(".*").ToAutomaton());
+
+        /// <summary>
+        /// Acts like LetterTokenizer.
+        /// The ugly regex below is an incomplete rendering of Unicode 5.2 [:Letter:].
+        /// </summary>
+        public static readonly CharacterRunAutomaton SIMPLE =
+            new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ一-鿌]+").ToAutomaton());
+
+        // Configuration captured by the constructor; never reassigned afterwards.
+        private readonly CharacterRunAutomaton runAutomaton;
+        private readonly bool lowerCase;
+        private readonly int maxTokenLength;
+
+        /// <summary>Token length limit used by the constructors that do not take one.</summary>
+        public const int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
+
+        // Current state of runAutomaton while scanning; rewound to its initial state
+        // whenever a character is rejected and on Reset().
+        private int state;
+
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+
+        // Number of chars consumed from the input since the last Reset().
+        private int off;
+
+ // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
+ // currently, we can only check that the lifecycle is correct if someone is reusing,
+ // but not for "one-offs".
+        private enum State
+        {
+            // consumer set a reader input either via ctor or via reset(Reader)
+            SETREADER,
+
+            // consumer has called reset()
+            RESET,
+
+            // consumer is consuming, has called incrementToken() == true
+            INCREMENT,
+
+            // consumer has called incrementToken() which returned false
+            INCREMENT_FALSE,
+
+            // consumer has called end() to perform end of stream operations
+            END,
+
+            // consumer has called close() to release any resources
+            CLOSE
+        }
+
+        // Where the consumer currently is in the workflow; a fresh instance counts as closed
+        // until the constructor switches it to SETREADER.
+        private State streamState = State.CLOSE;
+
+        // Corrected start offset of the most recently emitted token; only for asserting.
+        private int lastOffset;
+
+        // Set through setEnableChecks(); only the commented-out assertions refer to it.
+        private bool enableChecks = true;
+
+        // evil: but we don't change the behavior with this random, we only switch up how we read
+        private Random random = new Random(/*RandomizedContext.Current.getRandom().nextLong()*/);
+
+        /// <summary>
+        /// Creates a tokenizer that splits <paramref name="input"/> into the runs accepted by
+        /// <paramref name="runAutomaton"/>.
+        /// </summary>
+        /// <param name="factory">Attribute factory handed to the base tokenizer.</param>
+        /// <param name="input">Reader supplying the text.</param>
+        /// <param name="runAutomaton">Automaton deciding which characters belong to a token.</param>
+        /// <param name="lowerCase">Whether emitted characters are lowercased.</param>
+        /// <param name="maxTokenLength">Term length at which a token is cut off.</param>
+        public MockTokenizer(
+            AttributeSource.AttributeFactory factory,
+            System.IO.TextReader input,
+            CharacterRunAutomaton runAutomaton,
+            bool lowerCase,
+            int maxTokenLength)
+            : base(factory, input)
+        {
+            // Token-shaping configuration.
+            this.runAutomaton = runAutomaton;
+            this.lowerCase = lowerCase;
+            this.maxTokenLength = maxTokenLength;
+
+            // Scanning starts at the automaton's initial state with a reader already set.
+            state = runAutomaton.InitialState;
+            streamState = State.SETREADER;
+
+            termAtt = AddAttribute<CharTermAttribute>();
+            offsetAtt = AddAttribute<OffsetAttribute>();
+        }
+
+        /// <summary>Uses the default attribute factory.</summary>
+        public MockTokenizer(System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
+            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength)
+        {
+        }
+
+        /// <summary>Uses the default attribute factory and <see cref="DEFAULT_MAX_TOKEN_LENGTH"/>.</summary>
+        public MockTokenizer(System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
+            : this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
+        {
+        }
+
+        /// <summary>Equivalent to <c>MockTokenizer(input, WHITESPACE, true)</c>.</summary>
+        public MockTokenizer(System.IO.TextReader input)
+            : this(input, WHITESPACE, true)
+        {
+        }
+
+        /// <summary>Uses the given attribute factory and <see cref="DEFAULT_MAX_TOKEN_LENGTH"/>.</summary>
+        public MockTokenizer(AttributeFactory factory, System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
+            : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
+        {
+        }
+
+        /// <summary>
+        /// Equivalent to <c>MockTokenizer(factory, input, WHITESPACE, true)</c>.
+        /// </summary>
+        /// <param name="factory">Attribute factory to create attributes with.</param>
+        /// <param name="input">Reader supplying the text.</param>
+        public MockTokenizer(AttributeFactory factory, System.IO.TextReader input)
+            // Forward the factory; chaining to (input, WHITESPACE, true) would silently
+            // replace it with the default attribute factory.
+            : this(factory, input, WHITESPACE, true)
+        {
+        }
+
+        /// <summary>
+        /// Advances to the next token: skips characters the automaton rejects, then collects
+        /// accepted characters until one is rejected, the input ends, or the term reaches
+        /// <c>maxTokenLength</c>.
+        /// </summary>
+        /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
+        public override bool IncrementToken()
+        {
+            // The workflow assertion (state must be RESET or INCREMENT) is still commented out
+            // in this port, so a call made in the wrong state is not detected here.
+            ClearAttributes();
+            while (true)
+            {
+                int tokenStart = off;
+                int codePoint = readCodePoint();
+                if (codePoint < 0)
+                {
+                    // End of input before another token started.
+                    streamState = State.INCREMENT_FALSE;
+                    return false;
+                }
+
+                if (!isTokenChar(codePoint))
+                {
+                    continue;
+                }
+
+                int tokenEnd;
+                do
+                {
+                    foreach (char unit in Character.ToChars(Normalize(codePoint)))
+                    {
+                        termAtt.Append(unit);
+                    }
+                    tokenEnd = off;
+                    if (termAtt.Length >= maxTokenLength)
+                    {
+                        break;
+                    }
+                    codePoint = readCodePoint();
+                } while (codePoint >= 0 && isTokenChar(codePoint));
+
+                int correctedStart = CorrectOffset(tokenStart);
+                int correctedEnd = CorrectOffset(tokenEnd);
+                // Remembered only for the (currently commented-out) offset-ordering assertions.
+                lastOffset = correctedStart;
+                offsetAtt.SetOffset(correctedStart, correctedEnd);
+                streamState = State.INCREMENT;
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Reads the next code point from the input, joining a high surrogate with the char
+        /// that follows it, and advances <c>off</c> by the number of chars consumed.
+        /// </summary>
+        /// <returns>The code point, or a negative value when the input is exhausted.</returns>
+        protected int readCodePoint()
+        {
+            int first = ReadChar();
+            if (first < 0)
+            {
+                return first;
+            }
+
+            // No check is made here that 'first' is not an unpaired low surrogate.
+            off++;
+            if (!Character.IsHighSurrogate((char) first))
+            {
+                return first;
+            }
+
+            int second = ReadChar();
+            if (second < 0)
+            {
+                // Stream ended right after a high surrogate: hand it back unpaired.
+                return first;
+            }
+
+            // No check is made here that 'second' really is a low surrogate.
+            off++;
+            return Character.ToCodePoint((char) first, (char) second);
+        }
+
+        /// <summary>
+        /// Reads one char from the input, randomly choosing between the different
+        /// <c>TextReader.Read</c> overloads so that all of them get exercised.
+        /// </summary>
+        /// <returns>The char read, or -1 when the input is exhausted.</returns>
+        protected int ReadChar()
+        {
+            switch (random.Next(0, 10))
+            {
+                case 0:
+                    {
+                        // Read(char[], int, int) filling a whole one-element buffer.
+                        char[] c = new char[1];
+                        int ret = input.Read(c, 0, c.Length);
+                        // TextReader reports end of stream as 0 chars read, not as a
+                        // negative count; map that to -1 instead of returning c[0] ('\0').
+                        return ret <= 0 ? -1 : c[0];
+                    }
+                case 1:
+                    {
+                        // Read(char[], int, int) at a non-zero offset.
+                        char[] c = new char[2];
+                        int ret = input.Read(c, 1, 1);
+                        return ret <= 0 ? -1 : c[1];
+                    }
+                // The read(CharBuffer) variant of the original is not ported.
+                default:
+                    // Read(): already returns -1 at end of stream.
+                    return input.Read();
+            }
+        }
+
+        /// <summary>
+        /// Feeds <paramref name="c"/> to the automaton and reports whether it was accepted.
+        /// A rejected character rewinds the automaton to its initial state.
+        /// </summary>
+        /// <param name="c">The code point to test.</param>
+        /// <returns><c>true</c> if the automaton has a transition for <paramref name="c"/>.</returns>
+        protected bool isTokenChar(int c)
+        {
+            int next = runAutomaton.Step(state, c);
+            bool accepted = next >= 0;
+            state = accepted ? next : runAutomaton.InitialState;
+            return accepted;
+        }
+
+        /// <summary>
+        /// Applies lowercasing to <paramref name="c"/> when this tokenizer was created with
+        /// <c>lowerCase</c> set; otherwise returns it unchanged.
+        /// </summary>
+        protected int Normalize(int c)
+        {
+            if (!lowerCase)
+            {
+                return c;
+            }
+            return Character.ToLowerCase(c);
+        }
+
+        /// <summary>
+        /// Resets the base tokenizer, rewinds the automaton and the offset counters,
+        /// and records that the stream has been reset.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            state = runAutomaton.InitialState;
+            off = 0;
+            lastOffset = 0;
+            // A second Reset() in a row is not detected: that assertion is still commented out.
+            streamState = State.RESET;
+        }
+
+        /// <summary>
+        /// Disposes the tokenizer through the base implementation and records that the
+        /// stream is closed.
+        /// </summary>
+        /// <param name="disposing">Forwarded unchanged to the base <c>Dispose(bool)</c>.</param>
+        // Declared 'override' rather than 'virtual': a new virtual method would only hide the
+        // base Dispose(bool), so disposing through a base-class reference would never reach
+        // this code and streamState would stay unchanged.
+        protected override void Dispose(bool disposing)
+        {
+            base.Dispose(disposing);
+            // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close()
+            // these tests should disable this check, by default we check the normal workflow.
+            // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
+            // (the corresponding state assertion is not ported, so nothing is checked here)
+            streamState = State.CLOSE;
+        }
+
+        // Records that a new reader has been set. Always returns true.
+        // The check that the stream was closed beforehand is not ported.
+        private bool setReaderTestPoint()
+        {
+            streamState = State.SETREADER;
+            return true;
+        }
+
+        /// <summary>
+        /// Sets the final offset (start and end both at the corrected position of the last
+        /// char read) and records that the end of the stream has been handled.
+        /// </summary>
+        public override void End()
+        {
+            int finalOffset = CorrectOffset(off);
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+
+            // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
+            // these tests should disable this check (in general you should consume the entire stream)
+            // The check itself is not ported, so the state simply advances to END.
+            streamState = State.END;
+        }
+
+        /// <summary>
+        /// Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+        /// should leave this enabled.
+        /// </summary>
+        /// <param name="enableChecks"><c>true</c> to keep the checks on, <c>false</c> to turn them off.</param>
+        public void setEnableChecks(bool enableChecks)
+        {
+            this.enableChecks = enableChecks;
+        }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6e9d73f4/test/test-framework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/test/test-framework/Lucene.Net.TestFramework.csproj b/test/test-framework/Lucene.Net.TestFramework.csproj
index 14d381f..6b31aaa 100644
--- a/test/test-framework/Lucene.Net.TestFramework.csproj
+++ b/test/test-framework/Lucene.Net.TestFramework.csproj
@@ -56,6 +56,7 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="Analysis\MockTokenizer.cs" />
<Compile Include="JavaCompatibility\LuceneTestCase.cs" />
<Compile Include="JavaCompatibility\LuceneTypesHelpers.cs" />
<Compile Include="JavaCompatibility\SystemTypesHelpers.cs" />
@@ -85,7 +86,7 @@
</ProjectReference>
</ItemGroup>
<ItemGroup>
- <Folder Include="Analysis\" />
+ <Folder Include="Index\" />
</ItemGroup>
<ItemGroup>
<None Include="Lucene.Net.snk" />
[3/3] git commit: Partial porting of RandomIndexWriter
Posted by sy...@apache.org.
Partial porting of RandomIndexWriter
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f1fbbd9f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f1fbbd9f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f1fbbd9f
Branch: refs/heads/branch_4x
Commit: f1fbbd9f169689a484e65a005047d8afc93562be
Parents: 6e9d73f
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:08:02 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:08:02 2014 +0300
----------------------------------------------------------------------
test/test-framework/Index/RandomIndexWriter.cs | 316 ++++++++++++++++++++
1 file changed, 316 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1fbbd9f/test/test-framework/Index/RandomIndexWriter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Index/RandomIndexWriter.cs b/test/test-framework/Index/RandomIndexWriter.cs
new file mode 100644
index 0000000..74af5ed
--- /dev/null
+++ b/test/test-framework/Index/RandomIndexWriter.cs
@@ -0,0 +1,316 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Analysis;
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Randomized;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Version = System.Version;
+
+namespace Lucene.Net
+{
+public class RandomIndexWriter : IDisposable {
+
+  // The wrapped writer that every operation is forwarded to.
+  public IndexWriter w;
+  // Source of the random decisions made by this wrapper.
+  private Random r;
+  // Number of maybeCommit() calls so far.
+  int docCount;
+  // docCount value at which maybeCommit() performs the next commit.
+  int flushAt;
+  // Scales the random distance between commits; grown by maybeCommit() after each commit.
+  private double flushAtFactor = 1.0;
+  // Set once getReader(...) has been called; consulted by close().
+  private bool getReaderCalled;
+  private Codec codec; // sugar
+
+  // Randomly calls Thread.Yield so we mixup thread scheduling
+  private class MockIndexWriter : IndexWriter {
+
+    private Random r;
+
+    public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) : base(dir, conf) {
+      // TODO: this should be solved in a different way; Random should not be shared (!).
+      // System.Random is seeded with an int, so derive the private seed with Next().
+      this.r = new Random(r.Next());
+    }
+
+    // NOTE(review): the name and accessibility of this hook are kept as written; an override
+    // must repeat the accessibility of the base member, so confirm both against the ported
+    // IndexWriter before this compiles.
+    override bool testPoint(String name) {
+      // Yield the time slice on roughly one call in four.
+      if (r.Next(4) == 2)
+        Thread.Yield();
+      return true;
+    }
+  }
+
+  /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */
+  public RandomIndexWriter(Random r, Directory dir)
+    : this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r)))
+  {
+  }
+
+  /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */
+  public RandomIndexWriter(Random r, Directory dir, Analyzer a)
+    // C# chains constructors in the initializer list, not with this(...) inside the body.
+    : this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, a))
+  {
+  }
+
+  /** create a RandomIndexWriter with a random config */
+  // The version is spelled out in full because the file-level alias maps the bare name
+  // 'Version' to System.Version, while the other constructors pass a Lucene version
+  // (TEST_VERSION_CURRENT) in this position.
+  public RandomIndexWriter(Random r, Directory dir, Lucene.Net.Util.Version v, Analyzer a)
+    : this(r, dir, LuceneTestCase.newIndexWriterConfig(r, v, a))
+  {
+  }
+
+  /** create a RandomIndexWriter with the provided config */
+  public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) {
+    // TODO: this should be solved in a different way; Random should not be shared (!).
+    // System.Random is seeded with an int, so derive the private seed with Next().
+    this.r = new Random(r.Next());
+    w = new MockIndexWriter(r, dir, c);
+    flushAt = _TestUtil.nextInt(r, 10, 1000);
+    // NOTE(review): getConfig()/getCodec()/getName() are kept as written; confirm the
+    // accessor names on the ported IndexWriter, config and Codec types.
+    codec = w.getConfig().getCodec();
+    if (LuceneTestCase.VERBOSE) {
+      Console.WriteLine("RIW dir=" + dir + " config=" + w.getConfig());
+      Console.WriteLine("codec default=" + codec.getName());
+    }
+
+    // Make sure we sometimes test indices that don't get
+    // any forced merges:
+    doRandomForceMerge = r.Next(2) == 0;
+  }
+
+  /// <summary>
+  /// Adds a document, analyzed with the wrapped writer's analyzer.
+  /// </summary>
+  /// <param name="doc">The fields of the document.</param>
+  public void addDocument<T>(IEnumerable<T> doc) where T : class, IIndexableField {
+    // NOTE(review): getAnalyzer() is kept as written; confirm the accessor name on the
+    // ported IndexWriter.
+    addDocument(doc, w.getAnalyzer());
+  }
+
+  /// <summary>
+  /// Adds a document with the given analyzer. One call in five (on average) goes through
+  /// the writer's multi-document method with a single-element batch instead, so both
+  /// paths get exercised. Afterwards a commit may be triggered.
+  /// </summary>
+  /// <param name="doc">The fields of the document.</param>
+  /// <param name="a">The analyzer to use for this document.</param>
+  public void addDocument<T>(IEnumerable<T> doc, Analyzer a) where T : class, IIndexableField {
+    if (r.Next(5) == 3) {
+      // TODO: maybe, we should simply buffer up added docs
+      // (but we need to clone them), and only when
+      // getReader, commit, etc. are called, we do an
+      // addDocuments?  Would be better testing.
+      // A one-element array stands in for the hand-written single-item iterable.
+      w.AddDocuments(new[] { doc }, a);
+    } else {
+      w.AddDocument(doc, a);
+    }
+
+    maybeCommit();
+  }
+
+  // Counts one more operation and commits when the counter had reached flushAt, then
+  // schedules the next commit a random, gradually growing distance ahead.
+  private void maybeCommit() {
+    bool thresholdReached = docCount == flushAt;
+    docCount++;
+    if (!thresholdReached) {
+      return;
+    }
+
+    if (LuceneTestCase.VERBOSE) {
+      Console.WriteLine("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
+    }
+    w.Commit();
+
+    int minStep = (int) (flushAtFactor * 10);
+    int maxStep = (int) (flushAtFactor * 1000);
+    flushAt += _TestUtil.nextInt(r, minStep, maxStep);
+
+    // gradually but exponentially increase time b/w flushes
+    if (flushAtFactor < 2e6) {
+      flushAtFactor *= 1.05;
+    }
+  }
+
+  // Adds a batch of documents through the wrapped writer, then possibly commits.
+  // IEnumerable<T> is covariant, so nested sequences of any field type are accepted,
+  // which is what the Java wildcards expressed.
+  public void addDocuments(IEnumerable<IEnumerable<IIndexableField>> docs) {
+    w.AddDocuments(docs);
+    maybeCommit();
+  }
+
+  // Replaces the documents matching delTerm with the given batch, then possibly commits.
+  public void updateDocuments(Term delTerm, IEnumerable<IEnumerable<IIndexableField>> docs) {
+    w.UpdateDocuments(delTerm, docs);
+    maybeCommit();
+  }
+
+  /// <summary>
+  /// Updates a document. One call in five (on average) goes through the writer's
+  /// multi-document update with a single-element batch instead, so both paths get
+  /// exercised. Afterwards a commit may be triggered.
+  /// </summary>
+  /// <param name="t">Term identifying the document(s) to replace.</param>
+  /// <param name="doc">The fields of the replacement document.</param>
+  public void updateDocument<T>(Term t, IEnumerable<T> doc) where T : class, IIndexableField {
+    if (r.Next(5) == 3) {
+      // A one-element array stands in for the hand-written single-item iterable.
+      w.UpdateDocuments(t, new[] { doc });
+    } else {
+      w.UpdateDocument(t, doc);
+    }
+    maybeCommit();
+  }
+
+  // Forwards to the wrapped writer. C# declares a variable-length argument list as
+  // 'params T[] name'.
+  public void addIndexes(params Directory[] dirs) {
+    w.AddIndexes(dirs);
+  }
+
+  // Forwards to the wrapped writer.
+  public void addIndexes(params IndexReader[] readers) {
+    w.AddIndexes(readers);
+  }
+
+  // ---- Plain pass-throughs to the wrapped writer ----
+
+  // Deletes by term.
+  public void deleteDocuments(Term term) {
+    w.DeleteDocuments(term);
+  }
+
+  // Deletes by query.
+  public void deleteDocuments(Query q) {
+    w.DeleteDocuments(q);
+  }
+
+  // Commits the wrapped writer.
+  public void commit() {
+    w.Commit();
+  }
+
+  // Value of the wrapped writer's NumDocs.
+  public int numDocs() {
+    return w.NumDocs;
+  }
+
+  // Value of the wrapped writer's MaxDoc.
+  public int maxDoc() {
+    return w.MaxDoc;
+  }
+
+  // Deletes everything through the wrapped writer.
+  public void deleteAll() {
+    w.DeleteAll();
+  }
+
+  // Same as getReader(true).
+  public DirectoryReader getReader() {
+    return getReader(true);
+  }
+
+  // Whether the private doRandomForceMerge() helper does anything; randomized in the
+  // constructor and overridable through setDoRandomForceMerge().
+  // NOTE(review): this field shares its name with the doRandomForceMerge() method further
+  // down; C# does not allow a field and a method of the same name in one class, so one of
+  // the two needs renaming as part of finishing the port.
+  private bool doRandomForceMerge = true;
+  // Only referenced by the commented-out assertion in doRandomForceMerge().
+  private bool doRandomForceMergeAssert = true;
+
+  // Pass-through to the wrapped writer, forwarding doWait.
+  public void forceMergeDeletes(bool doWait) {
+    w.ForceMergeDeletes(doWait);
+  }
+
+  // Pass-through to the wrapped writer's parameterless overload.
+  public void forceMergeDeletes() {
+    w.ForceMergeDeletes();
+  }
+
+  // Turns the random force merges on or off.
+  public void setDoRandomForceMerge(bool v) {
+    doRandomForceMerge = v;
+  }
+
+  // Stores the flag meant to guard the segment-count assertion after a partial merge.
+  public void setDoRandomForceMergeAssert(bool v) {
+    doRandomForceMergeAssert = v;
+  }
+
+  // When random force merges are enabled, merges either down to a single segment or down
+  // to a randomly chosen segment count.
+  private void doRandomForceMerge() {
+    if (!doRandomForceMerge) {
+      return;
+    }
+
+    int segments = w.SegmentCount;
+    bool mergeFully = r.nextBoolean() || segments == 0;
+    if (mergeFully) {
+      // full forceMerge
+      if (LuceneTestCase.VERBOSE) {
+        Console.WriteLine("RIW: doRandomForceMerge(1)");
+      }
+      w.ForceMerge(1);
+      return;
+    }
+
+    // partial forceMerge
+    int limit = _TestUtil.nextInt(r, 1, segments);
+    if (LuceneTestCase.VERBOSE) {
+      Console.WriteLine("RIW: doRandomForceMerge(" + limit + ")");
+    }
+    w.ForceMerge(limit);
+    // The check that the segment count ended up <= limit is not ported.
+  }
+
+  // Returns a reader over the index, randomly choosing between a reader obtained from the
+  // writer and a freshly opened DirectoryReader, and occasionally force-merging or
+  // committing first.
+  // NOTE(review): codec.getName().equals(...) and w.getReader(...) are kept as written;
+  // confirm the member names on the ported Codec and IndexWriter.
+  public DirectoryReader getReader(bool applyDeletions) {
+    getReaderCalled = true;
+    if (r.Next(20) == 2) {
+      doRandomForceMerge();
+    }
+    // If we are writing with PreFlexRW, force a full
+    // IndexReader.open so terms are sorted in codepoint
+    // order during searching:
+    if (!applyDeletions || !codec.getName().equals("Lucene3x") && r.Next(2) == 0) {
+      if (LuceneTestCase.VERBOSE) {
+        Console.WriteLine("RIW.getReader: use NRT reader");
+      }
+      if (r.Next(5) == 1) {
+        w.Commit();
+      }
+      return w.getReader(applyDeletions);
+    } else {
+      if (LuceneTestCase.VERBOSE) {
+        Console.WriteLine("RIW.getReader: open new reader");
+      }
+      w.Commit();
+      if (r.Next(2) == 0) {
+        return DirectoryReader.Open(w.Directory, _TestUtil.nextInt(r, 1, 10));
+      } else {
+        return w.getReader(applyDeletions);
+      }
+    }
+  }
+
+  /// <summary>
+  /// Close this writer. If getReader() was never used, a random force merge is
+  /// occasionally performed first.
+  /// </summary>
+  public void close() {
+    // if someone isn't using getReader() API, we want to be sure to
+    // forceMerge since presumably they might open a reader on the dir.
+    if (getReaderCalled == false && r.nextInt(8) == 2) {
+      doRandomForceMerge();
+    }
+    w.Close();
+  }
+
+  /// <summary>
+  /// Implements <see cref="IDisposable"/>, which the class declares, by closing the writer;
+  /// this lets the writer be used in a <c>using</c> block.
+  /// </summary>
+  public void Dispose() {
+    close();
+  }
+
+  /// <summary>
+  /// Forces a forceMerge on the wrapped writer.
+  /// NOTE: this should be avoided in tests unless absolutely necessary,
+  /// as it will result in less test coverage.
+  /// </summary>
+  /// <param name="maxSegmentCount">Passed straight through to the wrapped writer.</param>
+  public void ForceMerge(int maxSegmentCount) {
+    w.ForceMerge(maxSegmentCount);
+  }
+}
+
+}