You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/04/08 00:08:28 UTC

[1/3] git commit: Various

Repository: lucenenet
Updated Branches:
  refs/heads/branch_4x da25f85ac -> f1fbbd9f1


Various


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5ecbe926
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5ecbe926
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5ecbe926

Branch: refs/heads/branch_4x
Commit: 5ecbe9260500686e9ccf53849e1a930076007b44
Parents: da25f85
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:07:23 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:07:23 2014 +0300

----------------------------------------------------------------------
 src/core/Support/Character.cs                   | 19 +++++
 test/core/Document/TestDocument.cs              |  2 +-
 test/test-framework/LuceneTestCase.cs           | 76 ++++++++++++--------
 .../Randomized/RandomizedContext.cs             |  4 +-
 4 files changed, 66 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/src/core/Support/Character.cs
----------------------------------------------------------------------
diff --git a/src/core/Support/Character.cs b/src/core/Support/Character.cs
index 3575b7f..d870f38 100644
--- a/src/core/Support/Character.cs
+++ b/src/core/Support/Character.cs
@@ -45,6 +45,8 @@ namespace Lucene.Net.Support
         public const char MIN_HIGH_SURROGATE = '\uD800';
         public const char MAX_HIGH_SURROGATE = '\uDBFF';
 
+        public const int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; // first code point above the BMP; const like the surrogate bounds
+
         /// <summary>
         /// 
         /// </summary>
@@ -80,6 +82,23 @@ namespace Lucene.Net.Support
             return 1; // always 1 char written in .NET
         }
 
+        public static char[] ToChars(int codePoint)
+        {
+            // Below 0x10000 one UTF-16 unit suffices (cast kept as before); above it a surrogate pair is required, and ConvertFromUtf32 throws ArgumentOutOfRangeException past U+10FFFF.
+            return codePoint < MIN_SUPPLEMENTARY_CODE_POINT ? new[] {(char)codePoint} : char.ConvertFromUtf32(codePoint).ToCharArray();
+        }
+
+        public static int ToCodePoint(char high, char low)
+        {
+            // Combines the two halves of a UTF-16 surrogate pair into one code point.
+            // Written in the expanded form; algebraically identical to the folded form
+            //   ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
+            //                           - (MIN_HIGH_SURROGATE << 10) - MIN_LOW_SURROGATE)
+            int highBits = (high - MIN_HIGH_SURROGATE) << 10;
+            int lowBits = low - MIN_LOW_SURROGATE;
+            return highBits + lowBits + MIN_SUPPLEMENTARY_CODE_POINT;
+        }
+
         public static int ToLowerCase(int codePoint)
         {
             // .NET Port: chars are always UTF-16 in .NET

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/core/Document/TestDocument.cs
----------------------------------------------------------------------
diff --git a/test/core/Document/TestDocument.cs b/test/core/Document/TestDocument.cs
index f232b9a..bf99159 100644
--- a/test/core/Document/TestDocument.cs
+++ b/test/core/Document/TestDocument.cs
@@ -148,7 +148,7 @@ namespace Lucene.Net.Documents
 		/// </summary>
 		/// <throws>  Exception on error </throws>
 		[Test]
-		public virtual void  TestGetValuesForNewDocument()
+		public virtual void testGetValuesForNewDocument()
 		{
 			doAssert(makeDocumentWithFields(), false);
 		}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/test-framework/LuceneTestCase.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/LuceneTestCase.cs b/test/test-framework/LuceneTestCase.cs
index 9d14d52..dc0bcd4 100644
--- a/test/test-framework/LuceneTestCase.cs
+++ b/test/test-framework/LuceneTestCase.cs
@@ -16,6 +16,9 @@
  */
 
 using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Randomized;
 using Lucene.Net.Util;
 using NUnit.Framework;
 
@@ -27,6 +30,7 @@ using System.Collections.Generic;
 using Lucene.Net.Search;
 
 using Lucene.Net.TestFramework;
+using Version = Lucene.Net.Util.Version;
 
 namespace Lucene.Net
 {
@@ -69,7 +73,7 @@ namespace Lucene.Net
         private const string SYSPROP_FAILFAST = "tests.failfast";
 
 
-     
+
 
         public static readonly Util.Version TEST_VERSION_CURRENT = Util.Version.LUCENE_43;
 
@@ -85,17 +89,20 @@ namespace Lucene.Net
 
         public static readonly string TEST_CODEC = SystemProperties.GetProperty("tests.codec", "random");
 
-        public static readonly string TEST_DOCVALUESFORMAT = SystemProperties.GetProperty("tests.docvaluesformat", "random");
+        public static readonly string TEST_DOCVALUESFORMAT = SystemProperties.GetProperty("tests.docvaluesformat",
+            "random");
 
         public static readonly string TEST_DIRECTORY = SystemProperties.GetProperty("tests.directory", "random");
 
-        public static readonly string TEST_LINE_DOCS_FILE = SystemProperties.GetProperty("tests.linedocsfile", DEFAULT_LINE_DOCS_FILE);
+        public static readonly string TEST_LINE_DOCS_FILE = SystemProperties.GetProperty("tests.linedocsfile",
+            DEFAULT_LINE_DOCS_FILE);
 
         public static readonly bool TEST_NIGHTLY = RandomizedTest.SystemPropertyAsBoolean(NightlyAttribute.KEY, false);
 
         public static readonly bool TEST_WEEKLY = RandomizedTest.SystemPropertyAsBoolean(WeeklyAttribute.KEY, false);
 
-        public static readonly bool TEST_AWAITSFIX = RandomizedTest.SystemPropertyAsBoolean(AwaitsFixAttribute.KEY, false);
+        public static readonly bool TEST_AWAITSFIX = RandomizedTest.SystemPropertyAsBoolean(AwaitsFixAttribute.KEY,
+            false);
 
         public static readonly bool TEST_SLOW = RandomizedTest.SystemPropertyAsBoolean(SlowAttribute.KEY, false);
 
@@ -107,7 +114,8 @@ namespace Lucene.Net
         {
             String s = SystemProperties.GetProperty("tempDir", System.IO.Path.GetTempPath());
             if (s == null)
-                throw new SystemException("To run tests, you need to define system property 'tempDir' or 'java.io.tmpdir'.");
+                throw new SystemException(
+                    "To run tests, you need to define system property 'tempDir' or 'java.io.tmpdir'.");
 
             TEMP_DIR = new System.IO.DirectoryInfo(s);
             if (!TEMP_DIR.Exists) TEMP_DIR.Create();
@@ -115,18 +123,20 @@ namespace Lucene.Net
             CORE_DIRECTORIES = new List<string>(FS_DIRECTORIES);
             CORE_DIRECTORIES.Add("RAMDirectory");
 
-            
+
         }
 
-        private static readonly string[] IGNORED_INVARIANT_PROPERTIES = {
+        private static readonly string[] IGNORED_INVARIANT_PROPERTIES =
+        {
             "user.timezone", "java.rmi.server.randomIDs"
         };
 
-        private static readonly IList<String> FS_DIRECTORIES = new[] {
-            "SimpleFSDirectory",
-            "NIOFSDirectory",
-            "MMapDirectory"
-        };
+        private static readonly IList<String> FS_DIRECTORIES = new[]
+                                                               {
+                                                                   "SimpleFSDirectory",
+                                                                   "NIOFSDirectory",
+                                                                   "MMapDirectory"
+                                                               };
 
         private static readonly IList<String> CORE_DIRECTORIES;
 
@@ -136,17 +146,18 @@ namespace Lucene.Net
         //  CORE_DIRECTORIES.add("RAMDirectory");
         //};
 
-        protected static readonly ISet<String> doesntSupportOffsets = new HashSet<String>(new[] {
-            "Lucene3x",
-            "MockFixedIntBlock",
-            "MockVariableIntBlock",
-            "MockSep",
-            "MockRandom"
-        });
+        protected static readonly ISet<String> doesntSupportOffsets = new HashSet<String>(new[]
+                                                                                          {
+                                                                                              "Lucene3x",
+                                                                                              "MockFixedIntBlock",
+                                                                                              "MockVariableIntBlock",
+                                                                                              "MockSep",
+                                                                                              "MockRandom"
+                                                                                          });
 
         public void Test()
         {
-            
+
         }
 
         public static bool PREFLEX_IMPERSONATION_IS_ACTIVE;
@@ -160,7 +171,7 @@ namespace Lucene.Net
 
         //internal static readonly TestRuleIgnoreAfterMaxFailures ignoreAfterMaxFailures;
 
-        private const long STATIC_LEAK_THRESHOLD = 10 * 1024 * 1024;
+        private const long STATIC_LEAK_THRESHOLD = 10*1024*1024;
 
         //private static readonly ISet<String> STATIC_LEAK_IGNORED_TYPES =
         //    new HashSet<String>(new[] {
@@ -223,7 +234,7 @@ namespace Lucene.Net
         {
         }
 
-        
+
 
         public LuceneTestCase(System.String name)
         {
@@ -317,7 +328,8 @@ namespace Lucene.Net
                 catch (System.SystemException e)
                 {
                     System.IO.StreamWriter temp_writer;
-                    temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
+                    temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardError(),
+                        System.Console.Error.Encoding);
                     temp_writer.AutoFlush = true;
                     DumpArray(msg + ": FieldCache", entries, temp_writer);
                     throw e;
@@ -334,7 +346,8 @@ namespace Lucene.Net
                 if (null != insanity)
                 {
                     System.IO.StreamWriter temp_writer2;
-                    temp_writer2 = new System.IO.StreamWriter(System.Console.OpenStandardError(), System.Console.Error.Encoding);
+                    temp_writer2 = new System.IO.StreamWriter(System.Console.OpenStandardError(),
+                        System.Console.Error.Encoding);
                     temp_writer2.AutoFlush = true;
                     DumpArray(msg + ": Insane FieldCache usage(s)", insanity, temp_writer2);
                 }
@@ -348,7 +361,8 @@ namespace Lucene.Net
         /// </param>
         /// <param name="stream">Stream to log messages to.
         /// </param>
-        public static void DumpIterator(System.String label, System.Collections.IEnumerator iter, System.IO.StreamWriter stream)
+        public static void DumpIterator(System.String label, System.Collections.IEnumerator iter,
+            System.IO.StreamWriter stream)
         {
             stream.WriteLine("*** BEGIN " + label + " ***");
             if (null == iter)
@@ -370,7 +384,9 @@ namespace Lucene.Net
         /// </seealso>
         public static void DumpArray(System.String label, System.Object[] objs, System.IO.StreamWriter stream)
         {
-            System.Collections.IEnumerator iter = (null == objs) ? null : new System.Collections.ArrayList(objs).GetEnumerator();
+            System.Collections.IEnumerator iter = (null == objs)
+                ? null
+                : new System.Collections.ArrayList(objs).GetEnumerator();
             DumpIterator(label, iter, stream);
         }
 
@@ -403,14 +419,12 @@ namespace Lucene.Net
         }
 
         // recorded seed
-        [NonSerialized]
-        protected internal int? seed = null;
+        [NonSerialized] protected internal int? seed = null;
         //protected internal bool seed_init = false;
 
         // static members
-        [NonSerialized]
-        private static readonly System.Random seedRnd = new System.Random();
-             
+        [NonSerialized] private static readonly System.Random seedRnd = new System.Random();
+
 
         protected static void Ok(bool condition, string message = null)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5ecbe926/test/test-framework/Randomized/RandomizedContext.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Randomized/RandomizedContext.cs b/test/test-framework/Randomized/RandomizedContext.cs
index 1eb6365..6823229 100644
--- a/test/test-framework/Randomized/RandomizedContext.cs
+++ b/test/test-framework/Randomized/RandomizedContext.cs
@@ -17,8 +17,6 @@
 
 using System;
 using System.Collections.Generic;
-using System.Linq;
-using System.Text;
 using System.Threading;
 using Lucene.Net.Support;
 
@@ -74,7 +72,7 @@ namespace Lucene.Net.Randomized
             this.runner = runner; 
         }
 
-
+        public static RandomizedContext Current { get { return Context(Thread.CurrentThread); } }
 
         private static RandomizedContext Context(Thread thread)
         {

[2/3] git commit: Ported tests/MockTokenizer

Posted by sy...@apache.org.

Ported tests/MockTokenizer


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6e9d73f4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6e9d73f4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6e9d73f4

Branch: refs/heads/branch_4x
Commit: 6e9d73f4ac8bcbc1b0ae23dc4c32e5ca249c5be8
Parents: 5ecbe92
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:07:44 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:07:44 2014 +0300

----------------------------------------------------------------------
 test/test-framework/Analysis/MockTokenizer.cs   | 285 +++++++++++++++++++
 .../Lucene.Net.TestFramework.csproj             |   3 +-
 2 files changed, 287 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6e9d73f4/test/test-framework/Analysis/MockTokenizer.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockTokenizer.cs b/test/test-framework/Analysis/MockTokenizer.cs
new file mode 100644
index 0000000..0cd2942
--- /dev/null
+++ b/test/test-framework/Analysis/MockTokenizer.cs
@@ -0,0 +1,285 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Randomized;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+    /**
+     * Tokenizer for testing.
+     * <p>
+     * This tokenizer is a replacement for {@link #WHITESPACE}, {@link #SIMPLE}, and {@link #KEYWORD}
+     * tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
+     * it wrapping this tokenizer instead for extra checks. This tokenizer has the following behavior:
+     * <ul>
+     *   <li>An internal state-machine is used for checking consumer consistency. These checks can
+     *       be disabled with {@link #setEnableChecks(boolean)}.
+     *   <li>For convenience, optionally lowercases terms that it outputs.
+     * </ul>
+     */
+    public class MockTokenizer : Tokenizer
+    {
+        /** Acts Similar to WhitespaceTokenizer */
+        public static CharacterRunAutomaton WHITESPACE =
+          new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").ToAutomaton());
+        /** Acts Similar to KeywordTokenizer.
+         * TODO: Keyword returns an "empty" token for an empty reader... 
+         */
+        public static CharacterRunAutomaton KEYWORD =
+          new CharacterRunAutomaton(new RegExp(".*").ToAutomaton());
+        /** Acts like LetterTokenizer. */
+        // the ugly regex below is incomplete Unicode 5.2 [:Letter:]
+        public static CharacterRunAutomaton SIMPLE =
+          new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ一-鿌]+").ToAutomaton());
+
+        private CharacterRunAutomaton runAutomaton;
+        private bool lowerCase;
+        private int maxTokenLength;
+        public static int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
+        private int state;
+
+        private readonly CharTermAttribute termAtt;
+        private readonly OffsetAttribute offsetAtt;
+        int off = 0;
+
+        // TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
+        // currently, we can only check that the lifecycle is correct if someone is reusing,
+        // but not for "one-offs".
+        private enum State
+        {
+            SETREADER,       // consumer set a reader input either via ctor or via reset(Reader)
+            RESET,           // consumer has called reset()
+            INCREMENT,       // consumer is consuming, has called incrementToken() == true
+            INCREMENT_FALSE, // consumer has called incrementToken() which returned false
+            END,             // consumer has called end() to perform end of stream operations
+            CLOSE            // consumer has called close() to release any resources
+        };
+
+        private State streamState = State.CLOSE;
+        private int lastOffset = 0; // only for asserting
+        private bool enableChecks = true;
+
+        // evil: but we don't change the behavior with this random, we only switch up how we read
+        private Random random = new Random(/*RandomizedContext.Current.getRandom().nextLong()*/);
+
+        public MockTokenizer(AttributeSource.AttributeFactory factory, System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
+            : base(factory, input)
+        {
+            this.runAutomaton = runAutomaton;
+            this.lowerCase = lowerCase;
+            this.state = runAutomaton.InitialState;
+            this.streamState = State.SETREADER;
+            this.maxTokenLength = maxTokenLength;
+
+            termAtt = AddAttribute<CharTermAttribute>();
+            offsetAtt = AddAttribute<OffsetAttribute>();
+        }
+
+        public MockTokenizer(System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength) :
+            this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength)
+        {
+        }
+
+        public MockTokenizer(System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase) :
+            this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
+        {
+        }
+        /** Calls {@link #MockTokenizer(Reader, CharacterRunAutomaton, boolean) MockTokenizer(Reader, WHITESPACE, true)} */
+        public MockTokenizer(System.IO.TextReader input) :
+            this(input, WHITESPACE, true)
+        {
+        }
+
+        public MockTokenizer(AttributeFactory factory, System.IO.TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase) :
+            this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
+        {
+        }
+
+        /** Creates a tokenizer on the given factory that splits with WHITESPACE and lower-cases terms;
+         *  equivalent to MockTokenizer(factory, input, WHITESPACE, true).
+         */
+        public MockTokenizer(AttributeFactory factory, System.IO.TextReader input) :
+            this(factory, input, WHITESPACE, true)
+        {
+            // Pass the factory through: chaining to the reader-only overload would silently discard it.
+        }
+
+        public override bool IncrementToken()
+        {
+            //    assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT) 
+            //                            : "incrementToken() called while in wrong state: " + streamState;
+            ClearAttributes();
+            for (; ; )
+            {
+                int startOffset = off;
+                int cp = readCodePoint();
+                if (cp < 0)
+                {
+                    break;
+                }
+                else if (isTokenChar(cp))
+                {
+                    int endOffset;
+                    do
+                    {
+                        char[] chars = Character.ToChars(Normalize(cp));
+                        for (int i = 0; i < chars.Length; i++)
+                            termAtt.Append(chars[i]);
+                        endOffset = off;
+                        if (termAtt.Length >= maxTokenLength)
+                        {
+                            break;
+                        }
+                        cp = readCodePoint();
+                    } while (cp >= 0 && isTokenChar(cp));
+
+                    int correctedStartOffset = CorrectOffset(startOffset);
+                    int correctedEndOffset = CorrectOffset(endOffset);
+                    //        assert correctedStartOffset >= 0;
+                    //        assert correctedEndOffset >= 0;
+                    //        assert correctedStartOffset >= lastOffset;
+                    lastOffset = correctedStartOffset;
+                    //        assert correctedEndOffset >= correctedStartOffset;
+                    offsetAtt.SetOffset(correctedStartOffset, correctedEndOffset);
+                    streamState = State.INCREMENT;
+                    return true;
+                }
+            }
+            streamState = State.INCREMENT_FALSE;
+            return false;
+        }
+
+        protected int readCodePoint()
+        {
+            int ch = ReadChar();
+            if (ch < 0)
+            {
+                return ch;
+            }
+            else
+            {
+                //assert !Character.isLowSurrogate((char) ch) : "unpaired low surrogate: " + Integer.toHexString(ch);
+                off++;
+                if (Character.IsHighSurrogate((char)ch))
+                {
+                    int ch2 = ReadChar();
+                    if (ch2 >= 0)
+                    {
+                        off++;
+                        //assert Character.isLowSurrogate((char) ch2) : "unpaired high surrogate: " + Integer.toHexString(ch) + ", followed by: " + Integer.toHexString(ch2);
+                        return Character.ToCodePoint((char)ch, (char)ch2);
+                    }
+                    else
+                    {
+                        //assert false : "stream ends with unpaired high surrogate: " + Integer.toHexString(ch);
+                    }
+                }
+                return ch;
+            }
+        }
+
+        // Returns the next UTF-16 unit from input, or -1 once the reader is exhausted.
+        // The random switch only varies which TextReader overload performs the read;
+        // every path must report end of stream the same way (-1).
+        // The Java read(CharBuffer) variant has no TextReader counterpart, so it is
+        // not ported. NOTE(review): assumes input is a System.IO.TextReader - confirm.
+        protected int ReadChar()
+        {
+            switch (random.Next(0, 10))
+            {
+                case 0:
+                    {
+                        // Read(char[], int, int) filling a one-element buffer
+                        char[] c = new char[1];
+                        int ret = input.Read(c, 0, c.Length);
+                        // .NET reports end of stream as 0 here (Java's read returns -1)
+                        return ret <= 0 ? -1 : c[0];
+                    }
+                case 1:
+                    {
+                        // Read(char[], int, int) at a non-zero buffer offset
+                        char[] c = new char[2];
+                        int ret = input.Read(c, 1, 1);
+                        // same 0-at-end-of-stream convention as in case 0
+                        return ret <= 0 ? -1 : c[1];
+                    }
+                default:
+                    // Read() already signals end of stream with -1
+                    return input.Read();
+            }
+        }
+
+        protected bool isTokenChar(int c)
+        {
+            state = runAutomaton.Step(state, c);
+            if (state < 0)
+            {
+                state = runAutomaton.InitialState;
+                return false;
+            }
+            else
+            {
+                return true;
+            }
+        }
+
+        protected int Normalize(int c)
+        {
+            return lowerCase ? Character.ToLowerCase(c) : c;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            state = runAutomaton.InitialState;
+            lastOffset = off = 0;
+            //assert !enableChecks || streamState != State.RESET : "double reset()";
+            streamState = State.RESET;
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            // override, not a hiding `virtual`: otherwise disposal through a base reference never reaches this method (assumes the base Dispose(bool) is virtual/abstract - confirm)
+            base.Dispose(disposing);
+            // in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close(); such tests should disable the (currently commented-out) state check.
+            // TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
+            //assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState;
+            streamState = State.CLOSE;
+        }
+
+        bool setReaderTestPoint()
+        {
+            //assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
+            streamState = State.SETREADER;
+            return true;
+        }
+
+        public override void End()
+        {
+            int finalOffset = CorrectOffset(off);
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+            // some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
+            // these tests should disable this check (in general you should consume the entire stream)
+            try
+            {
+                //assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!";
+            }
+            finally
+            {
+                streamState = State.END;
+            }
+        }
+
+        /** 
+         * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+         * should leave this enabled.
+         */
+        public void setEnableChecks(bool enableChecks)
+        {
+            this.enableChecks = enableChecks;
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6e9d73f4/test/test-framework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/test/test-framework/Lucene.Net.TestFramework.csproj b/test/test-framework/Lucene.Net.TestFramework.csproj
index 14d381f..6b31aaa 100644
--- a/test/test-framework/Lucene.Net.TestFramework.csproj
+++ b/test/test-framework/Lucene.Net.TestFramework.csproj
@@ -56,6 +56,7 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Analysis\MockTokenizer.cs" />
     <Compile Include="JavaCompatibility\LuceneTestCase.cs" />
     <Compile Include="JavaCompatibility\LuceneTypesHelpers.cs" />
     <Compile Include="JavaCompatibility\SystemTypesHelpers.cs" />
@@ -85,7 +86,7 @@
     </ProjectReference>
   </ItemGroup>
   <ItemGroup>
-    <Folder Include="Analysis\" />
+    <Folder Include="Index\" />
   </ItemGroup>
   <ItemGroup>
     <None Include="Lucene.Net.snk" />

[3/3] git commit: Partial porting of RandomIndexWriter

Posted by sy...@apache.org.

Partial porting of RandomIndexWriter


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f1fbbd9f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f1fbbd9f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f1fbbd9f

Branch: refs/heads/branch_4x
Commit: f1fbbd9f169689a484e65a005047d8afc93562be
Parents: 6e9d73f
Author: synhershko <it...@code972.com>
Authored: Tue Apr 8 01:08:02 2014 +0300
Committer: synhershko <it...@code972.com>
Committed: Tue Apr 8 01:08:02 2014 +0300

----------------------------------------------------------------------
 test/test-framework/Index/RandomIndexWriter.cs | 316 ++++++++++++++++++++
 1 file changed, 316 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1fbbd9f/test/test-framework/Index/RandomIndexWriter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Index/RandomIndexWriter.cs b/test/test-framework/Index/RandomIndexWriter.cs
new file mode 100644
index 0000000..74af5ed
--- /dev/null
+++ b/test/test-framework/Index/RandomIndexWriter.cs
@@ -0,0 +1,316 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Analysis;
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Randomized;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Version = System.Version;
+
+namespace Lucene.Net
+{
+public class RandomIndexWriter : IDisposable {
+
+  public IndexWriter w;
+  private Random r;
+  int docCount;
+  int flushAt;
+  private double flushAtFactor = 1.0;
+  private bool getReaderCalled;
+  private Codec codec; // sugar
+
+  /**
+   * IndexWriter subclass whose test point randomly yields the current thread,
+   * so that thread scheduling gets mixed up while tests run.
+   */
+  private class MockIndexWriter : IndexWriter {
+
+    private Random r;
+
+    /** Opens the writer over dir/conf and seeds a private Random from the supplied one. */
+    public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) : base(dir, conf) {
+      // TODO: this should be solved in a different way; Random should not be shared (!).
+      this.r = new Random(r.nextLong());
+    }
+
+    // NOTE(review): no access modifier is given here; a C# override must repeat the
+    // accessibility of the base member -- confirm against IndexWriter's declaration.
+    override bool testPoint(String name) {
+      // Yield only when the random draw hits 2; the test point itself always reports true.
+      if (r.nextInt(4) == 2)
+        Thread.Yield(); // .NET spelling; Thread has no lower-case yield()
+      return true;
+    }
+  }
+
+  /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */
+  public RandomIndexWriter(Random r, Directory dir):
+    this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r)))
+  {
+  }
+  
+  /**
+   * create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT
+   * together with the supplied analyzer, then delegates to the IndexWriterConfig constructor.
+   */
+  public RandomIndexWriter(Random r, Directory dir, Analyzer a):
+    this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, a))
+  {
+    // Chaining is done in the constructor initializer; C# has no this(...) statement.
+  }
+  
+  /**
+   * create a RandomIndexWriter with a random config built for the given version and
+   * analyzer, then delegates to the IndexWriterConfig constructor.
+   */
+  public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a):
+    this(r, dir, LuceneTestCase.newIndexWriterConfig(r, v, a))
+  {
+    // Chaining is done in the constructor initializer; C# has no this(...) statement.
+  }
+  
+  /**
+   * create a RandomIndexWriter with the provided config.
+   * Seeds a private Random from r, wraps dir and c in a MockIndexWriter, draws the
+   * first commit threshold (flushAt) and remembers the codec of the writer's config.
+   */
+  public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) {
+    // TODO: this should be solved in a different way; Random should not be shared (!).
+    this.r = new Random(r.nextLong());
+    // From here on the unqualified "r" is the constructor parameter (it shadows the field),
+    // so the writer, flushAt and the force-merge flag all draw from the caller's Random.
+    w = new MockIndexWriter(r, dir, c);
+    // First docCount at which maybeCommit() commits; bounds 10 and 1000 are passed to
+    // _TestUtil.nextInt (presumably inclusive -- TODO confirm).
+    flushAt = _TestUtil.nextInt(r, 10, 1000);
+    codec = w.getConfig().getCodec();
+    if (LuceneTestCase.VERBOSE) {
+      Console.WriteLine("RIW dir=" + dir + " config=" + w.getConfig());
+      Console.WriteLine("codec default=" + codec.getName());
+    }
+
+    // Make sure we sometimes test indices that don't get
+    // any forced merges:
+    doRandomForceMerge = r.nextBoolean();
+  } 
+  
+  /**
+   * Adds a Document, analyzed with the wrapped writer's own analyzer.
+   * Delegates to the two-argument overload.
+   * @see IndexWriter#addDocument(Iterable)
+   */
+  public void addDocument<T>(IEnumerable<T> doc) where T : IIndexableField {
+    // C# generic method: the type parameter follows the name and the Java
+    // "extends" bound becomes a where-constraint.
+    addDocument(doc, w.getAnalyzer());
+  }
+
+  /**
+   * Adds a Document using the given analyzer, then gives maybeCommit() a chance to commit.
+   * When the draw from r.nextInt(5) equals 3 the document is handed to the writer as a
+   * one-element block via AddDocuments; otherwise it goes through AddDocument directly.
+   *
+   * NOTE(review): this method is still Java source (generic method syntax, final parameter,
+   * anonymous Iterable/Iterator classes, @Override annotations) and has not been ported yet.
+   */
+  public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) {
+    if (r.nextInt(5) == 3) {
+      // TODO: maybe, we should simply buffer up added docs
+      // (but we need to clone them), and only when
+      // getReader, commit, etc. are called, we do an
+      // addDocuments?  Would be better testing.
+      w.AddDocuments(new Iterable<Iterable<T>>() {
+
+        // Each iterator handed out yields the single wrapped document exactly once.
+        public Iterator<Iterable<T>> iterator() {
+          return new Iterator<Iterable<T>>() {
+            // Flipped to true once the document has been returned by next().
+            boolean done;
+            
+            @Override
+            public boolean hasNext() {
+              return !done;
+            }
+
+            @Override
+            public void remove() {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public Iterable<T> next() {
+              // A second call after the document was handed out is an error.
+              if (done) {
+                throw new IllegalStateException();
+              }
+              done = true;
+              return doc;
+            }
+          };
+        }
+        }, a);
+    } else {
+      w.AddDocument(doc, a);
+    }
+    
+    maybeCommit();
+  }
+
+  /**
+   * Counts one more added/updated document and, when the count seen on entry equals
+   * the current threshold, commits and pushes the threshold further out.
+   */
+  private void maybeCommit() {
+    // The counter is bumped on every call, whether or not a commit follows.
+    bool thresholdReached = docCount++ == flushAt;
+    if (!thresholdReached) {
+      return;
+    }
+
+    if (LuceneTestCase.VERBOSE) {
+      Console.WriteLine("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
+    }
+    w.Commit();
+
+    // Bounds of the next step scale with the current factor.
+    int stepLow = (int) (flushAtFactor * 10);
+    int stepHigh = (int) (flushAtFactor * 1000);
+    flushAt += _TestUtil.nextInt(r, stepLow, stepHigh);
+
+    // gradually but exponentially increase time b/w flushes
+    if (flushAtFactor < 2e6) {
+      flushAtFactor *= 1.05;
+    }
+  }
+  
+  /**
+   * Adds a block of documents through the wrapped writer, then gives maybeCommit()
+   * a chance to commit.
+   */
+  public void addDocuments(IEnumerable<IEnumerable<IIndexableField>> docs) {
+    // IEnumerable<out T> is covariant, which stands in for the Java "? extends" wildcards.
+    w.AddDocuments(docs);
+    maybeCommit();
+  }
+
+  /**
+   * Passes delTerm and a block of documents to the wrapped writer's UpdateDocuments,
+   * then gives maybeCommit() a chance to commit.
+   */
+  public void updateDocuments(Term delTerm, IEnumerable<IEnumerable<IIndexableField>> docs) {
+    // IEnumerable<out T> is covariant, which stands in for the Java "? extends" wildcards;
+    // the element type uses the same IIndexableField name as addDocuments.
+    w.UpdateDocuments(delTerm, docs);
+    maybeCommit();
+  }
+
+  /**
+   * Updates a document, then gives maybeCommit() a chance to commit.
+   * When the draw from r.nextInt(5) equals 3 the document is handed to the writer as a
+   * one-element block; otherwise it goes through UpdateDocument directly.
+   *
+   * NOTE(review): this method is still Java source (generic method syntax, final parameter,
+   * anonymous Iterable/Iterator classes, @Override annotations) and has not been ported yet.
+   * It also calls w.updateDocuments in lower case while the rest of the class calls
+   * w.UpdateDocuments -- confirm which spelling the writer exposes.
+   * @see IndexWriter#updateDocument(Term, Iterable)
+   */
+  public <T extends IndexableField> void updateDocument(Term t, final Iterable<T> doc) {
+    if (r.nextInt(5) == 3) {
+      w.updateDocuments(t, new Iterable<Iterable<T>>() {
+
+        // Each iterator handed out yields the single wrapped document exactly once.
+        @Override
+        public Iterator<Iterable<T>> iterator() {
+          return new Iterator<Iterable<T>>() {
+            // Flipped to true once the document has been returned by next().
+            boolean done;
+            
+            @Override
+            public boolean hasNext() {
+              return !done;
+            }
+
+            @Override
+            public void remove() {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public Iterable<T> next() {
+              // A second call after the document was handed out is an error.
+              if (done) {
+                throw new IllegalStateException();
+              }
+              done = true;
+              return doc;
+            }
+          };
+        }
+        });
+    } else {
+      w.UpdateDocument(t, doc);
+    }
+    maybeCommit();
+  }
+  
+  /** Forwards the given directories to the wrapped writer's AddIndexes. */
+  public void addIndexes(params Directory[] dirs) {
+    w.AddIndexes(dirs);
+  }
+
+  /** Forwards the given readers to the wrapped writer's AddIndexes. */
+  public void addIndexes(params IndexReader[] readers) {
+    // "params T[]" is the C# counterpart of Java's "T..." varargs.
+    w.AddIndexes(readers);
+  }
+  
+  public void deleteDocuments(Term term) {
+    w.DeleteDocuments(term);
+  }
+
+  public void deleteDocuments(Query q) {
+    w.DeleteDocuments(q);
+  }
+  
+  public void commit() {
+    w.Commit();
+  }
+  
+  public int numDocs() {
+    return w.NumDocs;
+  }
+
+  public int maxDoc() {
+    return w.MaxDoc;
+  }
+
+  public void deleteAll() {
+    w.DeleteAll();
+  }
+
+  public DirectoryReader getReader() {
+    return getReader(true);
+  }
+
+  private bool doRandomForceMerge = true;
+  private bool doRandomForceMergeAssert = true;
+
+  public void forceMergeDeletes(bool doWait) {
+    w.ForceMergeDeletes(doWait);
+  }
+
+  public void forceMergeDeletes() {
+    w.ForceMergeDeletes();
+  }
+
+  public void setDoRandomForceMerge(bool v) {
+    doRandomForceMerge = v;
+  }
+
+  public void setDoRandomForceMergeAssert(bool v) {
+    doRandomForceMergeAssert = v;
+  }
+
+  /**
+   * When random force merges are enabled, merges the index either down to a single
+   * segment or down to a randomly chosen segment limit.
+   *
+   * NOTE(review): this method has the same name as the bool field doRandomForceMerge
+   * declared in this class; C# does not allow a field and a method to share a name,
+   * so one of the two needs renaming.
+   */
+  private void doRandomForceMerge() {
+    if (doRandomForceMerge) {
+      int segCount = w.SegmentCount;
+      // With no segments there is no range to pick a limit from, so take the full-merge path.
+      if (r.nextBoolean() || segCount == 0) {
+        // full forceMerge
+        if (LuceneTestCase.VERBOSE) {
+          Console.WriteLine("RIW: doRandomForceMerge(1)");
+        }
+        w.ForceMerge(1);
+      } else {
+        // partial forceMerge
+        int limit = _TestUtil.nextInt(r, 1, segCount);
+        if (LuceneTestCase.VERBOSE) {
+          Console.WriteLine("RIW: doRandomForceMerge(" + limit + ")");
+        }
+        w.ForceMerge(limit);
+        //assert !doRandomForceMergeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
+      }
+    }
+  }
+
+  /**
+   * Returns a reader over the index, randomly choosing how it is obtained:
+   * either straight from the writer, or after a commit, from the writer again
+   * or from DirectoryReader.Open on the writer's directory.
+   * Also records that a reader was requested and occasionally runs a random force merge first.
+   */
+  public DirectoryReader getReader(bool applyDeletions) {
+    getReaderCalled = true;
+    if (r.nextInt(20) == 2) {
+      doRandomForceMerge();
+    }
+    // If we are writing with PreFlexRW, force a full
+    // IndexReader.open so terms are sorted in codepoint
+    // order during searching:
+    if (!applyDeletions || (!codec.getName().Equals("Lucene3x") && r.nextBoolean())) {
+      if (LuceneTestCase.VERBOSE) {
+        Console.WriteLine("RIW.getReader: use NRT reader");
+      }
+      // Occasionally commit before handing out the writer's reader.
+      if (r.nextInt(5) == 1) {
+        w.Commit();
+      }
+      return w.getReader(applyDeletions);
+    } else {
+      if (LuceneTestCase.VERBOSE) {
+        Console.WriteLine("RIW.getReader: open new reader");
+      }
+      // This path always commits before a reader is obtained.
+      w.Commit();
+      if (r.nextBoolean()) {
+        return DirectoryReader.Open(w.Directory, _TestUtil.nextInt(r, 1, 10));
+      } else {
+        return w.getReader(applyDeletions);
+      }
+    }
+  }
+
+  /**
+   * Close this writer, occasionally running a random force merge first when
+   * getReader() was never called.
+   * @see IndexWriter#close()
+   */
+  public void close() {
+    // if someone isn't using getReader() API, we want to be sure to
+    // forceMerge since presumably they might open a reader on the dir.
+    if (!getReaderCalled && r.nextInt(8) == 2) {
+      doRandomForceMerge();
+    }
+    w.Close();
+  }
+
+  /**
+   * IDisposable implementation; the class declares the interface, so it needs this
+   * member. Disposing is the same as calling close().
+   */
+  public void Dispose() {
+    close();
+  }
+
+  /**
+   * Forces a forceMerge.
+   * <p>
+   * NOTE: this should be avoided in tests unless absolutely necessary,
+   * as it will result in less test coverage.
+   * @see IndexWriter#forceMerge(int)
+   */
+  public void ForceMerge(int maxSegmentCount) {
+    w.ForceMerge(maxSegmentCount);
+  }
+}
+
+}