Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 12:42:32 UTC

[01/14] lucenenet git commit: Lucene.Net.Analysis.Common.Analysis.Miscellaneous.WordDelimiterIterator refactor: changed parameters and fields from sbyte to byte for CLS compliance

Repository: lucenenet
Updated Branches:
  refs/heads/api-work 11a1a1c36 -> bc485b4c4


Lucene.Net.Analysis.Common.Analysis.Miscellaneous.WordDelimiterIterator refactor: changed parameters and fields from sbyte to byte for CLS compliance
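
For background: the Common Language Specification (CLS) includes System.Byte but not System.SByte, so public members that expose sbyte (or sbyte[]) raise CS3001/CS3002 warnings in assemblies marked [CLSCompliant(true)] and cannot be consumed from CLS-only languages. A minimal sketch of the kind of signature change this commit makes (the type and member names here are illustrative, not from the commit):

    using System;

    [assembly: CLSCompliant(true)]

    public class CharTypeTableHolder
    {
        private readonly byte[] table;

        // Before this change: public CharTypeTableHolder(sbyte[] table)
        //   -> warning CS3001: Argument type 'sbyte[]' is not CLS-compliant
        // After: byte, the CLS-compliant unsigned 8-bit type, is used instead.
        public CharTypeTableHolder(byte[] table)
        {
            this.table = table;
        }
    }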


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7cd69ab0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7cd69ab0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7cd69ab0

Branch: refs/heads/api-work
Commit: 7cd69ab00ac649081feeb814c846e9eaeb99f208
Parents: 11a1a1c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Feb 1 23:51:38 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Feb 1 23:51:38 2017 +0700

----------------------------------------------------------------------
 .../Miscellaneous/Lucene47WordDelimiterFilter.cs  |  2 +-
 .../Analysis/Miscellaneous/WordDelimiterFilter.cs |  2 +-
 .../Miscellaneous/WordDelimiterFilterFactory.cs   | 12 ++++++------
 .../Miscellaneous/WordDelimiterIterator.cs        | 18 +++++++++---------
 .../Analysis/Core/TestBugInSomething.cs           |  6 +++---
 5 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 219db723..d46b6c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -155,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// <param name="charTypeTable"> table containing character types </param>
         /// <param name="configurationFlags"> Flags configuring the filter </param>
         /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
-        public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
+        public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
             : base(@in)
         {
             termAttribute = AddAttribute<ICharTermAttribute>();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index d002ab2..77f643e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -206,7 +206,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// <param name="charTypeTable"> table containing character types </param>
         /// <param name="configurationFlags"> Flags configuring the filter </param>
         /// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
-        public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
+        public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
               : base(@in)
         {
             InitializeInstanceFields();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
index b9aaf96..ce4959c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -48,7 +48,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         private readonly string wordFiles;
         private readonly string types;
         private readonly int flags;
-        internal sbyte[] typeTable = null;
+        internal byte[] typeTable = null;
         private CharArraySet protectedWords = null;
 
         /// <summary>
@@ -142,9 +142,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
         private static Regex typePattern = new Regex("(.*)\\s*=>\\s*(.*)\\s*$", RegexOptions.Compiled);
 
         // parses a list of MappingCharFilter style rules into a custom byte[] type table
-        private sbyte[] ParseTypes(IList<string> rules)
+        private byte[] ParseTypes(IList<string> rules)
         {
-            IDictionary<char, sbyte> typeMap = new SortedDictionary<char, sbyte>();
+            IDictionary<char, byte> typeMap = new SortedDictionary<char, byte>();
             foreach (string rule in rules)
             {
                 //Matcher m = typePattern.matcher(rule);
@@ -155,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                     throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]");
                 }
                 string lhs = ParseString(m.Groups[1].Value.Trim());
-                sbyte rhs = ParseType(m.Groups[2].Value.Trim());
+                byte rhs = ParseType(m.Groups[2].Value.Trim());
                 if (lhs.Length != 1)
                 {
                     throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
@@ -168,7 +168,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             }
 
             // ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
-            sbyte[] types = new sbyte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
+            byte[] types = new byte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
             for (int i = 0; i < types.Length; i++)
             {
                 types[i] = WordDelimiterIterator.GetType(i);
@@ -180,7 +180,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
             return types;
         }
 
-        private sbyte ParseType(string s)
+        private byte ParseType(string s)
         {
             if (s.Equals("LOWER"))
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
index 46c43a1..3fe61b6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// Indicates the end of iteration </summary>
         public const int DONE = -1;
 
-        public static readonly sbyte[] DEFAULT_WORD_DELIM_TABLE;
+        public static readonly byte[] DEFAULT_WORD_DELIM_TABLE;
 
         internal char[] text;
         internal int length;
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// </summary>
         internal readonly bool stemEnglishPossessive;
 
-        private readonly sbyte[] charTypeTable;
+        private readonly byte[] charTypeTable;
 
         /// <summary>
         /// if true, need to skip over a possessive found in the last call to next() </summary>
@@ -81,21 +81,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
         // done if separated by these chars?) "," would be an obvious candidate...
         static WordDelimiterIterator()
         {
-            var tab = new sbyte[256];
+            var tab = new byte[256];
             for (int i = 0; i < 256; i++)
             {
-                sbyte code = 0;
+                byte code = 0;
                 if (char.IsLower((char)i))
                 {
-                    code |= (sbyte)WordDelimiterFilter.LOWER;
+                    code |= (byte)WordDelimiterFilter.LOWER;
                 }
                 else if (char.IsUpper((char)i))
                 {
-                    code |= (sbyte)WordDelimiterFilter.UPPER;
+                    code |= (byte)WordDelimiterFilter.UPPER;
                 }
                 else if (char.IsDigit((char)i))
                 {
-                    code |= (sbyte)WordDelimiterFilter.DIGIT;
+                    code |= (byte)WordDelimiterFilter.DIGIT;
                 }
                 if (code == 0)
                 {
@@ -113,7 +113,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// <param name="splitOnCaseChange"> if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) </param>
         /// <param name="splitOnNumerics"> if true, causes "j2se" to be three tokens; "j" "2" "se" </param>
         /// <param name="stemEnglishPossessive"> if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" </param>
-        internal WordDelimiterIterator(sbyte[] charTypeTable, bool splitOnCaseChange, bool splitOnNumerics, bool stemEnglishPossessive)
+        internal WordDelimiterIterator(byte[] charTypeTable, bool splitOnCaseChange, bool splitOnNumerics, bool stemEnglishPossessive)
         {
             this.charTypeTable = charTypeTable;
             this.splitOnCaseChange = splitOnCaseChange;
@@ -315,7 +315,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         /// </summary>
         /// <param name="ch"> Character whose type is to be determined </param>
         /// <returns> Type of the character </returns>
-        public static sbyte GetType(int ch)
+        public static byte GetType(int ch)
         {
             switch (CharUnicodeInfo.GetUnicodeCategory((char)ch))
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
index e552384..1a7f7e9 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -333,7 +333,7 @@ namespace Lucene.Net.Analysis.Core
         public virtual void TestCuriousWikipediaString()
         {
             CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string>(Arrays.AsList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false);
-            sbyte[] table = new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
+            byte[] table = (byte[])(Array)new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
             Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table);
             CheckAnalysisConsistency(Random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
         }
@@ -343,9 +343,9 @@ namespace Lucene.Net.Analysis.Core
             private readonly TestBugInSomething outerInstance;
 
             private CharArraySet protWords;
-            private sbyte[] table;
+            private byte[] table;
 
-            public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, sbyte[] table)
+            public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, byte[] table)
             {
                 this.outerInstance = outerInstance;
                 this.protWords = protWords;
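
A note on the (byte[])(Array) cast in the test change above: C# does not allow a direct cast from sbyte[] to byte[], but the CLR treats arrays of same-size signed and unsigned integral types as compatible, so routing the cast through Array (or object) succeeds at runtime with no copy. A standalone sketch:

    using System;

    class SByteArrayCastDemo
    {
        static void Main()
        {
            sbyte[] signed = { -57, 26, 1 };

            // Rejected by the C# compiler: byte[] direct = (byte[])signed;
            // Accepted by the CLR when laundered through Array/object:
            byte[] unsigned = (byte[])(Array)signed;

            Console.WriteLine(unsigned[0]); // 199: -57 reinterpreted as unsigned
        }
    }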


[13/14] lucenenet git commit: IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default; they are only used by the Highlighter.

Posted by ni...@apache.org.
IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default; they are only used by the Highlighter.
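
Since the workarounds now hang off a plain bool property, callers opt in per instance; the Highlighter does so via an object initializer, as the PostingsHighlighter hunk below shows (this sketch assumes the same icu.net types referenced in the diff):

    var breakIterator = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
    {
        EnableHacks = true // opt in to the newline/capitalization sentence hacks
    };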


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc7b5b52
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc7b5b52
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc7b5b52

Branch: refs/heads/api-work
Commit: fc7b5b52dd64877d5d63498b3d2df4e54c569bd8
Parents: 506f55a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 18:02:48 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:37 2017 +0700

----------------------------------------------------------------------
 src/IcuBreakIterator.cs                         | 23 +++++++++++++++-----
 .../PostingsHighlight/PostingsHighlighter.cs    |  5 ++++-
 2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
index 0bf6007..6028ba1 100644
--- a/src/IcuBreakIterator.cs
+++ b/src/IcuBreakIterator.cs
@@ -50,6 +50,8 @@ namespace Lucene.Net
         /// </summary>
         protected int m_end;
 
+        private bool enableHacks = false;
+
         public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
             : this(type, CultureInfo.CurrentCulture)
         {
@@ -63,6 +65,13 @@ namespace Lucene.Net
             this.type = type;
         }
 
+        
+        public virtual bool EnableHacks
+        {
+            get { return enableHacks; }
+            set { enableHacks = value; }
+        }
+
         /// <summary>
         /// Sets the current iteration position to the beginning of the text.
         /// </summary>
@@ -280,20 +289,22 @@ namespace Lucene.Net
 
         private void LoadBoundaries(int start, int end)
         {
-            //boundaries = new List<int>();
-
             IEnumerable<Icu.Boundary> icuBoundaries;
             string offsetText = text.Substring(start, end - start);
 
-
             if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
             {
-                // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
-                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+                if (enableHacks)
+                {
+                    // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                    offsetText = offsetText.Replace("-", "a");
+                }
+                
+                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
             }
             else
             {
-                if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
                 {
                     // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
                     offsetText = offsetText.Replace("\n", " ");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
index 63c48bc..db04ee1 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
@@ -131,7 +131,10 @@ namespace Lucene.Net.Search.PostingsHighlight
         /// </summary>
         protected virtual BreakIterator GetBreakIterator(string field)
         {
-            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture);
+            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
+            {
+                EnableHacks = true
+            };
         }
 
         /// <summary>


[03/14] lucenenet git commit: Lucene.Net.Core.Analysis.TokenAttributes: Deleted unused TermAttribute class (not part of Lucene 4.8.0)

Posted by ni...@apache.org.
Lucene.Net.Core.Analysis.TokenAttributes: Deleted unused TermAttribute class (not part of Lucene 4.8.0)


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1c87ed5b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1c87ed5b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1c87ed5b

Branch: refs/heads/api-work
Commit: 1c87ed5b0139f836cbac1cfa4e76463882281e8f
Parents: 21b3d8b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:26:58 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:29 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Tokenattributes/TermAttribute.cs   | 268 -------------------
 1 file changed, 268 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1c87ed5b/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs b/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
deleted file mode 100644
index 3dad641..0000000
--- a/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
+++ /dev/null
@@ -1,268 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using Lucene.Net.Support;
-using ArrayUtil = Lucene.Net.Util.ArrayUtil;
-using Attribute = Lucene.Net.Util.Attribute;
-
-namespace Lucene.Net.Analysis.Tokenattributes
-{
-    
-    /// <summary> The term text of a Token.</summary>
-    [Serializable]
-    public class TermAttribute:Attribute, ITermAttribute, System.ICloneable
-    {
-        private static int MIN_BUFFER_SIZE = 10;
-        
-        private char[] termBuffer;
-        private int termLength;
-
-        /// <summary>Returns the Token's term text.
-        /// 
-        /// This method has a performance penalty
-        /// because the text is stored internally in a char[].  If
-        /// possible, use <see cref="TermBuffer()" /> and 
-        /// <see cref="TermLength()" /> directly instead.  If you 
-        /// really need a String, use this method, which is nothing more than
-        /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
-        /// </summary>
-        public virtual string Term
-        {
-            get
-            {
-                InitTermBuffer();
-                return new System.String(termBuffer, 0, termLength);
-            }
-        }
-
-        /// <summary>Copies the contents of buffer, starting at offset for
-        /// length characters, into the termBuffer array.
-        /// </summary>
-        /// <param name="buffer">the buffer to copy
-        /// </param>
-        /// <param name="offset">the index in the buffer of the first character to copy
-        /// </param>
-        /// <param name="length">the number of characters to copy
-        /// </param>
-        public virtual void  SetTermBuffer(char[] buffer, int offset, int length)
-        {
-            GrowTermBuffer(length);
-            Array.Copy(buffer, offset, termBuffer, 0, length);
-            termLength = length;
-        }
-        
-        /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
-        /// <param name="buffer">the buffer to copy
-        /// </param>
-        public virtual void  SetTermBuffer(System.String buffer)
-        {
-            int length = buffer.Length;
-            GrowTermBuffer(length);
-            TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
-            termLength = length;
-        }
-        
-        /// <summary>Copies the contents of buffer, starting at offset and continuing
-        /// for length characters, into the termBuffer array.
-        /// </summary>
-        /// <param name="buffer">the buffer to copy
-        /// </param>
-        /// <param name="offset">the index in the buffer of the first character to copy
-        /// </param>
-        /// <param name="length">the number of characters to copy
-        /// </param>
-        public virtual void  SetTermBuffer(System.String buffer, int offset, int length)
-        {
-            System.Diagnostics.Debug.Assert(offset <= buffer.Length);
-            System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
-            GrowTermBuffer(length);
-            TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
-            termLength = length;
-        }
-        
-        /// <summary>Returns the internal termBuffer character array which
-        /// you can then directly alter.  If the array is too
-        /// small for your token, use <see cref="ResizeTermBuffer(int)" />
-        /// to increase it.  After
-        /// altering the buffer be sure to call <see cref="SetTermLength" />
-        /// to record the number of valid
-        /// characters that were placed into the termBuffer. 
-        /// </summary>
-        public virtual char[] TermBuffer()
-        {
-            InitTermBuffer();
-            return termBuffer;
-        }
-        
-        /// <summary>Grows the termBuffer to at least size newSize, preserving the
-        /// existing content. Note: If the next operation is to change
-        /// the contents of the term buffer use
-        /// <see cref="SetTermBuffer(char[], int, int)" />,
-        /// <see cref="SetTermBuffer(String)" />, or
-        /// <see cref="SetTermBuffer(String, int, int)" />
-        /// to optimally combine the resize with the setting of the termBuffer.
-        /// </summary>
-        /// <param name="newSize">minimum size of the new termBuffer
-        /// </param>
-        /// <returns> newly created termBuffer with length >= newSize
-        /// </returns>
-        public virtual char[] ResizeTermBuffer(int newSize)
-        {
-            if (termBuffer == null)
-            {
-                // The buffer is always at least MIN_BUFFER_SIZE
-                termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
-            }
-            else
-            {
-                if (termBuffer.Length < newSize)
-                {
-                    // Not big enough; create a new array with slight
-                    // over allocation and preserve content
-                    char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
-                    Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
-                    termBuffer = newCharBuffer;
-                }
-            }
-            return termBuffer;
-        }
-        
-        
-        /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
-        /// its always used in places that set the content 
-        /// </summary>
-        /// <param name="newSize">minimum size of the buffer
-        /// </param>
-        private void  GrowTermBuffer(int newSize)
-        {
-            if (termBuffer == null)
-            {
-                // The buffer is always at least MIN_BUFFER_SIZE
-                termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
-            }
-            else
-            {
-                if (termBuffer.Length < newSize)
-                {
-                    // Not big enough; create a new array with slight
-                    // over allocation:
-                    termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
-                }
-            }
-        }
-        
-        private void  InitTermBuffer()
-        {
-            if (termBuffer == null)
-            {
-                termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
-                termLength = 0;
-            }
-        }
-        
-        /// <summary>Return number of valid characters (length of the term)
-        /// in the termBuffer array. 
-        /// </summary>
-        public virtual int TermLength()
-        {
-            return termLength;
-        }
-        
-        /// <summary>Set number of valid characters (length of the term) in
-        /// the termBuffer array. Use this to truncate the termBuffer
-        /// or to synchronize with external manipulation of the termBuffer.
-        /// Note: to grow the size of the array,
-        /// use <see cref="ResizeTermBuffer(int)" /> first.
-        /// </summary>
-        /// <param name="length">the truncated length
-        /// </param>
-        public virtual void  SetTermLength(int length)
-        {
-            InitTermBuffer();
-            if (length > termBuffer.Length)
-                throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
-            termLength = length;
-        }
-        
-        public override int GetHashCode()
-        {
-            InitTermBuffer();
-            int code = termLength;
-            code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
-            return code;
-        }
-        
-        public override void  Clear()
-        {
-            termLength = 0;
-        }
-        
-        public override System.Object Clone()
-        {
-            TermAttribute t = (TermAttribute) base.Clone();
-            // Do a deep clone
-            if (termBuffer != null)
-            {
-                t.termBuffer = new char[termBuffer.Length];
-                termBuffer.CopyTo(t.termBuffer, 0);
-            }
-            return t;
-        }
-        
-        public  override bool Equals(System.Object other)
-        {
-            if (other == this)
-            {
-                return true;
-            }
-            
-            if (other is ITermAttribute)
-            {
-                InitTermBuffer();
-                TermAttribute o = ((TermAttribute) other);
-                o.InitTermBuffer();
-                
-                if (termLength != o.termLength)
-                    return false;
-                for (int i = 0; i < termLength; i++)
-                {
-                    if (termBuffer[i] != o.termBuffer[i])
-                    {
-                        return false;
-                    }
-                }
-                return true;
-            }
-            
-            return false;
-        }
-        
-        public override System.String ToString()
-        {
-            InitTermBuffer();
-            return "term=" + new System.String(termBuffer, 0, termLength);
-        }
-        
-        public override void  CopyTo(Attribute target)
-        {
-            InitTermBuffer();
-            ITermAttribute t = (ITermAttribute) target;
-            t.SetTermBuffer(termBuffer, 0, termLength);
-        }
-    }
-}
\ No newline at end of file


[10/14] lucenenet git commit: Moved IcuBreakIterator to src\ directory and added it as a linked file to both Lucene.Net.Analysis.Common and Lucene.Net.Highlighter. This seems like a better option than creating a separate DLL to share this dependency or d

Posted by ni...@apache.org.
Moved IcuBreakIterator to src\ directory and added it as a linked file to both Lucene.Net.Analysis.Common and Lucene.Net.Highlighter. This seems like a better option than creating a separate DLL to share this dependency or duplicating it.
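
Concretely, the sharing mechanism is a linked <Compile> item in the classic csproj (plus an includeFiles entry in project.json), as the diffs below show:

    <Compile Include="..\IcuBreakIterator.cs">
      <Link>IcuBreakIterator.cs</Link>
    </Compile>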


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5a7cb173
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5a7cb173
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5a7cb173

Branch: refs/heads/api-work
Commit: 5a7cb173489a0e22b6ff890f7c283ee50895e42d
Parents: bed1f16
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 16:28:04 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:35 2017 +0700

----------------------------------------------------------------------
 src/IcuBreakIterator.cs                         | 368 +++++++++++++++++++
 .../Lucene.Net.Analysis.Common.csproj           |   3 +
 src/Lucene.Net.Analysis.Common/project.json     |   3 +
 src/Lucene.Net.Highlighter/IcuBreakIterator.cs  | 368 -------------------
 .../Lucene.Net.Highlighter.csproj               |   4 +-
 src/Lucene.Net.Highlighter/project.json         |   7 +
 6 files changed, 384 insertions(+), 369 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
new file mode 100644
index 0000000..0bf6007
--- /dev/null
+++ b/src/IcuBreakIterator.cs
@@ -0,0 +1,368 @@
+\ufeffusing Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+    /// provides methods to move forward, reverse, and randomly through a set of text breaks
+    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+    /// </summary>
+    // LUCENENET specific type
+    internal class IcuBreakIterator : BreakIterator
+    {
+        private readonly Icu.Locale locale;
+        private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+        private List<int> boundaries = new List<int>();
+        private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
+        private string text;
+
+        /// <summary>
+        /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_start;
+
+        /// <summary>
+        /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
+        /// </summary>
+        protected int m_end;
+
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+            : this(type, CultureInfo.CurrentCulture)
+        {
+        }
+
+        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
+        {
+            if (locale == null)
+                throw new ArgumentNullException("locale");
+            this.locale = new Icu.Locale(locale.Name);
+            this.type = type;
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <returns>The offset of the beginning of the text.</returns>
+        public override int First()
+        {
+            currentBoundaryIndex = 0;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Sets the current iteration position to the end of the text.
+        /// </summary>
+        /// <returns>The text's past-the-end offset.</returns>
+        public override int Last()
+        {
+            currentBoundaryIndex = boundaries.Count - 1;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Advances the iterator either forward or backward the specified number of steps.
+        /// Negative values move backward, and positive values move forward.  This is
+        /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
+        /// </summary>
+        /// <param name="n">The number of steps to move.  The sign indicates the direction
+        /// (negative is backwards, and positive is forwards).</param>
+        /// <returns>The character offset of the boundary position n boundaries away from
+        /// the current one.</returns>
+        public override int Next(int n)
+        {
+            int result = Current;
+            while (n > 0)
+            {
+                result = Next();
+                --n;
+            }
+            while (n < 0)
+            {
+                result = Previous();
+                ++n;
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Advances the iterator to the next boundary position.
+        /// </summary>
+        /// <returns>The position of the first boundary after this one.</returns>
+        public override int Next()
+        {
+            if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+            currentBoundaryIndex++;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Advances the iterator backwards, to the last boundary preceding this one.
+        /// </summary>
+        /// <returns>The position of the last boundary position preceding this one.</returns>
+        public override int Previous()
+        {
+            if (currentBoundaryIndex == 0 || boundaries.Count == 0)
+            {
+                return DONE;
+            }
+            currentBoundaryIndex--;
+            return ReturnCurrent();
+        }
+
+        /// <summary>
+        /// Throw <see cref="ArgumentException"/> unless begin &lt;= offset &lt; end.
+        /// </summary>
+        /// <param name="offset"></param>
+        private void CheckOffset(int offset)
+        {
+            if (offset < m_start || offset > m_end)
+            {
+                throw new ArgumentException("offset out of bounds");
+            }
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the first boundary position following
+        /// the specified position.
+        /// </summary>
+        /// <param name="offset">The position from which to begin searching for a break position.</param>
+        /// <returns>The position of the first break after the current position.</returns>
+        public override int Following(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int following = GetLowestIndexGreaterThan(offset);
+            if (following == -1)
+            {
+                currentBoundaryIndex = boundaries.Count - 1;
+                return DONE;
+            }
+            else
+            {
+                currentBoundaryIndex = following;
+            }
+            return ReturnCurrent();
+        }
+
+        private int GetLowestIndexGreaterThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+            if (index < 0)
+            {
+                return ~index;
+            }
+            else if (index + 1 < boundaries.Count)
+            {
+                return index + 1;
+            }
+
+            return -1;
+        }
+
+        /// <summary>
+        /// Sets the iterator to refer to the last boundary position before the
+        /// specified position.
+        /// </summary>
+        /// <param name="offset">The position to begin searching for a break from.</param>
+        /// <returns>The position of the last boundary before the starting position.</returns>
+        public override int Preceding(int offset)
+        {
+            CheckOffset(offset);
+
+            if (boundaries.Count == 0)
+            {
+                return DONE;
+            }
+
+            int preceeding = GetHighestIndexLessThan(offset);
+            if (preceeding == -1)
+            {
+                currentBoundaryIndex = 0;
+                return DONE;
+            }
+            else
+            {
+                currentBoundaryIndex = preceeding;
+            }
+            return ReturnCurrent();
+        }
+
+        private int GetHighestIndexLessThan(int offset)
+        {
+            int index = boundaries.BinarySearch(offset);
+            if (index < 0)
+            {
+                return ~index - 1;
+            }
+            else
+            {
+                // NOTE: This is intentionally allowed to return -1 in the case
+                // where index == 0. This state indicates we are before the first boundary.
+                return index - 1;
+            }
+        }
+
+        /// <summary>
+        /// Returns the current iteration position.
+        /// </summary>
+        public override int Current
+        {
+            get { return ReturnCurrent(); }
+        }
+
+        /// <summary>
+        /// Gets the text being analyzed.
+        /// </summary>
+        public override string Text
+        {
+            get
+            {
+                return text;
+            }
+        }
+
+        /// <summary>
+        /// Set the iterator to analyze a new piece of text.  This function resets
+        /// the current iteration position to the beginning of the text.
+        /// </summary>
+        /// <param name="newText">The text to analyze.</param>
+        public override void SetText(string newText)
+        {
+            text = newText;
+            currentBoundaryIndex = 0;
+            m_start = 0;
+            m_end = newText.Length;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        public override void SetText(CharacterIterator newText)
+        {
+            text = newText.GetTextAsString();
+            currentBoundaryIndex = 0;
+            m_start = newText.BeginIndex;
+            m_end = newText.EndIndex;
+
+            LoadBoundaries(m_start, m_end);
+        }
+
+        private void LoadBoundaries(int start, int end)
+        {
+            //boundaries = new List<int>();
+
+            IEnumerable<Icu.Boundary> icuBoundaries;
+            string offsetText = text.Substring(start, end - start);
+
+
+            if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+            {
+                // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+            }
+            else
+            {
+                if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                {
+                    // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+                    offsetText = offsetText.Replace("\n", " ");
+                    // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+                    // begin with capital letters.
+                    offsetText = CapitalizeFirst(offsetText);
+                }
+
+                icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+            }
+
+            boundaries = icuBoundaries
+                .Select(t => new[] { t.Start + start, t.End + start })
+                .SelectMany(b => b)
+                .Distinct()
+                .ToList();
+        }
+
+        /// <summary>
+        /// Returns true if the specified character offset is a text boundary.
+        /// </summary>
+        /// <param name="offset">the character offset to check.</param>
+        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+        public override bool IsBoundary(int offset)
+        {
+            CheckOffset(offset);
+            return boundaries.Contains(offset);
+        }
+
+        private int ReturnCurrent()
+        {
+            if (boundaries.Count > 0)
+            {
+                return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
+                    ? boundaries[currentBoundaryIndex]
+                    : DONE;
+            }
+
+            // If there are no boundaries, we must return the start offset
+            return m_start;
+        }
+
+        /// <summary>
+        /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
+        /// where it doesn't correctly break sentences unless they begin with a capital letter.
+        /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator 
+        /// code changed to remove calls to this method.
+        /// </summary>
+        public static string CapitalizeFirst(string s)
+        {
+            bool isNewSentence = true;
+            var result = new StringBuilder(s.Length);
+            for (int i = 0; i < s.Length; i++)
+            {
+                if (isNewSentence && char.IsLetter(s[i]))
+                {
+                    result.Append(char.ToUpper(s[i]));
+                    isNewSentence = false;
+                }
+                else
+                    result.Append(s[i]);
+
+                if (s[i] == '!' || s[i] == '?' || s[i] == '.')
+                {
+                    isNewSentence = true;
+                }
+            }
+
+            return result.ToString();
+        }
+    }
+}
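
A note on the ~index pattern in GetLowestIndexGreaterThan/GetHighestIndexLessThan above: List<T>.BinarySearch returns the bitwise complement of the insertion point when the value is absent, so ~index recovers the index of the first element greater than the probe. A standalone sketch:

    using System;
    using System.Collections.Generic;

    class BinarySearchComplementDemo
    {
        static void Main()
        {
            var boundaries = new List<int> { 0, 5, 12, 20 };

            int index = boundaries.BinarySearch(7); // 7 absent -> returns ~2 == -3
            Console.WriteLine(~index);              // 2: boundaries[2] == 12, first value > 7
        }
    }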

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 4e4a0e9..610125f 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -42,6 +42,9 @@
     <Reference Include="System.Xml.Linq" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="..\IcuBreakIterator.cs">
+      <Link>IcuBreakIterator.cs</Link>
+    </Compile>
     <Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilter.cs" />
     <Compile Include="Analysis\Bg\BulgarianStemFilterFactory.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/project.json b/src/Lucene.Net.Analysis.Common/project.json
index 9b9b2cf..02d5a79 100644
--- a/src/Lucene.Net.Analysis.Common/project.json
+++ b/src/Lucene.Net.Analysis.Common/project.json
@@ -16,6 +16,9 @@
       ]
     },
     "compile": {
+      "includeFiles": [
+        "../IcuBreakIterator.cs"
+      ],
       "exclude": [
       ]
     }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs b/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
deleted file mode 100644
index 72e89b0..0000000
--- a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
+++ /dev/null
@@ -1,368 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Search
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
-    /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
-    /// provides methods to move forward, reverse, and randomly through a set of text breaks
-    /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
-    /// </summary>
-    // LUCENENET specific type
-    internal class IcuBreakIterator : BreakIterator
-    {
-        private readonly Icu.Locale locale;
-        private readonly Icu.BreakIterator.UBreakIteratorType type;
-
-        private List<int> boundaries = new List<int>();
-        private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
-        private string text;
-
-        /// <summary>
-        /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_start;
-
-        /// <summary>
-        /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
-        /// </summary>
-        protected int m_end;
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
-            : this(type, CultureInfo.CurrentCulture)
-        {
-        }
-
-        public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
-        {
-            if (locale == null)
-                throw new ArgumentNullException("locale");
-            this.locale = new Icu.Locale(locale.Name);
-            this.type = type;
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <returns>The offset of the beginning of the text.</returns>
-        public override int First()
-        {
-            currentBoundaryIndex = 0;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Sets the current iteration position to the end of the text.
-        /// </summary>
-        /// <returns>The text's past-the-end offset.</returns>
-        public override int Last()
-        {
-            currentBoundaryIndex = boundaries.Count - 1;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator either forward or backward the specified number of steps.
-        /// Negative values move backward, and positive values move forward.  This is
-        /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
-        /// </summary>
-        /// <param name="n">The number of steps to move.  The sign indicates the direction
-        /// (negative is backwards, and positive is forwards).</param>
-        /// <returns>The character offset of the boundary position n boundaries away from
-        /// the current one.</returns>
-        public override int Next(int n)
-        {
-            int result = Current;
-            while (n > 0)
-            {
-                result = Next();
-                --n;
-            }
-            while (n < 0)
-            {
-                result = Previous();
-                ++n;
-            }
-            return result;
-        }
-
-        /// <summary>
-        /// Advances the iterator to the next boundary position.
-        /// </summary>
-        /// <returns>The position of the first boundary after this one.</returns>
-        public override int Next()
-        {
-            if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex++;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Advances the iterator backwards, to the last boundary preceding this one.
-        /// </summary>
-        /// <returns>The position of the last boundary position preceding this one.</returns>
-        public override int Previous()
-        {
-            if (currentBoundaryIndex == 0 || boundaries.Count == 0)
-            {
-                return DONE;
-            }
-            currentBoundaryIndex--;
-            return ReturnCurrent();
-        }
-
-        /// <summary>
-        /// Throw <see cref="ArgumentException"/> unless begin &lt;= offset &lt; end.
-        /// </summary>
-        /// <param name="offset"></param>
-        private void CheckOffset(int offset)
-        {
-            if (offset < m_start || offset > m_end)
-            {
-                throw new ArgumentException("offset out of bounds");
-            }
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the first boundary position following
-        /// the specified position.
-        /// </summary>
-        /// <param name="offset">The position from which to begin searching for a break position.</param>
-        /// <returns>The position of the first break after the current position.</returns>
-        public override int Following(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int following = GetLowestIndexGreaterThan(offset);
-            if (following == -1)
-            {
-                currentBoundaryIndex = boundaries.Count - 1;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = following;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetLowestIndexGreaterThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index;
-            }
-            else if (index + 1 < boundaries.Count)
-            {
-                return index + 1;
-            }
-
-            return -1;
-        }
-
-        /// <summary>
-        /// Sets the iterator to refer to the last boundary position before the
-        /// specified position.
-        /// </summary>
-        /// <param name="offset">The position to begin searching for a break from.</param>
-        /// <returns>The position of the last boundary before the starting position.</returns>
-        public override int Preceding(int offset)
-        {
-            CheckOffset(offset);
-
-            if (boundaries.Count == 0)
-            {
-                return DONE;
-            }
-
-            int preceeding = GetHighestIndexLessThan(offset);
-            if (preceeding == -1)
-            {
-                currentBoundaryIndex = 0;
-                return DONE;
-            }
-            else
-            {
-                currentBoundaryIndex = preceeding;
-            }
-            return ReturnCurrent();
-        }
-
-        private int GetHighestIndexLessThan(int offset)
-        {
-            int index = boundaries.BinarySearch(offset);
-            if (index < 0)
-            {
-                return ~index - 1;
-            }
-            else
-            {
-                // NOTE: This is intentionally allowed to return -1 in the case
-                // where index == 0. This state indicates we are before the first boundary.
-                return index - 1;
-            }
-        }
-
-        /// <summary>
-        /// Returns the current iteration position.
-        /// </summary>
-        public override int Current
-        {
-            get { return ReturnCurrent(); }
-        }
-
-        /// <summary>
-        /// Gets the text being analyzed.
-        /// </summary>
-        public override string Text
-        {
-            get
-            {
-                return text;
-            }
-        }
-
-        /// <summary>
-        /// Set the iterator to analyze a new piece of text.  This function resets
-        /// the current iteration position to the beginning of the text.
-        /// </summary>
-        /// <param name="newText">The text to analyze.</param>
-        public override void SetText(string newText)
-        {
-            text = newText;
-            currentBoundaryIndex = 0;
-            m_start = 0;
-            m_end = newText.Length;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        public override void SetText(CharacterIterator newText)
-        {
-            text = newText.GetTextAsString();
-            currentBoundaryIndex = 0;
-            m_start = newText.BeginIndex;
-            m_end = newText.EndIndex;
-
-            LoadBoundaries(m_start, m_end);
-        }
-
-        private void LoadBoundaries(int start, int end)
-        {
-            //boundaries = new List<int>();
-
-            IEnumerable<Icu.Boundary> icuBoundaries;
-            string offsetText = text.Substring(start, end - start);
-
-
-            if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
-            {
-                // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
-                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
-            }
-            else
-            {
-                if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
-                {
-                    // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
-                    offsetText = offsetText.Replace("\n", " ");
-                    // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
-                    // begin with capital letters.
-                    offsetText = CapitalizeFirst(offsetText);
-                }
-
-                icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
-            }
-
-            boundaries = icuBoundaries
-                .Select(t => new[] { t.Start + start, t.End + start })
-                .SelectMany(b => b)
-                .Distinct()
-                .ToList();
-        }
-
-        /// <summary>
-        /// Returns true if the specified character offset is a text boundary.
-        /// </summary>
-        /// <param name="offset">the character offset to check.</param>
-        /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
-        public override bool IsBoundary(int offset)
-        {
-            CheckOffset(offset);
-            return boundaries.Contains(offset);
-        }
-
-        private int ReturnCurrent()
-        {
-            if (boundaries.Count > 0)
-            {
-                return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
-                    ? boundaries[currentBoundaryIndex]
-                    : DONE;
-            }
-
-            // If there are no boundaries, we must return the start offset
-            return m_start;
-        }
-
-        /// <summary>
-        /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
-        /// where it doesn't correctly break sentences unless they begin with a capital letter.
-        /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator 
-        /// code changed to remove calls to this method.
-        /// </summary>
-        public static string CapitalizeFirst(string s)
-        {
-            bool isNewSentence = true;
-            var result = new StringBuilder(s.Length);
-            for (int i = 0; i < s.Length; i++)
-            {
-                if (isNewSentence && char.IsLetter(s[i]))
-                {
-                    result.Append(char.ToUpper(s[i]));
-                    isNewSentence = false;
-                }
-                else
-                    result.Append(s[i]);
-
-                if (s[i] == '!' || s[i] == '?' || s[i] == '.')
-                {
-                    isNewSentence = true;
-                }
-            }
-
-            return result.ToString();
-        }
-    }
-}

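For reference, the Following()/Preceding() logic above hinges on the return
convention of List<T>.BinarySearch: when the offset is not in the list, the
method returns the bitwise complement of the index of the next larger element,
so ~result is already the lowest index greater than the offset. A minimal
standalone sketch (the BoundarySearchDemo class and its explicit bounds check
are illustrative, not part of the commit):

    using System;
    using System.Collections.Generic;

    public static class BoundarySearchDemo
    {
        // Same idea as GetLowestIndexGreaterThan above, with an explicit
        // bounds check on the insertion point for clarity.
        public static int LowestIndexGreaterThan(List<int> boundaries, int offset)
        {
            int index = boundaries.BinarySearch(offset);
            if (index < 0)
            {
                int insertionPoint = ~index; // index of the first element > offset
                return insertionPoint < boundaries.Count ? insertionPoint : -1;
            }
            return index + 1 < boundaries.Count ? index + 1 : -1;
        }

        public static void Main()
        {
            var boundaries = new List<int> { 0, 4, 9, 15 };
            Console.WriteLine(LowestIndexGreaterThan(boundaries, 5));  // 2 (boundary 9)
            Console.WriteLine(LowestIndexGreaterThan(boundaries, 9));  // 3 (boundary 15)
            Console.WriteLine(LowestIndexGreaterThan(boundaries, 15)); // -1 (no later boundary)
        }
    }
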
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
index fedffc8..94eb504 100644
--- a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
+++ b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
@@ -41,6 +41,9 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="..\IcuBreakIterator.cs">
+      <Link>IcuBreakIterator.cs</Link>
+    </Compile>
     <Compile Include="Highlight\DefaultEncoder.cs" />
     <Compile Include="Highlight\GradientFormatter.cs" />
     <Compile Include="Highlight\Highlighter.cs" />
@@ -76,7 +79,6 @@
     <Compile Include="PostingsHighlight\WholeBreakIterator.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="RectangularArrays.cs" />
-    <Compile Include="IcuBreakIterator.cs" />
     <Compile Include="VectorHighlight\BaseFragListBuilder.cs" />
     <Compile Include="VectorHighlight\BaseFragmentsBuilder.cs" />
     <Compile Include="VectorHighlight\BoundaryScanner.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/project.json b/src/Lucene.Net.Highlighter/project.json
index 4200936..7b59d99 100644
--- a/src/Lucene.Net.Highlighter/project.json
+++ b/src/Lucene.Net.Highlighter/project.json
@@ -6,6 +6,13 @@
     "Lucene.Net.Analysis.Common": "4.8.0-alpha",
     "Lucene.Net.Queries": "4.8.0-alpha"
   },
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [
+        "../IcuBreakIterator.cs"
+      ]
+    }
+  },
   "frameworks": {
     "netstandard1.5": {
       "imports": "dnxcore50",

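Taken together, the two changes above appear to relocate IcuBreakIterator.cs
to the parent src\ directory and compile it into the Highlighter as a shared
source file: the classic .csproj pulls it in via <Compile Include> with a
<Link> element, while the .NET Core build does the equivalent through
buildOptions/compile/includeFiles in project.json. Any other project needing
the type can link the same file instead of taking a new assembly reference,
e.g. (hypothetical consumer project):

    <Compile Include="..\IcuBreakIterator.cs">
      <Link>IcuBreakIterator.cs</Link>
    </Compile>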

[09/14] lucenenet git commit: Lucene.Net.Tests.Analysis.Common.Miscellaneous: Added missing TestKeywordRepeatFilter tests

Posted by ni...@apache.org.
Lucene.Net.Tests.Analysis.Common.Miscellaneous: Added missing TestKeywordRepeatFilter tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5d556167
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5d556167
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5d556167

Branch: refs/heads/api-work
Commit: 5d556167073782c27cd50d857759b0a5b9d0fa3e
Parents: ddb054a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:48:28 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:34 2017 +0700

----------------------------------------------------------------------
 .../Miscellaneous/TestKeywordRepeatFilter.cs    | 46 +++++++++-----------
 .../Lucene.Net.Tests.Analysis.Common.csproj     |  1 +
 .../project.json                                |  3 +-
 3 files changed, 23 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
index e9674ba..ef977fd 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
@@ -1,7 +1,9 @@
-\ufeffnamespace org.apache.lucene.analysis.miscellaneous
-{
+\ufeffusing NUnit.Framework;
+using System.IO;
 
-	/*
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,31 +20,25 @@
 	 * limitations under the License.
 	 */
 
-	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
-
-
-	public class TestKeywordRepeatFilter : BaseTokenStreamTestCase
-	{
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBasic() throws java.io.IOException
-	  public virtual void testBasic()
-	  {
-		TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false)), "English"));
-		assertTokenStreamContents(ts, new string[] {"the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
-	  }
-
+    using SnowballFilter = Lucene.Net.Analysis.Snowball.SnowballFilter;
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testComposition() throws java.io.IOException
-	  public virtual void testComposition()
-	  {
-		TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false))), "English"));
-		assertTokenStreamContents(ts, new string[] {"the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
-	  }
 
+    public class TestKeywordRepeatFilter : BaseTokenStreamTestCase
+    {
 
+        [Test]
+        public virtual void TestBasic()
+        {
+            TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false)), "English"));
+            AssertTokenStreamContents(ts, new string[] { "the", "birds", "bird", "are", "flying", "fli" }, new int[] { 1, 1, 0, 1, 1, 0 });
+        }
 
-	}
 
+        [Test]
+        public virtual void TestComposition()
+        {
+            TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false))), "English"));
+            AssertTokenStreamContents(ts, new string[] { "the", "birds", "bird", "are", "flying", "fli" }, new int[] { 1, 1, 0, 1, 1, 0 });
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index b5587d0..0bd31c1 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -195,6 +195,7 @@
     <Compile Include="Analysis\Miscellaneous\TestKeepWordFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestKeywordMarkerFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestKeywordMarkerFilterFactory.cs" />
+    <Compile Include="Analysis\Miscellaneous\TestKeywordRepeatFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestLengthFilter.cs" />
     <Compile Include="Analysis\Miscellaneous\TestLengthFilterFactory.cs" />
     <Compile Include="Analysis\Miscellaneous\TestLimitTokenCountAnalyzer.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/project.json b/src/Lucene.Net.Tests.Analysis.Common/project.json
index b621d6e..46a3833 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Common/project.json
@@ -100,8 +100,7 @@
       "excludeFiles": [
         "DateTimeHelperClass.cs",
         "HashMapHelperClass.cs",
-        "StringHelperClass.cs",
-        "Analysis/Miscellaneous/TestKeywordRepeatFilter.cs"
+        "StringHelperClass.cs"
       ]
     }
   },

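The chain under test composes three filters: KeywordRepeatFilter emits every
incoming token twice -- once marked with KeywordAttribute (which stemmers
honor and leave untouched) and once unmarked -- SnowballFilter then stems only
the unmarked copy, and RemoveDuplicatesTokenFilter drops the second copy
whenever stemming produced no change (as with "the" and "are" above). A
sketch of the expected output, annotated with position increments:

    // input:  "the birds are flying"
    // output: "the"(+1) "birds"(+1) "bird"(+0) "are"(+1) "flying"(+1) "fli"(+0)
    //
    // Each stemmed variant has a position increment of 0, so it occupies the
    // same position as the original term it was derived from.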

[07/14] lucenenet git commit: Lucene.Net.Core.Util.Constants: Added using statement for System.Runtime.InteropServices because it is required for .NET Core

Posted by ni...@apache.org.
Lucene.Net.Core.Util.Constants: Added using statement for System.Runtime.InteropServices because it is required for .NET Core


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fd6282ea
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fd6282ea
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fd6282ea

Branch: refs/heads/api-work
Commit: fd6282ead34b4c56cf997813f40bdef5b23998ca
Parents: 9525d45
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:52:29 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:32 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Util/Constants.cs | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fd6282ea/src/Lucene.Net.Core/Util/Constants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Constants.cs b/src/Lucene.Net.Core/Util/Constants.cs
index 00fbabe..01800e7 100644
--- a/src/Lucene.Net.Core/Util/Constants.cs
+++ b/src/Lucene.Net.Core/Util/Constants.cs
@@ -1,6 +1,9 @@
 using Lucene.Net.Support;
 using System;
 using System.Reflection;
+#if NETSTANDARD
+using System.Runtime.InteropServices;
+#endif
 using System.Text.RegularExpressions;
 
 namespace Lucene.Net.Util

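The diff stops at the using directive, so the call sites are not shown; a
hedged guess at the kind of code this supports (Constants.WINDOWS is the
field name used in Lucene, assumed here) is OS detection, which on .NET Core
goes through RuntimeInformation rather than Environment.OSVersion:

    #if NETSTANDARD
            public static readonly bool WINDOWS = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
    #else
            public static readonly bool WINDOWS = Environment.OSVersion.Platform == PlatformID.Win32NT;
    #endif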

[06/14] lucenenet git commit: Removed System.Runtime.Remoting namespace from SafeTextWriterWrapper.cs because of compile issues in .NET Core

Posted by ni...@apache.org.
Removed System.Runtime.Remoting namespace from SafeTextWriterWrapper.cs because of compile issues in .NET Core


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc34ba7d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc34ba7d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc34ba7d

Branch: refs/heads/api-work
Commit: fc34ba7d3dbf9046cf3f59e9584ad7e6bdd19209
Parents: 7d12310
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:28:36 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:31 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc34ba7d/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
index eb14921..0c5d7c3 100644
--- a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
+++ b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
@@ -1,6 +1,5 @@
 \ufeffusing System;
 using System.IO;
-using System.Runtime.Remoting;
 using System.Text;
 using System.Threading.Tasks;
 


[11/14] lucenenet git commit: Fixed several issues that were causing the .NET Core build to fail

Posted by ni...@apache.org.
Fixed several issues that were causing the .NET Core build to fail


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bed1f16e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bed1f16e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bed1f16e

Branch: refs/heads/api-work
Commit: bed1f16e5d81495acf1c8c0f2a36cb7789bdb3ed
Parents: 5d55616
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:56:14 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:35 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Index/TaskMergeScheduler.cs |  6 ++--
 .../Support/SafeTextWriterWrapper.cs            |  9 ++++++
 src/Lucene.Net.Core/project.json                |  9 +++++-
 src/Lucene.Net.TestFramework/project.json       |  1 +
 .../project.json                                |  7 ++++-
 .../project.json                                |  7 ++++-
 .../project.json                                |  9 +++++-
 .../Lucene.Net.Tests.Codecs.csproj              |  4 ---
 src/Lucene.Net.Tests.Codecs/project.json        |  9 ++++--
 src/Lucene.Net.Tests.Expressions/project.json   | 11 +++++--
 src/Lucene.Net.Tests.Facet/project.json         |  7 ++++-
 src/Lucene.Net.Tests.Grouping/project.json      |  7 ++++-
 src/Lucene.Net.Tests.Highlighter/project.json   |  7 ++++-
 src/Lucene.Net.Tests.Join/project.json          | 13 ++++++--
 src/Lucene.Net.Tests.Memory/project.json        |  9 +++++-
 src/Lucene.Net.Tests.Misc/project.json          | 13 ++++++--
 src/Lucene.Net.Tests.Queries/project.json       |  9 +++++-
 src/Lucene.Net.Tests.QueryParser/project.json   |  6 ++--
 src/Lucene.Net.Tests.Sandbox/project.json       |  9 ++++--
 src/Lucene.Net.Tests.Spatial/project.json       | 31 ++++++++++++--------
 src/Lucene.Net.Tests.Suggest/project.json       |  7 +++++
 .../Index/TestTaskMergeSchedulerExternal.cs     |  6 ++--
 src/Lucene.Net.Tests/project.json               |  1 +
 23 files changed, 150 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs b/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
index 61cec12..804e7bb 100644
--- a/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
+++ b/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
@@ -159,7 +159,7 @@ namespace Lucene.Net.Index
         /// }
         /// </pre>
         /// </summary>
-        protected internal bool Verbose
+        protected bool Verbose
         {
             get { return _writer != null && _writer.infoStream.IsEnabled(COMPONENT_NAME); }
         }
@@ -168,7 +168,7 @@ namespace Lucene.Net.Index
         /// Outputs the given message - this method assumes <seealso cref="#verbose()"/> was
         /// called and returned true.
         /// </summary>
-        protected internal virtual void Message(string message)
+        protected virtual void Message(string message)
         {
             _writer.infoStream.Message(COMPONENT_NAME, message);
         }
@@ -357,7 +357,7 @@ namespace Lucene.Net.Index
         /// Called when an exception is hit in a background merge
         ///  thread
         /// </summary>
-        protected internal virtual void HandleMergeException(Exception exc)
+        protected virtual void HandleMergeException(Exception exc)
         {
             // suppressExceptions is normally only set during testing
             if (suppressExceptions)

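In C#, protected internal is the union of protected and internal, so these
members were visible to any type in the same assembly as well as to derived
types; narrowing them to plain protected presumably tightens the API surface
to subclass-only access, which is what the original Lucene code intends. In
short:

    public class Base
    {
        protected internal void Wide() { }  // derived types + same assembly
        protected void Narrow() { }         // derived types only
    }
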
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
index 0c5d7c3..507bb8a 100644
--- a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
+++ b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
@@ -1,5 +1,8 @@
 \ufeffusing System;
 using System.IO;
+#if FEATURE_MARSHAL_BY_REF
+using System.Runtime.Remoting;
+#endif
 using System.Text;
 using System.Threading.Tasks;
 
@@ -64,15 +67,19 @@ namespace Lucene.Net.Support
             }
         }
 
+#if FEATURE_CLOSEABLE
         public override void Close()
         {
             Run(() => textWriter.Close());
         }
+#endif
 
+#if FEATURE_MARSHAL_BY_REF
         public override ObjRef CreateObjRef(Type requestedType)
         {
             return Run(() => textWriter.CreateObjRef(requestedType));
         }
+#endif
 
         public override bool Equals(object obj)
         {
@@ -94,10 +101,12 @@ namespace Lucene.Net.Support
             return Run(() => textWriter.GetHashCode());
         }
 
+#if FEATURE_LIFETIME_SERVICE
         public override object InitializeLifetimeService()
         {
             return Run(() => textWriter.InitializeLifetimeService());
         }
+#endif
 
         public override string ToString()
         {

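These guards map to real platform gaps: MarshalByRefObject (and with it
CreateObjRef and InitializeLifetimeService) does not exist on .NET Core, and
TextWriter exposes no Close() override point before netstandard2.0. The
project.json change below defines the three FEATURE_* symbols only for the
desktop framework target, so the guarded members simply vanish from the .NET
Core build. A minimal sketch of the pattern (GuardedWriter is a hypothetical
class, not from the commit):

    using System.IO;
    using System.Text;

    public class GuardedWriter : TextWriter
    {
        public override Encoding Encoding
        {
            get { return Encoding.UTF8; }
        }

    #if FEATURE_CLOSEABLE
        // Compiled only where the target framework defines the symbol.
        public override void Close() { }
    #endif
    }
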
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/project.json b/src/Lucene.Net.Core/project.json
index c0879ac..ddbdf76 100644
--- a/src/Lucene.Net.Core/project.json
+++ b/src/Lucene.Net.Core/project.json
@@ -168,7 +168,14 @@
         "System.Xml.Linq": "4.0.0.0"
       },
       "buildOptions": {
-        "define": [ "FEATURE_CLONEABLE", "FEATURE_SERIALIZABLE", "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM" ],
+        "define": [
+          "FEATURE_CLONEABLE",
+          "FEATURE_SERIALIZABLE",
+          "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM",
+          "FEATURE_MARSHAL_BY_REF",
+          "FEATURE_CLOSEABLE",
+          "FEATURE_LIFETIME_SERVICE"
+        ],
         "compile": {
           "exclude": [
             "Analysis/Standard/*",

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.TestFramework/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/project.json b/src/Lucene.Net.TestFramework/project.json
index 48ed0c2..d793f61 100644
--- a/src/Lucene.Net.TestFramework/project.json
+++ b/src/Lucene.Net.TestFramework/project.json
@@ -19,6 +19,7 @@
         "compile": {
           "excludeFiles": [
             "Support/SystemProperties.cs",
+            "Util/ApiScanTestBase.cs",
             "Util/AbstractBeforeAfterRule.cs",
             "Util/CloseableDirectory.cs",
             "Util/FailureMarker.cs",

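This exclusion drives the repetitive project.json edits below: with
Util/ApiScanTestBase.cs out of the .NET Core build of the test framework,
each test project's TestApiConsistency.cs (which presumably derives from
ApiScanTestBase) can no longer compile under netcoreapp1.0 and is excluded
the same way.
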
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/project.json b/src/Lucene.Net.Tests.Analysis.Common/project.json
index 46a3833..812bc65 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Common/project.json
@@ -107,7 +107,12 @@
   "frameworks": {
     "netcoreapp1.0": {
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       },
       "dependencies": {
         "Microsoft.NETCore.App": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Analysis.Stempel/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/project.json b/src/Lucene.Net.Tests.Analysis.Stempel/project.json
index 018ecfd..7c6e2f9 100644
--- a/src/Lucene.Net.Tests.Analysis.Stempel/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/project.json
@@ -25,7 +25,12 @@
         }
       },
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Classification/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Classification/project.json b/src/Lucene.Net.Tests.Classification/project.json
index a0e7c84..a8656f7 100644
--- a/src/Lucene.Net.Tests.Classification/project.json
+++ b/src/Lucene.Net.Tests.Classification/project.json
@@ -13,11 +13,18 @@
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj b/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
index 7e4c447..4000a00 100644
--- a/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
+++ b/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
@@ -81,10 +81,6 @@
       <Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
       <Name>Lucene.Net.TestFramework</Name>
     </ProjectReference>
-    <ProjectReference Include="..\Lucene.Net.Tests\Lucene.Net.Tests.csproj">
-      <Project>{de63db10-975f-460d-af85-572c17a91284}</Project>
-      <Name>Lucene.Net.Tests</Name>
-    </ProjectReference>
   </ItemGroup>
   <ItemGroup>
     <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Codecs/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Codecs/project.json b/src/Lucene.Net.Tests.Codecs/project.json
index 94d03ed..80ad56d 100644
--- a/src/Lucene.Net.Tests.Codecs/project.json
+++ b/src/Lucene.Net.Tests.Codecs/project.json
@@ -5,7 +5,7 @@
     "NUnit": "3.5.0",
     "Lucene.Net.Analysis.Common": "4.8.0-alpha",
     "Lucene.Net.Codecs": "4.8.0-alpha",
-    "Lucene.Net.Tests": "4.8.0-alpha"
+    "Lucene.Net.TestFramework": "4.8.0-alpha"
   },
 
   "testRunner": "nunit",
@@ -13,7 +13,12 @@
   "frameworks": {
     "netcoreapp1.0": {
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       },
       "imports": "dnxcore50",
       "dependencies": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Expressions/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Expressions/project.json b/src/Lucene.Net.Tests.Expressions/project.json
index 6b7d68d..ec5baa5 100644
--- a/src/Lucene.Net.Tests.Expressions/project.json
+++ b/src/Lucene.Net.Tests.Expressions/project.json
@@ -9,15 +9,22 @@
   },
 
   "testRunner": "nunit",
-
+  
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Facet/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/project.json b/src/Lucene.Net.Tests.Facet/project.json
index 04b3527..2bed4d6 100644
--- a/src/Lucene.Net.Tests.Facet/project.json
+++ b/src/Lucene.Net.Tests.Facet/project.json
@@ -20,7 +20,12 @@
         }
       },
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Grouping/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/project.json b/src/Lucene.Net.Tests.Grouping/project.json
index 724369e..2cd0351 100644
--- a/src/Lucene.Net.Tests.Grouping/project.json
+++ b/src/Lucene.Net.Tests.Grouping/project.json
@@ -20,7 +20,12 @@
         }
       },
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Highlighter/project.json b/src/Lucene.Net.Tests.Highlighter/project.json
index 84a0104..99bfb94 100644
--- a/src/Lucene.Net.Tests.Highlighter/project.json
+++ b/src/Lucene.Net.Tests.Highlighter/project.json
@@ -19,7 +19,12 @@
   "frameworks": {
     "netcoreapp1.0": {
       "buildOptions": {
-        "define": [ "NETSTANDARD", "FEATURE_EMBEDDED_RESOURCE" ]
+        "define": [ "NETSTANDARD", "FEATURE_EMBEDDED_RESOURCE" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       },
       "dependencies": {
         "Microsoft.NETCore.App": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Join/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/project.json b/src/Lucene.Net.Tests.Join/project.json
index 9777ef4..c6d7a79 100644
--- a/src/Lucene.Net.Tests.Join/project.json
+++ b/src/Lucene.Net.Tests.Join/project.json
@@ -6,19 +6,26 @@
     "Lucene.Net.Grouping": "4.8.0-alpha",
     "Lucene.Net.Join": "4.8.0-alpha",
     "Lucene.Net.TestFramework": "4.8.0-alpha",
-    "NUnit": "3.5.0",
+    "NUnit": "3.5.0"
   },
 
   "testRunner": "nunit",
-
+  
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Memory/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Memory/project.json b/src/Lucene.Net.Tests.Memory/project.json
index bd96749..5f0fb11 100644
--- a/src/Lucene.Net.Tests.Memory/project.json
+++ b/src/Lucene.Net.Tests.Memory/project.json
@@ -22,11 +22,18 @@
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Misc/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Misc/project.json b/src/Lucene.Net.Tests.Misc/project.json
index a82c280..a2f7fd2 100644
--- a/src/Lucene.Net.Tests.Misc/project.json
+++ b/src/Lucene.Net.Tests.Misc/project.json
@@ -8,19 +8,26 @@
   },
 
   "testRunner": "nunit",
-
+  
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },
-  
+
   "runtimes": {
     "win7-x86": {},
     "win7-x64": {}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Queries/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Queries/project.json b/src/Lucene.Net.Tests.Queries/project.json
index c78ec4f..abafeaf 100644
--- a/src/Lucene.Net.Tests.Queries/project.json
+++ b/src/Lucene.Net.Tests.Queries/project.json
@@ -12,11 +12,18 @@
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
-      "dependencies": { 
+      "dependencies": {
         "Microsoft.NETCore.App": {
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.QueryParser/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.QueryParser/project.json b/src/Lucene.Net.Tests.QueryParser/project.json
index b83132e..b2ff80a 100644
--- a/src/Lucene.Net.Tests.QueryParser/project.json
+++ b/src/Lucene.Net.Tests.QueryParser/project.json
@@ -9,7 +9,6 @@
   },
 
   "testRunner": "nunit",
-
   "frameworks": {
     "netcoreapp1.0": {
       "imports": "dnxcore50",
@@ -22,13 +21,14 @@
       "buildOptions": {
         "compile": {
           "excludeFiles": [
-            "Xml/TestQueryTemplateManager.cs"
+            "Xml/TestQueryTemplateManager.cs",
+            "TestApiConsistency.cs"
           ]
         }
       }
     }
   },
-  
+
   "runtimes": {
     "win7-x86": {},
     "win7-x64": {}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Sandbox/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Sandbox/project.json b/src/Lucene.Net.Tests.Sandbox/project.json
index 186abd5..2da3ee8 100644
--- a/src/Lucene.Net.Tests.Sandbox/project.json
+++ b/src/Lucene.Net.Tests.Sandbox/project.json
@@ -9,7 +9,7 @@
   },
   "buildOptions": {
     "embed": {
-      "includeFiles": [ 
+      "includeFiles": [
         "Queries/fuzzyTestData.txt"
       ]
     }
@@ -26,7 +26,12 @@
       },
       "imports": "dnxcore50",
       "buildOptions": {
-        "define": [ "FEATURE_EMBEDDED_RESOURCE" ]
+        "define": [ "FEATURE_EMBEDDED_RESOURCE" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Spatial/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Spatial/project.json b/src/Lucene.Net.Tests.Spatial/project.json
index 127bda3..6d99bf7 100644
--- a/src/Lucene.Net.Tests.Spatial/project.json
+++ b/src/Lucene.Net.Tests.Spatial/project.json
@@ -11,18 +11,18 @@
   "buildOptions": {
     "embed": {
       "includeFiles": [
-          "Test-Files/cities-Intersects-BBox.txt",
-          "Test-Files/Data/countries-bbox.txt",
-          "Test-Files/Data/countries-poly.txt",
-          "Test-Files/Data/geonames-IE.txt",
-          "Test-Files/Data/LUCENE-4464.txt",
-          "Test-Files/Data/simple-bbox.txt",
-          "Test-Files/Data/states-bbox.txt",
-          "Test-Files/Data/states-poly.txt",
-          "Test-Files/Data/world-cities-points.txt",
-          "Test-Files/simple-Queries-BBox.txt",
-          "Test-Files/states-Intersects-BBox.txt",
-          "Test-Files/states-IsWithin-BBox.txt"
+        "Test-Files/cities-Intersects-BBox.txt",
+        "Test-Files/Data/countries-bbox.txt",
+        "Test-Files/Data/countries-poly.txt",
+        "Test-Files/Data/geonames-IE.txt",
+        "Test-Files/Data/LUCENE-4464.txt",
+        "Test-Files/Data/simple-bbox.txt",
+        "Test-Files/Data/states-bbox.txt",
+        "Test-Files/Data/states-poly.txt",
+        "Test-Files/Data/world-cities-points.txt",
+        "Test-Files/simple-Queries-BBox.txt",
+        "Test-Files/states-Intersects-BBox.txt",
+        "Test-Files/states-IsWithin-BBox.txt"
       ]
     }
   },
@@ -30,7 +30,12 @@
     "netcoreapp1.0": {
       "imports": [ "dnxcore50", "portable-net403+sl5+win8+wp8+wpa81" ],
       "buildOptions": {
-        "define": [ "NETSTANDARD" ]
+        "define": [ "NETSTANDARD" ],
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       },
       "dependencies": {
         "Microsoft.NETCore.App": {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Suggest/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Suggest/project.json b/src/Lucene.Net.Tests.Suggest/project.json
index 24fba67..188fbd0 100644
--- a/src/Lucene.Net.Tests.Suggest/project.json
+++ b/src/Lucene.Net.Tests.Suggest/project.json
@@ -28,6 +28,13 @@
           "type": "platform",
           "version": "1.0.1"
         }
+      },
+      "buildOptions": {
+        "compile": {
+          "excludeFiles": [
+            "TestApiConsistency.cs"
+          ]
+        }
       }
     }
   },

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs b/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
index 960520b..f0ad954 100644
--- a/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
+++ b/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
@@ -56,7 +56,7 @@ namespace Lucene.Net.Tests
                 this.OuterInstance = outerInstance;
             }
 
-            protected internal override void HandleMergeException(Exception t)
+            protected override void HandleMergeException(Exception t)
             {
                 OuterInstance.ExcCalled = true;
             }
@@ -109,7 +109,7 @@ namespace Lucene.Net.Tests
             public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
             {
                 MergePolicy.OneMerge merge = null;
-                while ((merge = writer.NextMerge) != null)
+                while ((merge = writer.GetNextMerge()) != null)
                 {
                     if (VERBOSE)
                     {
@@ -119,7 +119,7 @@ namespace Lucene.Net.Tests
                 }
             }
 
-            public override void Dispose()
+            protected override void Dispose(bool disposing)
             {
             }
         }

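The Dispose change follows the standard .NET dispose pattern, which the
MergeScheduler base class presumably now implements: a non-virtual public
Dispose() delegates to a virtual Dispose(bool), and the latter is the member
subclasses override. A minimal sketch of that shape (DisposableBase is
illustrative, not the actual base class):

    using System;

    public abstract class DisposableBase : IDisposable
    {
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        // Subclasses override this instead of redefining Dispose() itself.
        protected abstract void Dispose(bool disposing);
    }
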
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/project.json b/src/Lucene.Net.Tests/project.json
index 72e021d..171c985 100644
--- a/src/Lucene.Net.Tests/project.json
+++ b/src/Lucene.Net.Tests/project.json
@@ -16,6 +16,7 @@
         "core/Util/Junitcompat"
       ],
       "excludeFiles": [
+        "core/TestApiConsistency.cs",
         "core/TestMergeSchedulerExternal.cs",
         "core/TestWorstCaseTestBehavior.cs",
         "core/Index/TestBackwardsCompatibility.cs",


[04/14] lucenenet git commit: Updated .gitignore to exclude NuGetPackages directory

Posted by ni...@apache.org.
Updated .gitignore to exclude NuGetPackages directory


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7d123105
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7d123105
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7d123105

Branch: refs/heads/api-work
Commit: 7d1231051da1929c1972fe6280adcbaee9641f5f
Parents: 1c87ed5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:27:32 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:30 2017 +0700

----------------------------------------------------------------------
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7d123105/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index b3cc01d..4ee6108 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,7 @@ packages/
 *.lock.json
 TestResults/
 test-files/analysis/data/
+[Nn]u[Gg]et[Pp]ackages/
 
 # NuGet v3's project.json files produces more ignoreable files
 *.nuget.props

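Git ignore globs are case-sensitive on case-sensitive file systems, so the
bracket classes cover the casings seen in the wild: the new rule matches
NuGetPackages/, NugetPackages/, and nugetpackages/ alike (only the N, G, and
P vary; the remaining letters must be lowercase).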

[14/14] lucenenet git commit: SWEEP: Where used, changed Dictionary/OrderedDictionary back to LinkedHashMap, which is a better match for the LinkedHashMap in Java.

Posted by ni...@apache.org.
SWEEP: Where used, changed Dictionary/OrderedDictionary back to LinkedHashMap, which is a better match for the LinkedHashMap in Java.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bc485b4c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bc485b4c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bc485b4c

Branch: refs/heads/api-work
Commit: bc485b4c42db371f93d90534ee30db6824baca2f
Parents: fc7b5b5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 19:41:38 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 19:41:38 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Hunspell/Dictionary.cs             |  2 +-
 .../Analysis/Util/AnalysisSPILoader.cs          |  2 +-
 src/Lucene.Net.Core/Index/BufferedUpdates.cs    | 40 ++++++++------------
 .../JS/JavascriptCompiler.cs                    |  2 +-
 src/Lucene.Net.Facet/DrillDownQuery.cs          |  2 +-
 .../Simple/TestSimpleQueryParser.cs             |  4 +-
 6 files changed, 21 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index f8e3107..af966d1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -269,7 +269,7 @@ namespace Lucene.Net.Analysis.Hunspell
             patterns.Add(null);
 
             // zero strip -> 0 ord
-            IDictionary<string, int?> seenStrips = new Dictionary<string, int?>();
+            IDictionary<string, int?> seenStrips = new LinkedHashMap<string, int?>();
             seenStrips[""] = 0;
 
             var reader = new StreamReader(affixStream, decoder);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
index 5797c10..03b949d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -63,7 +63,7 @@ namespace Lucene.Net.Analysis.Util
         {
             lock (this)
             {
-                IDictionary<string, Type> services = new Dictionary<string, Type>(this.services);
+                IDictionary<string, Type> services = new LinkedHashMap<string, Type>(this.services);
                 SPIClassIterator<S> loader = SPIClassIterator<S>.Get();
 
                 foreach (var service in loader)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Core/Index/BufferedUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/BufferedUpdates.cs b/src/Lucene.Net.Core/Index/BufferedUpdates.cs
index 764fdae..d03d298 100644
--- a/src/Lucene.Net.Core/Index/BufferedUpdates.cs
+++ b/src/Lucene.Net.Core/Index/BufferedUpdates.cs
@@ -123,25 +123,25 @@ namespace Lucene.Net.Index
         internal readonly IDictionary<Term, int?> terms = new Dictionary<Term, int?>();
         internal readonly IDictionary<Query, int?> queries = new Dictionary<Query, int?>();
         internal readonly IList<int?> docIDs = new List<int?>();
-        
+
 
         // Map<dvField,Map<updateTerm,NumericUpdate>>
         // For each field we keep an ordered list of NumericUpdates, key'd by the
-        // update Term. OrderedDictionary guarantees we will later traverse the map in
+        // update Term. LinkedHashMap guarantees we will later traverse the map in
         // insertion order (so that if two terms affect the same document, the last
         // one that came in wins), and helps us detect faster if the same Term is
         // used to update the same field multiple times (so we later traverse it
         // only once).
-        internal readonly IDictionary<string, OrderedDictionary> numericUpdates = new Dictionary<string, OrderedDictionary>();
+        internal readonly IDictionary<string, LinkedHashMap<Term, NumericDocValuesUpdate>> numericUpdates = new Dictionary<string, LinkedHashMap<Term, NumericDocValuesUpdate>>();
 
         // Map<dvField,Map<updateTerm,BinaryUpdate>>
         // For each field we keep an ordered list of BinaryUpdates, key'd by the
-        // update Term. OrderedDictionary guarantees we will later traverse the map in
+        // update Term. LinkedHashMap guarantees we will later traverse the map in
         // insertion order (so that if two terms affect the same document, the last
         // one that came in wins), and helps us detect faster if the same Term is
         // used to update the same field multiple times (so we later traverse it
         // only once).
-        internal readonly IDictionary<string, OrderedDictionary> binaryUpdates = new Dictionary<string, OrderedDictionary>();
+        internal readonly IDictionary<string, LinkedHashMap<Term, BinaryDocValuesUpdate>> binaryUpdates = new Dictionary<string, LinkedHashMap<Term, BinaryDocValuesUpdate>>();
 
         public static readonly int MAX_INT = Convert.ToInt32(int.MaxValue);
 
@@ -241,21 +241,16 @@ namespace Lucene.Net.Index
 
         public virtual void AddNumericUpdate(NumericDocValuesUpdate update, int docIDUpto)
         {
-            OrderedDictionary fieldUpdates = null;
+            LinkedHashMap<Term, NumericDocValuesUpdate> fieldUpdates = null;
             if (!numericUpdates.TryGetValue(update.field, out fieldUpdates))
             {
-                fieldUpdates = new OrderedDictionary();
+                fieldUpdates = new LinkedHashMap<Term, NumericDocValuesUpdate>();
                 numericUpdates[update.field] = fieldUpdates;
                 bytesUsed.AddAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY);
             }
 
-            NumericDocValuesUpdate current = null;
-            if (fieldUpdates.Contains(update.term))
-            {
-                current = fieldUpdates[update.term] as NumericDocValuesUpdate;
-            }
-
-            if (current != null && docIDUpto < current.docIDUpto)
+            NumericDocValuesUpdate current;
+            if (fieldUpdates.TryGetValue(update.term, out current) && current != null && docIDUpto < current.docIDUpto)
             {
                 // Only record the new number if it's greater than or equal to the current
                 // one. this is important because if multiple threads are replacing the
@@ -265,7 +260,7 @@ namespace Lucene.Net.Index
             }
 
             update.docIDUpto = docIDUpto;
-            // since it's an OrderedDictionary, we must first remove the Term entry so that
+            // since it's a LinkedHashMap, we must first remove the Term entry so that
             // it's added last (we're interested in insertion-order).
             if (current != null)
             {
@@ -281,21 +276,16 @@ namespace Lucene.Net.Index
 
         public virtual void AddBinaryUpdate(BinaryDocValuesUpdate update, int docIDUpto)
         {
-            OrderedDictionary fieldUpdates;
+            LinkedHashMap<Term, BinaryDocValuesUpdate> fieldUpdates;
             if (!binaryUpdates.TryGetValue(update.field, out fieldUpdates))
             {
-                fieldUpdates = new OrderedDictionary();
+                fieldUpdates = new LinkedHashMap<Term, BinaryDocValuesUpdate>();
                 binaryUpdates[update.field] = fieldUpdates;
                 bytesUsed.AddAndGet(BYTES_PER_BINARY_FIELD_ENTRY);
             }
 
-            BinaryDocValuesUpdate current = null;
-            if (fieldUpdates.Contains(update.term))
-            {
-                current = fieldUpdates[update.term] as BinaryDocValuesUpdate;
-            }
-
-            if (current != null && docIDUpto < current.docIDUpto)
+            BinaryDocValuesUpdate current;
+            if (fieldUpdates.TryGetValue(update.term, out current) && current != null && docIDUpto < current.docIDUpto)
             {
                 // Only record the new number if it's greater than or equal to the current
                 // one. this is important because if multiple threads are replacing the
@@ -305,7 +295,7 @@ namespace Lucene.Net.Index
             }
 
             update.docIDUpto = docIDUpto;
-            // since it's an OrderedDictionary, we must first remove the Term entry so that
+            // since it's a LinkedHashMap, we must first remove the Term entry so that
             // it's added last (we're interested in insertion-order).
             if (current != null)
             {

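The remove-then-add dance in AddNumericUpdate/AddBinaryUpdate is the whole
point of the swap back to LinkedHashMap (from Lucene.Net.Support): in an
insertion-ordered map, re-setting an existing key keeps its original slot,
and only removing and re-adding moves it to the end, so the most recent
update for a term is the last one traversed on replay. A small sketch
(assuming LinkedHashMap mirrors Java's insertion-order semantics, as the
commit message indicates):

    var updates = new LinkedHashMap<string, int>();
    updates["termA"] = 1;
    updates["termB"] = 2;

    updates["termA"] = 3;    // in-place update: iteration order stays termA, termB

    updates.Remove("termA"); // the remove-then-add dance from the code above:
    updates["termA"] = 4;    // iteration order is now termB, termA, so the
                             // latest writer is traversed last and wins.
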
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs b/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
index 550d756..d037d2d 100644
--- a/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
+++ b/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
@@ -92,7 +92,7 @@ namespace Lucene.Net.Expressions.JS
 
         private readonly string sourceText;
 
-        private readonly IDictionary<string, int> externalsMap = new HashMap<string, int>();
+        private readonly IDictionary<string, int> externalsMap = new LinkedHashMap<string, int>();
 
         private TypeBuilder dynamicType;
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Facet/DrillDownQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillDownQuery.cs b/src/Lucene.Net.Facet/DrillDownQuery.cs
index e222f2d..5e7d4e5 100644
--- a/src/Lucene.Net.Facet/DrillDownQuery.cs
+++ b/src/Lucene.Net.Facet/DrillDownQuery.cs
@@ -60,7 +60,7 @@ namespace Lucene.Net.Facet
 
         private readonly FacetsConfig config;
         private readonly BooleanQuery query;
-        private readonly IDictionary<string, int?> drillDownDims = new Dictionary<string, int?>();
+        private readonly IDictionary<string, int?> drillDownDims = new LinkedHashMap<string, int?>();
 
         /// <summary>
         /// Used by <see cref="Clone"/>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs b/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
index 065aa82..0cd23ec 100644
--- a/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
+++ b/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
@@ -541,7 +541,7 @@ namespace Lucene.Net.QueryParsers.Simple
         [Test]
         public virtual void TestWeightedTerm()
         {
-            IDictionary<string, float> weights = new Dictionary<string, float>();
+            IDictionary<string, float> weights = new LinkedHashMap<string, float>();
             weights["field0"] = 5f;
             weights["field1"] = 10f;
 
@@ -562,7 +562,7 @@ namespace Lucene.Net.QueryParsers.Simple
         [Test]
         public virtual void TestWeightedOR()
         {
-            IDictionary<string, float> weights = new Dictionary<string, float>();
+            IDictionary<string, float> weights = new LinkedHashMap<string, float>();
             weights["field0"] = 5f;
             weights["field1"] = 10f;
 


[08/14] lucenenet git commit: Lucene.Net.Core.Index.IndexWriter: Added TODO about renaming GetNextMerge() method to NextMerge()

Posted by ni...@apache.org.
Lucene.Net.Core.Index.IndexWriter: Added TODO about renaming GetNextMerge() method to NextMerge()


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ddb054a0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ddb054a0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ddb054a0

Branch: refs/heads/api-work
Commit: ddb054a0b67a4622f51312853bc2cfee298c6023
Parents: fd6282e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:43:43 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:33 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Index/IndexWriter.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ddb054a0/src/Lucene.Net.Core/Index/IndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/IndexWriter.cs b/src/Lucene.Net.Core/Index/IndexWriter.cs
index f402271..39983c1 100644
--- a/src/Lucene.Net.Core/Index/IndexWriter.cs
+++ b/src/Lucene.Net.Core/Index/IndexWriter.cs
@@ -2451,7 +2451,7 @@ namespace Lucene.Net.Index
         ///
         /// @lucene.experimental
         /// </summary>
-        public virtual MergePolicy.OneMerge GetNextMerge()
+        public virtual MergePolicy.OneMerge GetNextMerge() // LUCENENET TODO: Rename NextMerge() (consistency with iterator.Next())
         {
             lock (this)
             {


[02/14] lucenenet git commit: Lucene.Net.Analysis.Common.Collation: For now, adding [CLSCompliant(false)] to CollationAttributeFactory, CollationKeyAnalyzer, CollationKeyFilter, and TokenAttributes.CollatedTermAttributeImpl because they expose types from

Posted by ni...@apache.org.
Lucene.Net.Analysis.Common.Collation: For now, adding [CLSCompliant(false)] to CollationAttributeFactory, CollationKeyAnalyzer, CollationKeyFilter, and TokenAttributes.CollatedTermAttributeImpl because they expose types from icu.net (which is not marked CLS compliant).


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/21b3d8b7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/21b3d8b7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/21b3d8b7

Branch: refs/heads/api-work
Commit: 21b3d8b7703c321155d8388621d6b0ed120750fc
Parents: 7cd69ab
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 10:38:17 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:28 2017 +0700

----------------------------------------------------------------------
 .../Collation/CollationAttributeFactory.cs      |  93 ++++++++-------
 .../Collation/CollationKeyAnalyzer.cs           | 119 ++++++++++---------
 .../Collation/CollationKeyFilter.cs             |   5 +-
 .../CollatedTermAttributeImpl.cs                |  14 ++-
 4 files changed, 122 insertions(+), 109 deletions(-)
----------------------------------------------------------------------

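As background for the attribute change, a CLS-compliant assembly must mark
any public API that exposes non-CLS types. A hypothetical illustration (not
from the commit; uint stands in for the icu.net types mentioned above):

    using System;

    [assembly: CLSCompliant(true)]

    public static class Demo
    {
        // uint is not CLS-compliant, so the member must opt out explicitly
        // or the compiler warns (CS3002).
        [CLSCompliant(false)]
        public static uint NonCompliantValue()
        {
            return 42u;
        }
    }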

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
index 64687dd..d3a7b1f 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
@@ -22,51 +22,54 @@ namespace Lucene.Net.Collation
      * limitations under the License.
      */
 
-	/// <summary>
-	/// <para>
-	///   Converts each token into its <seealso cref="CollationKey"/>, and then
-	///   encodes the bytes as an index term.
-	/// </para>
-	/// <para>
-	///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
-	///   index and query time -- CollationKeys are only comparable when produced by
-	///   the same Collator.  Since <seealso cref="RuleBasedCollator"/>s are not
-	///   independently versioned, it is unsafe to search against stored
-	///   CollationKeys unless the following are exactly the same (best practice is
-	///   to store this information with the index and check that they remain the
-	///   same at query time):
-	/// </para>
-	/// <ol>
-	///   <li>JVM vendor</li>
-	///   <li>JVM version, including patch version</li>
-	///   <li>
-	///     The language (and country and variant, if specified) of the Locale
-	///     used when constructing the collator via
-	///     <seealso cref="Collator#getInstance(Locale)"/>.
-	///   </li>
-	///   <li>
-	///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
-	///   </li>
-	/// </ol> 
-	/// <para>
-	///   The <code>ICUCollationAttributeFactory</code> in the analysis-icu package 
-	///   uses ICU4J's Collator, which makes its
-	///   version available, thus allowing collation to be versioned independently
-	///   from the JVM.  ICUCollationAttributeFactory is also significantly faster and
-	///   generates significantly shorter keys than CollationAttributeFactory.  See
-	///   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
-	///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
-	///   generation timing and key length comparisons between ICU4J and
-	///   java.text.Collator over several languages.
-	/// </para>
-	/// <para>
-	///   CollationKeys generated by java.text.Collators are not compatible
-	///   with those those generated by ICU Collators.  Specifically, if you use 
-	///   CollationAttributeFactory to generate index terms, do not use
-	///   ICUCollationAttributeFactory on the query side, or vice versa.
-	/// </para>
-	/// </summary>
-	public class CollationAttributeFactory : AttributeSource.AttributeFactory
+    /// <summary>
+    /// <para>
+    ///   Converts each token into its <seealso cref="CollationKey"/>, and then
+    ///   encodes the bytes as an index term.
+    /// </para>
+    /// <para>
+    ///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+    ///   index and query time -- CollationKeys are only comparable when produced by
+    ///   the same Collator.  Since <seealso cref="RuleBasedCollator"/>s are not
+    ///   independently versioned, it is unsafe to search against stored
+    ///   CollationKeys unless the following are exactly the same (best practice is
+    ///   to store this information with the index and check that they remain the
+    ///   same at query time):
+    /// </para>
+    /// <ol>
+    ///   <li>JVM vendor</li>
+    ///   <li>JVM version, including patch version</li>
+    ///   <li>
+    ///     The language (and country and variant, if specified) of the Locale
+    ///     used when constructing the collator via
+    ///     <seealso cref="Collator#getInstance(Locale)"/>.
+    ///   </li>
+    ///   <li>
+    ///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+    ///   </li>
+    /// </ol> 
+    /// <para>
+    ///   The <code>ICUCollationAttributeFactory</code> in the analysis-icu package 
+    ///   uses ICU4J's Collator, which makes its
+    ///   version available, thus allowing collation to be versioned independently
+    ///   from the JVM.  ICUCollationAttributeFactory is also significantly faster and
+    ///   generates significantly shorter keys than CollationAttributeFactory.  See
+    ///   <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+    ///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+    ///   generation timing and key length comparisons between ICU4J and
+    ///   java.text.Collator over several languages.
+    /// </para>
+    /// <para>
+    ///   CollationKeys generated by java.text.Collators are not compatible
+    ///   with those generated by ICU Collators.  Specifically, if you use 
+    ///   CollationAttributeFactory to generate index terms, do not use
+    ///   ICUCollationAttributeFactory on the query side, or vice versa.
+    /// </para>
+    /// </summary>
+    // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+    // make it CLS compliant (at least the parts of it we use)
+    [System.CLSCompliant(false)]
+    public class CollationAttributeFactory : AttributeSource.AttributeFactory
 	{
 		private readonly Collator collator;
 		private readonly AttributeSource.AttributeFactory @delegate;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
index f6db44c..b76e520 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
@@ -24,64 +24,67 @@ namespace Lucene.Net.Collation
      * limitations under the License.
      */
 
-	/// <summary>
-	/// <para>
-	///   Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>.
-	/// </para>
-	/// <para>
-	///   Converts the token into its <seealso cref="java.text.CollationKey"/>, and then
-	///   encodes the CollationKey either directly or with 
-	///   <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow 
-	///   it to be stored as an index term.
-	/// </para>
-	/// <para>
-	///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
-	///   index and query time -- CollationKeys are only comparable when produced by
-	///   the same Collator.  Since <seealso cref="java.text.RuleBasedCollator"/>s are not
-	///   independently versioned, it is unsafe to search against stored
-	///   CollationKeys unless the following are exactly the same (best practice is
-	///   to store this information with the index and check that they remain the
-	///   same at query time):
-	/// </para>
-	/// <ol>
-	///   <li>JVM vendor</li>
-	///   <li>JVM version, including patch version</li>
-	///   <li>
-	///     The language (and country and variant, if specified) of the Locale
-	///     used when constructing the collator via
-	///     <seealso cref="Collator#getInstance(java.util.Locale)"/>.
-	///   </li>
-	///   <li>
-	///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
-	///   </li>
-	/// </ol> 
-	/// <para>
-	///   The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package 
-	///   uses ICU4J's Collator, which makes its
-	///   its version available, thus allowing collation to be versioned
-	///   independently from the JVM.  ICUCollationKeyAnalyzer is also significantly
-	///   faster and generates significantly shorter keys than CollationKeyAnalyzer.
-	///   See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
-	///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
-	///   generation timing and key length comparisons between ICU4J and
-	///   java.text.Collator over several languages.
-	/// </para>
-	/// <para>
-	///   CollationKeys generated by java.text.Collators are not compatible
-	///   with those those generated by ICU Collators.  Specifically, if you use 
-	///   CollationKeyAnalyzer to generate index terms, do not use
-	///   ICUCollationKeyAnalyzer on the query side, or vice versa.
-	/// </para>
-	/// <a name="version"/>
-	/// <para>You must specify the required <seealso cref="Version"/>
-	/// compatibility when creating CollationKeyAnalyzer:
-	/// <ul>
-	///   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
-	///   versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>.
-	/// </ul>
-	/// </para>
-	/// </summary>
-	public sealed class CollationKeyAnalyzer : Analyzer
+    /// <summary>
+    /// <para>
+    ///   Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>.
+    /// </para>
+    /// <para>
+    ///   Converts the token into its <seealso cref="java.text.CollationKey"/>, and then
+    ///   encodes the CollationKey either directly or with 
+    ///   <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow 
+    ///   it to be stored as an index term.
+    /// </para>
+    /// <para>
+    ///   <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+    ///   index and query time -- CollationKeys are only comparable when produced by
+    ///   the same Collator.  Since <seealso cref="java.text.RuleBasedCollator"/>s are not
+    ///   independently versioned, it is unsafe to search against stored
+    ///   CollationKeys unless the following are exactly the same (best practice is
+    ///   to store this information with the index and check that they remain the
+    ///   same at query time):
+    /// </para>
+    /// <ol>
+    ///   <li>JVM vendor</li>
+    ///   <li>JVM version, including patch version</li>
+    ///   <li>
+    ///     The language (and country and variant, if specified) of the Locale
+    ///     used when constructing the collator via
+    ///     <seealso cref="Collator#getInstance(java.util.Locale)"/>.
+    ///   </li>
+    ///   <li>
+    ///     The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+    ///   </li>
+    /// </ol> 
+    /// <para>
+    ///   The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package 
+    ///   uses ICU4J's Collator, which makes its
+    ///   version available, thus allowing collation to be versioned
+    ///   independently from the JVM.  ICUCollationKeyAnalyzer is also significantly
+    ///   faster and generates significantly shorter keys than CollationKeyAnalyzer.
+    ///   See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+    ///   >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+    ///   generation timing and key length comparisons between ICU4J and
+    ///   java.text.Collator over several languages.
+    /// </para>
+    /// <para>
+    ///   CollationKeys generated by java.text.Collators are not compatible
+    ///   with those generated by ICU Collators.  Specifically, if you use 
+    ///   CollationKeyAnalyzer to generate index terms, do not use
+    ///   ICUCollationKeyAnalyzer on the query side, or vice versa.
+    /// </para>
+    /// <a name="version"/>
+    /// <para>You must specify the required <seealso cref="Version"/>
+    /// compatibility when creating CollationKeyAnalyzer:
+    /// <ul>
+    ///   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+    ///   versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>.
+    /// </ul>
+    /// </para>
+    /// </summary>
+    // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+    // make it CLS compliant (at least the parts of it we use)
+    [CLSCompliant(false)]
+    public sealed class CollationKeyAnalyzer : Analyzer
 	{
 		private readonly Collator collator;
 		private readonly CollationAttributeFactory factory;
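
Index-time wiring then looks roughly like the sketch below. Collator.Create and the "sv" locale are assumptions about the icu.net API rather than anything shown in this commit, and the constructor shape mirrors the Java original.

    using Icu.Collation;
    using Lucene.Net.Collation;
    using Lucene.Net.Util;

    // The exact same collator configuration must be used at index and
    // query time; see the WARNING in the doc comment above.
    Collator collator = Collator.Create("sv");   // assumed icu.net factory
    var analyzer = new CollationKeyAnalyzer(LuceneVersion.LUCENE_48, collator);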

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
index 5012e9c..6e684c1 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
@@ -70,7 +70,10 @@ namespace Lucene.Net.Collation
 	/// @deprecated Use <seealso cref="CollationAttributeFactory"/> instead, which encodes
 	///  terms directly as bytes. This filter will be removed in Lucene 5.0 
 	[Obsolete("Use <seealso cref=\"CollationAttributeFactory\"/> instead, which encodes")]
-	public sealed class CollationKeyFilter : TokenFilter
+    // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+    // make it CLS compliant (at least the parts of it we use)
+    [CLSCompliant(false)]
+    public sealed class CollationKeyFilter : TokenFilter
 	{
 		private readonly Collator collator;
 		private readonly ICharTermAttribute termAtt;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
index dc3b85e..a29a5e8 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
@@ -1,5 +1,6 @@
 \ufeffusing Icu.Collation;
 using Lucene.Net.Analysis.TokenAttributes;
+using System;
 
 namespace Lucene.Net.Collation.TokenAttributes
 {
@@ -20,11 +21,14 @@ namespace Lucene.Net.Collation.TokenAttributes
      * limitations under the License.
      */
 
-	/// <summary>
-	/// Extension of <seealso cref="CharTermAttribute"/> that encodes the term
-	/// text as a binary Unicode collation key instead of as UTF-8 bytes.
-	/// </summary>
-	public class CollatedTermAttributeImpl : CharTermAttribute
+    /// <summary>
+    /// Extension of <seealso cref="CharTermAttribute"/> that encodes the term
+    /// text as a binary Unicode collation key instead of as UTF-8 bytes.
+    /// </summary>
+    // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+    // make it CLS compliant (at least the parts of it we use)
+    [CLSCompliant(false)]
+    public class CollatedTermAttributeImpl : CharTermAttribute
 	{
 		private readonly Collator collator;
 


[12/14] lucenenet git commit: Lucene.Net.Analysis.Common: Reverted ThaiTokenizer, ThaiWordFilter, and SegmentingTokenizerBase back to their original API and used IcuBreakIterator as the backing BreakIterator.

Posted by ni...@apache.org.
Lucene.Net.Analysis.Common: Reverted ThaiTokenizer, ThaiWordFilter, and SegmentingTokenizerBase back to their original API and used IcuBreakIterator as the backing BreakIterator.
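
A minimal consumption sketch under the reverted API (the Reset/IncrementToken/End protocol is the standard Lucene.NET TokenStream contract; the single-argument ThaiTokenizer constructor is assumed to mirror the Java original):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Th;
    using Lucene.Net.Analysis.TokenAttributes;

    using (var tokenizer = new ThaiTokenizer(new StringReader("ภาษาไทย")))
    {
        var termAtt = tokenizer.AddAttribute<ICharTermAttribute>();
        tokenizer.Reset();
        while (tokenizer.IncrementToken())
        {
            Console.WriteLine(termAtt.ToString()); // one Thai word per token
        }
        tokenizer.End();
    }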


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/506f55a6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/506f55a6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/506f55a6

Branch: refs/heads/api-work
Commit: 506f55a64a9d82e1965da077cc38c64ecd9214eb
Parents: 5a7cb17
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 16:30:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:36 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Th/ThaiTokenizer.cs                | 126 ++++++-------------
 .../Analysis/Th/ThaiWordFilter.cs               |   6 +-
 .../Analysis/Util/SegmentingTokenizerBase.cs    |  91 +++++---------
 .../Analysis/Th/TestThaiAnalyzer.cs             |   9 --
 .../Util/TestSegmentingTokenizerBase.cs         |  15 +--
 5 files changed, 76 insertions(+), 171 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index 52f6750..ae3ab1a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -1,13 +1,13 @@
-\ufeffusing Icu;
+\ufeff//using Icu;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Linq;
 using System.Text.RegularExpressions;
-using BreakIterator = Icu.BreakIterator;
 
 namespace Lucene.Net.Analysis.Th
 {
@@ -43,17 +43,12 @@ namespace Lucene.Net.Analysis.Th
         /// If this is false, this tokenizer will not work at all!
         /// </summary>
         public static readonly bool DBBI_AVAILABLE;
-        
-        //LUCENENET: Specifying to use Thai locale.
-        private static readonly Locale LocaleThai = new Locale("th");
-
-        private static readonly IEnumerable<Boundary> proto;
+        private static readonly BreakIterator proto = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new CultureInfo("th"));
         static ThaiTokenizer()
         {
             // check that we have a working dictionary-based break iterator for thai
-            proto = BreakIterator.GetWordBoundaries(LocaleThai, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22", includeSpacesAndPunctuation: false).ToArray();
-            var first = proto.FirstOrDefault();
-            DBBI_AVAILABLE = first != default(Boundary) && first.End == 4;
+            proto.SetText("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22");
+            DBBI_AVAILABLE = proto.IsBoundary(4);
         }
 
         private readonly ThaiWordBreaker wordBreaker;
@@ -75,18 +70,18 @@ namespace Lucene.Net.Analysis.Th
         /// <summary>
         /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
         public ThaiTokenizer(AttributeFactory factory, TextReader reader)
-              : base(factory, reader, LocaleThai, BreakIterator.UBreakIteratorType.SENTENCE)
+              : base(factory, reader, new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, new CultureInfo("th")))
         {
             if (!DBBI_AVAILABLE)
             {
                 throw new System.NotSupportedException("This runtime does not have support for Thai segmentation");
             }
-            wordBreaker = new ThaiWordBreaker(LocaleUS);
+            wordBreaker = new ThaiWordBreaker(new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, CultureInfo.InvariantCulture));
             termAtt = AddAttribute<ICharTermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
         }
 
-        protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+        protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
         {
             this.sentenceStart = sentenceStart;
             this.sentenceEnd = sentenceEnd;
@@ -94,7 +89,7 @@ namespace Lucene.Net.Analysis.Th
             wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
         }
 
-        protected internal override bool IncrementWord()
+        protected override bool IncrementWord()
         {
             int start = wordBreaker.Current();
             if (start == BreakIterator.DONE)
@@ -104,7 +99,7 @@ namespace Lucene.Net.Analysis.Th
 
             // find the next set of boundaries, skipping over non-tokens
             int end = wordBreaker.Next();
-            while (end != BreakIterator.DONE && !char.IsLetterOrDigit((char)Support.Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
+            while (end != BreakIterator.DONE && !char.IsLetterOrDigit((char)Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
             {
                 start = end;
                 end = wordBreaker.Next();
@@ -126,137 +121,86 @@ namespace Lucene.Net.Analysis.Th
     /// LUCENENET specific class to patch the behavior of the ICU BreakIterator.
     /// Corrects the breaking of words by finding transitions between Thai and non-Thai
     /// characters.
-    /// 
-    /// This logic assumes that the Java BreakIterator also breaks up Thai numerals from
-    /// Arabic numerals (1, 2, 3, etc.). That is, it assumes the first test below passes
-    /// and the second test fails in Lucene (not attempted).
-    /// 
-    /// ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
-    /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53", "456" });
-    /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53456" });
     /// </summary>
     internal class ThaiWordBreaker
     {
-        private readonly Locale locale;
-        private IEnumerator<Boundary> wordBreaker;
-        private int currentIndex;
+        private readonly BreakIterator wordBreaker;
         private string text;
         private readonly IList<int> transitions = new List<int>();
         private readonly static Regex thaiPattern = new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);
 
-        public ThaiWordBreaker(Locale locale)
+        public ThaiWordBreaker(BreakIterator wordBreaker)
         {
-            if (locale == null)
+            if (wordBreaker == null)
             {
-                throw new ArgumentNullException("locale");
+                throw new ArgumentNullException("wordBreaker");
             }
-
-            this.locale = locale;
-            currentIndex = int.MinValue;
+            this.wordBreaker = wordBreaker;
         }
 
         public void SetText(string text)
         {
             this.text = text;
-            wordBreaker = BreakIterator.GetWordBoundaries(locale, text, includeSpacesAndPunctuation: false).ToList().GetEnumerator();
-            currentIndex = wordBreaker.MoveNext()
-                ? wordBreaker.Current.Start : BreakIterator.DONE;
+            wordBreaker.SetText(text);
         }
 
         public int Current()
         {
-            return currentIndex;
+            if (transitions.Any())
+            {
+                return transitions.First();
+            }
+            return wordBreaker.Current;
         }
 
         public int Next()
         {
-            // Tracking whether a transition was returned last time
-            // next was called. If that is the case, and there are no
-            // transitions left, then we return the End index in the
-            // wordbreaker.Current
-            bool transitionReturned = false;
-
             if (transitions.Any())
             {
-                transitionReturned = currentIndex == transitions[0];
                 transitions.RemoveAt(0);
             }
-
             if (transitions.Any())
             {
-                currentIndex = transitions.First();
-                return currentIndex;
-            }
-            else if (transitionReturned)
-            {
-                currentIndex = wordBreaker.Current.End;
+                return transitions.First();
             }
-
             return GetNext();
         }
 
         private int GetNext()
         {
-            bool isThaiLetter = false, isNonThaiLetter = false;
+            bool isThai = false, isNonThai = false;
             bool prevWasThai = false, prevWasNonThai = false;
+            int prev = wordBreaker.Current;
+            int current = wordBreaker.Next();
 
-            int previous = currentIndex;
-            int current;
-
-            if (currentIndex == wordBreaker.Current.Start)
-            {
-                current = wordBreaker.Current.End;
-            }
-            else if (wordBreaker.MoveNext())
-            {
-                // The break iterator works by returning the start and end
-                // boundary of each word it finds. Consider the two words,
-                // 
-                if (currentIndex == wordBreaker.Current.Start)
-                {
-                    current = wordBreaker.Current.End;
-                }
-                else
-                {
-                    current = wordBreaker.Current.Start;
-                }
-            }
-            else
-            {
-                current = BreakIterator.DONE;
-            }
-
-            if (current != BreakIterator.DONE && current - previous > 0)
+            if (current != BreakIterator.DONE && current - prev > 0)
             {
                 // Find all of the transitions between Thai and non-Thai characters and digits
-                for (int i = previous; i < current; i++)
+                for (int i = prev; i < current; i++)
                 {
                     char c = text[i];
-                    isThaiLetter = char.IsLetter(c) && thaiPattern.IsMatch(c.ToString());
-                    isNonThaiLetter = char.IsLetter(c) && !isThaiLetter;
+                    isThai = char.IsLetter(c) && thaiPattern.IsMatch(c.ToString());
+                    isNonThai = char.IsLetter(c) && !isThai;
 
-                    if ((prevWasThai && isNonThaiLetter) ||
-                        (prevWasNonThai && isThaiLetter))
+                    if ((prevWasThai && isNonThai) ||
+                        (prevWasNonThai && isThai))
                     {
                         transitions.Add(i);
                     }
 
                     // record the values for comparison with the next loop
-                    prevWasThai = isThaiLetter;
-                    prevWasNonThai = isNonThaiLetter;
+                    prevWasThai = isThai;
+                    prevWasNonThai = isNonThai;
                 }
 
                 if (transitions.Any())
                 {
                     transitions.Add(current);
-                    currentIndex = transitions.First();
-
-                    return currentIndex;
+                    return transitions.First();
                 }
             }
 
-            currentIndex = current;
-            return currentIndex;
+            return current;
         }
     }
 }
\ No newline at end of file
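
The transition scan above relies on .NET's named Unicode block \p{IsThai} (U+0E00–U+0E7F); a quick standalone check of that regex class:

    using System;
    using System.Text.RegularExpressions;

    var thai = new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);
    Console.WriteLine(thai.IsMatch("ก")); // True  (THAI CHARACTER KO KAI, U+0E01)
    Console.WriteLine(thai.IsMatch("a")); // False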

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
index 2b21033..d55733a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -1,9 +1,11 @@
-\ufeffusing Icu;
+\ufeff//using Icu;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
+using System.Globalization;
 using System.Text.RegularExpressions;
 
 namespace Lucene.Net.Analysis.Th
@@ -48,7 +50,7 @@ namespace Lucene.Net.Analysis.Th
         /// If this is false, this filter will not work at all!
         /// </summary>
         public static readonly bool DBBI_AVAILABLE = ThaiTokenizer.DBBI_AVAILABLE;
-        private readonly ThaiWordBreaker breaker = new ThaiWordBreaker(new Locale());
+        private readonly ThaiWordBreaker breaker = new ThaiWordBreaker(new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new CultureInfo("th")));
         private readonly CharArrayIterator charIterator = CharArrayIterator.NewWordInstance();
 
         private readonly bool handlePosIncr;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
index 502b7da..ca0b994 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -1,13 +1,8 @@
-\ufeffusing System;
-using System.Collections;
-using System.Collections.Generic;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using System;
 using System.Diagnostics;
-using System.Linq;
 using System.IO;
-using Icu;
-using Lucene.Net.Analysis.TokenAttributes;
-using Version = Lucene.Net.Util.LuceneVersion;
-
 
 namespace Lucene.Net.Analysis.Util
 {
@@ -28,9 +23,8 @@ namespace Lucene.Net.Analysis.Util
      * limitations under the License.
      */
 
-
     /// <summary>
-    /// Breaks text into sentences with a <seealso cref="BreakIterator"/> and
+    /// Breaks text into sentences with a <see cref="BreakIterator"/> and
     /// allows subclasses to decompose these sentences into words.
     /// <para>
     /// This can be used by subclasses that need sentence context 
@@ -46,11 +40,8 @@ namespace Lucene.Net.Analysis.Util
     /// </summary>
     public abstract class SegmentingTokenizerBase : Tokenizer
     {
-        // LUCENENET: Using Icu .NET to get Local_US
-        public static readonly Locale LocaleUS = new Locale("en-US");
-
-        protected internal const int BUFFERMAX = 1024;
-        protected internal readonly char[] m_buffer = new char[BUFFERMAX];
+        protected const int BUFFERMAX = 1024;
+        protected readonly char[] m_buffer = new char[BUFFERMAX];
         /// <summary>
         /// true length of text in the buffer </summary>
         private int length = 0;
@@ -59,11 +50,9 @@ namespace Lucene.Net.Analysis.Util
         private int usableLength = 0;
         /// <summary>
         /// accumulated offset of previous buffers for this reader, for offsetAtt </summary>
-        protected internal int m_offset = 0;
+        protected int m_offset = 0;
 
-        private readonly Locale locale;
-        private readonly BreakIterator.UBreakIteratorType iteratorType;
-        private IEnumerator<Boundary> enumerator;
+        private readonly BreakIterator iterator;
         private readonly CharArrayIterator wrapper = CharArrayIterator.NewSentenceInstance();
 
         private readonly IOffsetAttribute offsetAtt;
@@ -77,28 +66,19 @@ namespace Lucene.Net.Analysis.Util
         /// be provided to this constructor.
         /// </para>
         /// </summary>
-        protected SegmentingTokenizerBase(TextReader reader, BreakIterator.UBreakIteratorType iteratorType)
-            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, LocaleUS, iteratorType)
-        { }
-
-        protected SegmentingTokenizerBase(TextReader reader, Locale locale, BreakIterator.UBreakIteratorType iteratorType)
-            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, locale, iteratorType)
-        { }
+        protected SegmentingTokenizerBase(TextReader reader, BreakIterator iterator)
+            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
+        {
+        }
 
         /// <summary>
         /// Construct a new SegmenterBase, also supplying the AttributeFactory
         /// </summary>
-        protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator.UBreakIteratorType iteratorType)
-            : this(factory, reader, LocaleUS, iteratorType) 
-        { }
-
-        protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, Locale locale, BreakIterator.UBreakIteratorType iteratorType)
+        protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator)
             : base(factory, reader)
         {
             offsetAtt = AddAttribute<IOffsetAttribute>();
-            this.iteratorType = iteratorType;
-            this.locale = locale;
-            enumerator = Enumerable.Empty<Boundary>().GetEnumerator();
+            this.iterator = iterator;
         }
 
         public override sealed bool IncrementToken()
@@ -122,7 +102,7 @@ namespace Lucene.Net.Analysis.Util
         {
             base.Reset();
             wrapper.SetText(m_buffer, 0, 0);
-            enumerator = Enumerable.Empty<Boundary>().GetEnumerator();
+            iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
             length = usableLength = m_offset = 0;
         }
 
@@ -193,16 +173,7 @@ namespace Lucene.Net.Analysis.Util
             }
 
             wrapper.SetText(m_buffer, 0, Math.Max(0, usableLength));
-
-            var text = new string(wrapper.Text, wrapper.Start, wrapper.Length);
-
-            if (enumerator != null)
-            {
-                enumerator.Dispose();
-                enumerator = null;
-            }
-
-            enumerator = BreakIterator.GetBoundaries(iteratorType, locale, text).ToList().GetEnumerator();
+            iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
         }
 
         // TODO: refactor to a shared readFully somewhere
@@ -240,15 +211,22 @@ namespace Lucene.Net.Analysis.Util
 
             while (true)
             {
-                if (!enumerator.MoveNext())
+                int start = iterator.Current;
+
+                if (start == BreakIterator.DONE)
                 {
-                    return false;
+                    return false; // BreakIterator exhausted
                 }
 
-                var current = enumerator.Current;
+                // find the next set of boundaries
+                int end = iterator.Next();
 
-                SetNextSentence(current.Start, current.End);
+                if (end == BreakIterator.DONE)
+                {
+                    return false; // BreakIterator exhausted
+                }
 
+                SetNextSentence(start, end);
                 if (IncrementWord())
                 {
                     return true;
@@ -256,23 +234,12 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        public override void Dispose()
-        {
-            if (enumerator != null)
-            {
-                enumerator.Dispose();
-                enumerator = null;
-            }
-
-            base.Dispose();
-        }
-
         /// <summary>
         /// Provides the next input sentence for analysis </summary>
-        protected internal abstract void SetNextSentence(int sentenceStart, int sentenceEnd);
+        protected abstract void SetNextSentence(int sentenceStart, int sentenceEnd);
 
         /// <summary>
         /// Returns true if another word is available </summary>
-        protected internal abstract bool IncrementWord();
+        protected abstract bool IncrementWord();
     }
 }
\ No newline at end of file
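
A minimal subclass sketch against the reverted base-class contract, mirroring the WholeSentenceTokenizer in the tests below (offset bookkeeping omitted for brevity; IcuBreakIterator's constructor signature is taken from this commit):

    using System.Globalization;
    using System.IO;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Support;

    internal sealed class WholeSentenceTokenizer : SegmentingTokenizerBase
    {
        private readonly ICharTermAttribute termAtt;
        private int sentenceStart, sentenceEnd;
        private bool hasSentence;

        public WholeSentenceTokenizer(TextReader reader)
            : base(reader, new IcuBreakIterator(
                Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture))
        {
            termAtt = AddAttribute<ICharTermAttribute>();
        }

        protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
        {
            this.sentenceStart = sentenceStart;
            this.sentenceEnd = sentenceEnd;
            hasSentence = true;
        }

        protected override bool IncrementWord()
        {
            if (!hasSentence)
            {
                return false;
            }
            ClearAttributes();
            // emit the whole sentence from the shared buffer as one token
            termAtt.CopyBuffer(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
            hasSentence = false;
            return true;
        }
    }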

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
index 3654c56..5ff98e2 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
@@ -133,24 +133,15 @@ namespace Lucene.Net.Analysis.Th
         [Test]
         public virtual void TestRandomStrings()
         {
-            fail("This test is somehow crashing NUnit and causing it not to complete");
             CheckRandomData(Random(), new ThaiAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
         }
 
         /// <summary>
         /// blast some random large strings through the analyzer </summary>
         /// 
-#if NETSTANDARD
-        // NUnit does not have TimeoutAttribute for .NET Standard, so it can run for a long time.
-        // https://github.com/nunit/nunit/issues/1658
-        [LongRunningTest]
-#endif
         [Test]
         public virtual void TestRandomHugeStrings()
         {
-            // LUCENENET NOTE: This is NOT a long running test - it should only take a few seconds
-            fail("This test is somehow crashing NUnit and causing it not to complete");
-
             Random random = Random();
             CheckRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
index d717edf..86fda8f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
@@ -4,7 +4,8 @@ using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using NUnit.Framework;
-\ufeffusing System;
+using System;
+using System.Globalization;
 using System.IO;
 using System.Text;
 using BreakIterator = Icu.BreakIterator;
@@ -130,20 +131,20 @@ namespace Lucene.Net.Analysis.Util
             internal IOffsetAttribute offsetAtt;
 
             public WholeSentenceTokenizer(TextReader reader)
-                : base(reader, new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE)
+                : base(reader, new IcuBreakIterator(BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)  /*new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE*/)
             {
                 termAtt = AddAttribute<ICharTermAttribute>();
                 offsetAtt = AddAttribute<IOffsetAttribute>();
             }
 
-            protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+            protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
             {
                 this.sentenceStart = sentenceStart;
                 this.sentenceEnd = sentenceEnd;
                 hasSentence = true;
             }
 
-            protected internal override bool IncrementWord()
+            protected override bool IncrementWord()
             {
                 if (hasSentence)
                 {
@@ -175,14 +176,14 @@ namespace Lucene.Net.Analysis.Util
             internal IPositionIncrementAttribute posIncAtt;
 
             public SentenceAndWordTokenizer(TextReader reader)
-                : base(reader, new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE)
+                : base(reader, new IcuBreakIterator(BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture) /*new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE*/)
             {
                 termAtt = AddAttribute<ICharTermAttribute>();
                 offsetAtt = AddAttribute<IOffsetAttribute>();
                 posIncAtt = AddAttribute<IPositionIncrementAttribute>();
             }
 
-            protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+            protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
             {
                 this.wordStart = this.wordEnd = this.sentenceStart = sentenceStart;
                 this.sentenceEnd = sentenceEnd;
@@ -195,7 +196,7 @@ namespace Lucene.Net.Analysis.Util
                 posBoost = -1;
             }
 
-            protected internal override bool IncrementWord()
+            protected override bool IncrementWord()
             {
                 wordStart = wordEnd;
                 while (wordStart < sentenceEnd)


[05/14] lucenenet git commit: Lucene.Net.Core.Util.Version: Changed .ToUpper(CultureInfo.InvariantCulture) > .ToUpperInvariant() (issue with .NET Core)

Posted by ni...@apache.org.
Lucene.Net.Core.Util.Version: Changed .ToUpper(CultureInfo.InvariantCulture) > .ToUpperInvariant() (issue with .NET Core)
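
The replacement is behavior-preserving (ToUpperInvariant is defined as uppercasing with the invariant culture) while avoiding an overload that, per the commit note, was a problem on .NET Core:

    // Before (overload reportedly unavailable on .NET Core at the time):
    // string parsedMatchVersion = version.ToUpper(CultureInfo.InvariantCulture);

    // After: same result on every platform, and immune to culture-sensitive
    // casing surprises such as the Turkish dotted/dotless I.
    string parsedMatchVersion = version.ToUpperInvariant();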


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9525d45f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9525d45f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9525d45f

Branch: refs/heads/api-work
Commit: 9525d45f80a11617c6e478d362165fddaaa288e1
Parents: fc34ba7
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:51:06 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:31 2017 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Util/Version.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9525d45f/src/Lucene.Net.Core/Util/Version.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Version.cs b/src/Lucene.Net.Core/Util/Version.cs
index 9a9c6f9..59b4ac2 100644
--- a/src/Lucene.Net.Core/Util/Version.cs
+++ b/src/Lucene.Net.Core/Util/Version.cs
@@ -166,7 +166,7 @@ namespace Lucene.Net.Util
 
         public static LuceneVersion ParseLeniently(string version)
         {
-            string parsedMatchVersion = version.ToUpper(CultureInfo.InvariantCulture);
+            string parsedMatchVersion = version.ToUpperInvariant();
             LuceneVersion result;
             Enum.TryParse(NumericVersion.Replace(parsedMatchVersion, "LUCENE_$1$2", 1), out result);
             return result;