Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 12:42:32 UTC
[01/14] lucenenet git commit: Lucene.Net.Analysis.Common.Analysis.Miscellaneous.WordDelimiterIterator refactor: changed parameters and fields from sbyte to byte for CLS compliance
Repository: lucenenet
Updated Branches:
refs/heads/api-work 11a1a1c36 -> bc485b4c4
Lucene.Net.Analysis.Common.Analysis.Miscellaneous.WordDelimiterIterator refactor: changed parameters and fields from sbyte to byte for CLS compliance
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7cd69ab0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7cd69ab0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7cd69ab0
Branch: refs/heads/api-work
Commit: 7cd69ab00ac649081feeb814c846e9eaeb99f208
Parents: 11a1a1c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Wed Feb 1 23:51:38 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Feb 1 23:51:38 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/Lucene47WordDelimiterFilter.cs | 2 +-
.../Analysis/Miscellaneous/WordDelimiterFilter.cs | 2 +-
.../Miscellaneous/WordDelimiterFilterFactory.cs | 12 ++++++------
.../Miscellaneous/WordDelimiterIterator.cs | 18 +++++++++---------
.../Analysis/Core/TestBugInSomething.cs | 6 +++---
5 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
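For context on the motivation: sbyte is not a CLS-compliant type, so exposing it in public signatures of an assembly marked [assembly: CLSCompliant(true)] raises compiler warning CS3001; byte is the compliant unsigned equivalent. A minimal sketch (not part of this commit) illustrating the rule:

    using System;

    [assembly: CLSCompliant(true)]

    public class CharTypes
    {
        // CS3001: Argument type 'sbyte[]' is not CLS-compliant
        //public void Load(sbyte[] table) { }

        // byte is CLS-compliant, so this overload compiles without warnings
        public void Load(byte[] table) { }
    }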
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 219db723..d46b6c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -155,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
- public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
+ public Lucene47WordDelimiterFilter(TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
: base(@in)
{
termAttribute = AddAttribute<ICharTermAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index d002ab2..77f643e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -206,7 +206,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
- public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords)
+ public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
: base(@in)
{
InitializeInstanceFields();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
index b9aaf96..ce4959c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -48,7 +48,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly string wordFiles;
private readonly string types;
private readonly int flags;
- internal sbyte[] typeTable = null;
+ internal byte[] typeTable = null;
private CharArraySet protectedWords = null;
/// <summary>
@@ -142,9 +142,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private static Regex typePattern = new Regex("(.*)\\s*=>\\s*(.*)\\s*$", RegexOptions.Compiled);
// parses a list of MappingCharFilter style rules into a custom byte[] type table
- private sbyte[] ParseTypes(IList<string> rules)
+ private byte[] ParseTypes(IList<string> rules)
{
- IDictionary<char, sbyte> typeMap = new SortedDictionary<char, sbyte>();
+ IDictionary<char, byte> typeMap = new SortedDictionary<char, byte>();
foreach (string rule in rules)
{
//Matcher m = typePattern.matcher(rule);
@@ -155,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]");
}
string lhs = ParseString(m.Groups[1].Value.Trim());
- sbyte rhs = ParseType(m.Groups[2].Value.Trim());
+ byte rhs = ParseType(m.Groups[2].Value.Trim());
if (lhs.Length != 1)
{
throw new System.ArgumentException("Invalid Mapping Rule : [" + rule + "]. Only a single character is allowed.");
@@ -168,7 +168,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
// ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
- sbyte[] types = new sbyte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
+ byte[] types = new byte[Math.Max(typeMap.Keys.LastOrDefault() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
for (int i = 0; i < types.Length; i++)
{
types[i] = WordDelimiterIterator.GetType(i);
@@ -180,7 +180,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
return types;
}
- private sbyte ParseType(string s)
+ private byte ParseType(string s)
{
if (s.Equals("LOWER"))
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
index 46c43a1..3fe61b6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Indicates the end of iteration </summary>
public const int DONE = -1;
- public static readonly sbyte[] DEFAULT_WORD_DELIM_TABLE;
+ public static readonly byte[] DEFAULT_WORD_DELIM_TABLE;
internal char[] text;
internal int length;
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
internal readonly bool stemEnglishPossessive;
- private readonly sbyte[] charTypeTable;
+ private readonly byte[] charTypeTable;
/// <summary>
/// if true, need to skip over a possessive found in the last call to next() </summary>
@@ -81,21 +81,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
// done if separated by these chars?) "," would be an obvious candidate...
static WordDelimiterIterator()
{
- var tab = new sbyte[256];
+ var tab = new byte[256];
for (int i = 0; i < 256; i++)
{
- sbyte code = 0;
+ byte code = 0;
if (char.IsLower((char)i))
{
- code |= (sbyte)WordDelimiterFilter.LOWER;
+ code |= (byte)WordDelimiterFilter.LOWER;
}
else if (char.IsUpper((char)i))
{
- code |= (sbyte)WordDelimiterFilter.UPPER;
+ code |= (byte)WordDelimiterFilter.UPPER;
}
else if (char.IsDigit((char)i))
{
- code |= (sbyte)WordDelimiterFilter.DIGIT;
+ code |= (byte)WordDelimiterFilter.DIGIT;
}
if (code == 0)
{
@@ -113,7 +113,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <param name="splitOnCaseChange"> if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) </param>
/// <param name="splitOnNumerics"> if true, causes "j2se" to be three tokens; "j" "2" "se" </param>
/// <param name="stemEnglishPossessive"> if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" </param>
- internal WordDelimiterIterator(sbyte[] charTypeTable, bool splitOnCaseChange, bool splitOnNumerics, bool stemEnglishPossessive)
+ internal WordDelimiterIterator(byte[] charTypeTable, bool splitOnCaseChange, bool splitOnNumerics, bool stemEnglishPossessive)
{
this.charTypeTable = charTypeTable;
this.splitOnCaseChange = splitOnCaseChange;
@@ -315,7 +315,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="ch"> Character whose type is to be determined </param>
/// <returns> Type of the character </returns>
- public static sbyte GetType(int ch)
+ public static byte GetType(int ch)
{
switch (CharUnicodeInfo.GetUnicodeCategory((char)ch))
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cd69ab0/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
index e552384..1a7f7e9 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -333,7 +333,7 @@ namespace Lucene.Net.Analysis.Core
public virtual void TestCuriousWikipediaString()
{
CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<string>(Arrays.AsList("rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha")), false);
- sbyte[] table = new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
+ byte[] table = (byte[])(Array)new sbyte[] { -57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20 };
Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table);
CheckAnalysisConsistency(Random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
}
@@ -343,9 +343,9 @@ namespace Lucene.Net.Analysis.Core
private readonly TestBugInSomething outerInstance;
private CharArraySet protWords;
- private sbyte[] table;
+ private byte[] table;
- public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, sbyte[] table)
+ public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, byte[] table)
{
this.outerInstance = outerInstance;
this.protWords = protWords;
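A note on the cast used in TestCuriousWikipediaString above: C# rejects a direct sbyte[]-to-byte[] conversion, but the CLR treats arrays of same-size signed/unsigned integral types as compatible, so up-casting to Array and back reinterprets the same array object without copying; negative values simply map to their two's-complement byte equivalents. A minimal standalone sketch (not from the commit):

    using System;

    class CastDemo
    {
        static void Main()
        {
            sbyte[] signed = { -57, 26, 1 };

            // (byte[])signed would be a compile-time error; going through Array
            // defers the check to the CLR, which permits the reinterpretation.
            byte[] unsigned = (byte[])(Array)signed;

            Console.WriteLine(unsigned[0]);                        // 199 (two's complement of -57)
            Console.WriteLine(ReferenceEquals(signed, unsigned));  // True: same array object
        }
    }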
[13/14] lucenenet git commit: IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter.
Posted by ni...@apache.org.
IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc7b5b52
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc7b5b52
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc7b5b52
Branch: refs/heads/api-work
Commit: fc7b5b52dd64877d5d63498b3d2df4e54c569bd8
Parents: 506f55a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 18:02:48 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:37 2017 +0700
----------------------------------------------------------------------
src/IcuBreakIterator.cs | 23 +++++++++++++++-----
.../PostingsHighlight/PostingsHighlighter.cs | 5 ++++-
2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
index 0bf6007..6028ba1 100644
--- a/src/IcuBreakIterator.cs
+++ b/src/IcuBreakIterator.cs
@@ -50,6 +50,8 @@ namespace Lucene.Net
/// </summary>
protected int m_end;
+ private bool enableHacks = false;
+
public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
: this(type, CultureInfo.CurrentCulture)
{
@@ -63,6 +65,13 @@ namespace Lucene.Net
this.type = type;
}
+
+ public virtual bool EnableHacks
+ {
+ get { return enableHacks; }
+ set { enableHacks = value; }
+ }
+
/// <summary>
/// Sets the current iteration position to the beginning of the text.
/// </summary>
@@ -280,20 +289,22 @@ namespace Lucene.Net
private void LoadBoundaries(int start, int end)
{
- //boundaries = new List<int>();
-
IEnumerable<Icu.Boundary> icuBoundaries;
string offsetText = text.Substring(start, end - start);
-
if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
{
- // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
- icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+ if (enableHacks)
+ {
+ // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+ offsetText = offsetText.Replace("-", "a");
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
}
else
{
- if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+ if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
{
// LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
offsetText = offsetText.Replace("\n", " ");
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
index 63c48bc..db04ee1 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
@@ -131,7 +131,10 @@ namespace Lucene.Net.Search.PostingsHighlight
/// </summary>
protected virtual BreakIterator GetBreakIterator(string field)
{
- return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture);
+ return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
+ {
+ EnableHacks = true
+ };
}
/// <summary>
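Taken together, the change makes the workarounds opt-in: IcuBreakIterator now behaves like plain ICU unless a consumer sets the new property, and only PostingsHighlighter does. A usage sketch mirroring the diff above:

    var breakIterator = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE,
        CultureInfo.InvariantCulture)
    {
        // Opt in to the newline and capitalization workarounds; defaults to
        // false, so Analysis.Common consumers are unaffected.
        EnableHacks = true
    };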
[03/14] lucenenet git commit: Lucene.Net.Core.Analysis.TokenAttributes: Deleted unused TermAttribute class (not part of Lucene 4.8.0)
Posted by ni...@apache.org.
Lucene.Net.Core.Analysis.TokenAttributes: Deleted unused TermAttribute class (not part of Lucene 4.8.0)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1c87ed5b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1c87ed5b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1c87ed5b
Branch: refs/heads/api-work
Commit: 1c87ed5b0139f836cbac1cfa4e76463882281e8f
Parents: 21b3d8b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:26:58 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:29 2017 +0700
----------------------------------------------------------------------
.../Analysis/Tokenattributes/TermAttribute.cs | 268 -------------------
1 file changed, 268 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1c87ed5b/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs b/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
deleted file mode 100644
index 3dad641..0000000
--- a/src/Lucene.Net.Core/Analysis/Tokenattributes/TermAttribute.cs
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using Lucene.Net.Support;
-using ArrayUtil = Lucene.Net.Util.ArrayUtil;
-using Attribute = Lucene.Net.Util.Attribute;
-
-namespace Lucene.Net.Analysis.Tokenattributes
-{
-
- /// <summary> The term text of a Token.</summary>
- [Serializable]
- public class TermAttribute:Attribute, ITermAttribute, System.ICloneable
- {
- private static int MIN_BUFFER_SIZE = 10;
-
- private char[] termBuffer;
- private int termLength;
-
- /// <summary>Returns the Token's term text.
- ///
- /// This method has a performance penalty
- /// because the text is stored internally in a char[]. If
- /// possible, use <see cref="TermBuffer()" /> and
- /// <see cref="TermLength()" /> directly instead. If you
- /// really need a String, use this method, which is nothing more than
- /// a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
- /// </summary>
- public virtual string Term
- {
- get
- {
- InitTermBuffer();
- return new System.String(termBuffer, 0, termLength);
- }
- }
-
- /// <summary>Copies the contents of buffer, starting at offset for
- /// length characters, into the termBuffer array.
- /// </summary>
- /// <param name="buffer">the buffer to copy
- /// </param>
- /// <param name="offset">the index in the buffer of the first character to copy
- /// </param>
- /// <param name="length">the number of characters to copy
- /// </param>
- public virtual void SetTermBuffer(char[] buffer, int offset, int length)
- {
- GrowTermBuffer(length);
- Array.Copy(buffer, offset, termBuffer, 0, length);
- termLength = length;
- }
-
- /// <summary>Copies the contents of buffer into the termBuffer array.</summary>
- /// <param name="buffer">the buffer to copy
- /// </param>
- public virtual void SetTermBuffer(System.String buffer)
- {
- int length = buffer.Length;
- GrowTermBuffer(length);
- TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
- termLength = length;
- }
-
- /// <summary>Copies the contents of buffer, starting at offset and continuing
- /// for length characters, into the termBuffer array.
- /// </summary>
- /// <param name="buffer">the buffer to copy
- /// </param>
- /// <param name="offset">the index in the buffer of the first character to copy
- /// </param>
- /// <param name="length">the number of characters to copy
- /// </param>
- public virtual void SetTermBuffer(System.String buffer, int offset, int length)
- {
- System.Diagnostics.Debug.Assert(offset <= buffer.Length);
- System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
- GrowTermBuffer(length);
- TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
- termLength = length;
- }
-
- /// <summary>Returns the internal termBuffer character array which
- /// you can then directly alter. If the array is too
- /// small for your token, use <see cref="ResizeTermBuffer(int)" />
- /// to increase it. After
- /// altering the buffer be sure to call <see cref="SetTermLength" />
- /// to record the number of valid
- /// characters that were placed into the termBuffer.
- /// </summary>
- public virtual char[] TermBuffer()
- {
- InitTermBuffer();
- return termBuffer;
- }
-
- /// <summary>Grows the termBuffer to at least size newSize, preserving the
- /// existing content. Note: If the next operation is to change
- /// the contents of the term buffer use
- /// <see cref="SetTermBuffer(char[], int, int)" />,
- /// <see cref="SetTermBuffer(String)" />, or
- /// <see cref="SetTermBuffer(String, int, int)" />
- /// to optimally combine the resize with the setting of the termBuffer.
- /// </summary>
- /// <param name="newSize">minimum size of the new termBuffer
- /// </param>
- /// <returns> newly created termBuffer with length >= newSize
- /// </returns>
- public virtual char[] ResizeTermBuffer(int newSize)
- {
- if (termBuffer == null)
- {
- // The buffer is always at least MIN_BUFFER_SIZE
- termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
- }
- else
- {
- if (termBuffer.Length < newSize)
- {
- // Not big enough; create a new array with slight
- // over allocation and preserve content
- char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
- Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
- termBuffer = newCharBuffer;
- }
- }
- return termBuffer;
- }
-
-
- /// <summary>Allocates a buffer char[] of at least newSize, without preserving the existing content.
- /// its always used in places that set the content
- /// </summary>
- /// <param name="newSize">minimum size of the buffer
- /// </param>
- private void GrowTermBuffer(int newSize)
- {
- if (termBuffer == null)
- {
- // The buffer is always at least MIN_BUFFER_SIZE
- termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
- }
- else
- {
- if (termBuffer.Length < newSize)
- {
- // Not big enough; create a new array with slight
- // over allocation:
- termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
- }
- }
- }
-
- private void InitTermBuffer()
- {
- if (termBuffer == null)
- {
- termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
- termLength = 0;
- }
- }
-
- /// <summary>Return number of valid characters (length of the term)
- /// in the termBuffer array.
- /// </summary>
- public virtual int TermLength()
- {
- return termLength;
- }
-
- /// <summary>Set number of valid characters (length of the term) in
- /// the termBuffer array. Use this to truncate the termBuffer
- /// or to synchronize with external manipulation of the termBuffer.
- /// Note: to grow the size of the array,
- /// use <see cref="ResizeTermBuffer(int)" /> first.
- /// </summary>
- /// <param name="length">the truncated length
- /// </param>
- public virtual void SetTermLength(int length)
- {
- InitTermBuffer();
- if (length > termBuffer.Length)
- throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
- termLength = length;
- }
-
- public override int GetHashCode()
- {
- InitTermBuffer();
- int code = termLength;
- code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
- return code;
- }
-
- public override void Clear()
- {
- termLength = 0;
- }
-
- public override System.Object Clone()
- {
- TermAttribute t = (TermAttribute) base.Clone();
- // Do a deep clone
- if (termBuffer != null)
- {
- t.termBuffer = new char[termBuffer.Length];
- termBuffer.CopyTo(t.termBuffer, 0);
- }
- return t;
- }
-
- public override bool Equals(System.Object other)
- {
- if (other == this)
- {
- return true;
- }
-
- if (other is ITermAttribute)
- {
- InitTermBuffer();
- TermAttribute o = ((TermAttribute) other);
- o.InitTermBuffer();
-
- if (termLength != o.termLength)
- return false;
- for (int i = 0; i < termLength; i++)
- {
- if (termBuffer[i] != o.termBuffer[i])
- {
- return false;
- }
- }
- return true;
- }
-
- return false;
- }
-
- public override System.String ToString()
- {
- InitTermBuffer();
- return "term=" + new System.String(termBuffer, 0, termLength);
- }
-
- public override void CopyTo(Attribute target)
- {
- InitTermBuffer();
- ITermAttribute t = (ITermAttribute) target;
- t.SetTermBuffer(termBuffer, 0, termLength);
- }
- }
-}
\ No newline at end of file
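The deleted class is a holdover from the Lucene 2.x/3.x API; in 4.8.0 term text travels through ICharTermAttribute instead (as the Lucene47WordDelimiterFilter constructor in the first commit shows). A hedged sketch of the replacement pattern, assuming an already-constructed TokenStream named stream; member names follow the Lucene.NET 4.8 API and may differ slightly by build:

    var termAtt = stream.AddAttribute<ICharTermAttribute>();
    stream.Reset();
    while (stream.IncrementToken())
    {
        // CharTermAttribute's ToString() replaces the old Term property.
        string term = termAtt.ToString();
    }
    stream.End();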
[10/14] lucenenet git commit: Moved IcuBreakIterator to src\ directory and added it as a linked file to both Lucene.Net.Analysis.Common and Lucene.Net.Highlighter. This seems like a better option than creating a separate DLL to share this dependency or duplicating it.
Posted by ni...@apache.org.
Moved IcuBreakIterator to src\ directory and added it as a linked file to both Lucene.Net.Analysis.Common and Lucene.Net.Highlighter. This seems like a better option than creating a separate DLL to share this dependency or duplicating it.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5a7cb173
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5a7cb173
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5a7cb173
Branch: refs/heads/api-work
Commit: 5a7cb173489a0e22b6ff890f7c283ee50895e42d
Parents: bed1f16
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 16:28:04 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:35 2017 +0700
----------------------------------------------------------------------
src/IcuBreakIterator.cs | 368 +++++++++++++++++++
.../Lucene.Net.Analysis.Common.csproj | 3 +
src/Lucene.Net.Analysis.Common/project.json | 3 +
src/Lucene.Net.Highlighter/IcuBreakIterator.cs | 368 -------------------
.../Lucene.Net.Highlighter.csproj | 4 +-
src/Lucene.Net.Highlighter/project.json | 7 +
6 files changed, 384 insertions(+), 369 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
new file mode 100644
index 0000000..0bf6007
--- /dev/null
+++ b/src/IcuBreakIterator.cs
@@ -0,0 +1,368 @@
+\ufeffusing Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+ /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+ /// provides methods to move forward, reverse, and randomly through a set of text breaks
+ /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+ /// </summary>
+ // LUCENENET specific type
+ internal class IcuBreakIterator : BreakIterator
+ {
+ private readonly Icu.Locale locale;
+ private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+ private List<int> boundaries = new List<int>();
+ private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
+ private string text;
+
+ /// <summary>
+ /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_start;
+
+ /// <summary>
+ /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_end;
+
+ public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+ : this(type, CultureInfo.CurrentCulture)
+ {
+ }
+
+ public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
+ {
+ if (locale == null)
+ throw new ArgumentNullException("locale");
+ this.locale = new Icu.Locale(locale.Name);
+ this.type = type;
+ }
+
+ /// <summary>
+ /// Sets the current iteration position to the beginning of the text.
+ /// </summary>
+ /// <returns>The offset of the beginning of the text.</returns>
+ public override int First()
+ {
+ currentBoundaryIndex = 0;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Sets the current iteration position to the end of the text.
+ /// </summary>
+ /// <returns>The text's past-the-end offset.</returns>
+ public override int Last()
+ {
+ currentBoundaryIndex = boundaries.Count - 1;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Advances the iterator either forward or backward the specified number of steps.
+ /// Negative values move backward, and positive values move forward. This is
+ /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
+ /// </summary>
+ /// <param name="n">The number of steps to move. The sign indicates the direction
+ /// (negative is backwards, and positive is forwards).</param>
+ /// <returns>The character offset of the boundary position n boundaries away from
+ /// the current one.</returns>
+ public override int Next(int n)
+ {
+ int result = Current;
+ while (n > 0)
+ {
+ result = Next();
+ --n;
+ }
+ while (n < 0)
+ {
+ result = Previous();
+ ++n;
+ }
+ return result;
+ }
+
+ /// <summary>
+ /// Advances the iterator to the next boundary position.
+ /// </summary>
+ /// <returns>The position of the first boundary after this one.</returns>
+ public override int Next()
+ {
+ if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
+ {
+ return DONE;
+ }
+ currentBoundaryIndex++;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Advances the iterator backwards, to the last boundary preceding this one.
+ /// </summary>
+ /// <returns>The position of the last boundary position preceding this one.</returns>
+ public override int Previous()
+ {
+ if (currentBoundaryIndex == 0 || boundaries.Count == 0)
+ {
+ return DONE;
+ }
+ currentBoundaryIndex--;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Throw <see cref="ArgumentException"/> unless begin <= offset < end.
+ /// </summary>
+ /// <param name="offset"></param>
+ private void CheckOffset(int offset)
+ {
+ if (offset < m_start || offset > m_end)
+ {
+ throw new ArgumentException("offset out of bounds");
+ }
+ }
+
+ /// <summary>
+ /// Sets the iterator to refer to the first boundary position following
+ /// the specified position.
+ /// </summary>
+ /// <param name="offset">The position from which to begin searching for a break position.</param>
+ /// <returns>The position of the first break after the current position.</returns>
+ public override int Following(int offset)
+ {
+ CheckOffset(offset);
+
+ if (boundaries.Count == 0)
+ {
+ return DONE;
+ }
+
+ int following = GetLowestIndexGreaterThan(offset);
+ if (following == -1)
+ {
+ currentBoundaryIndex = boundaries.Count - 1;
+ return DONE;
+ }
+ else
+ {
+ currentBoundaryIndex = following;
+ }
+ return ReturnCurrent();
+ }
+
+ private int GetLowestIndexGreaterThan(int offset)
+ {
+ int index = boundaries.BinarySearch(offset);
+ if (index < 0)
+ {
+ return ~index;
+ }
+ else if (index + 1 < boundaries.Count)
+ {
+ return index + 1;
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Sets the iterator to refer to the last boundary position before the
+ /// specified position.
+ /// </summary>
+ /// <param name="offset">The position to begin searching for a break from.</param>
+ /// <returns>The position of the last boundary before the starting position.</returns>
+ public override int Preceding(int offset)
+ {
+ CheckOffset(offset);
+
+ if (boundaries.Count == 0)
+ {
+ return DONE;
+ }
+
+ int preceeding = GetHighestIndexLessThan(offset);
+ if (preceeding == -1)
+ {
+ currentBoundaryIndex = 0;
+ return DONE;
+ }
+ else
+ {
+ currentBoundaryIndex = preceeding;
+ }
+ return ReturnCurrent();
+ }
+
+ private int GetHighestIndexLessThan(int offset)
+ {
+ int index = boundaries.BinarySearch(offset);
+ if (index < 0)
+ {
+ return ~index - 1;
+ }
+ else
+ {
+ // NOTE: This is intentionally allowed to return -1 in the case
+ // where index == 0. This state indicates we are before the first boundary.
+ return index - 1;
+ }
+ }
+
+ /// <summary>
+ /// Returns the current iteration position.
+ /// </summary>
+ public override int Current
+ {
+ get { return ReturnCurrent(); }
+ }
+
+ /// <summary>
+ /// Gets the text being analyzed.
+ /// </summary>
+ public override string Text
+ {
+ get
+ {
+ return text;
+ }
+ }
+
+ /// <summary>
+ /// Set the iterator to analyze a new piece of text. This function resets
+ /// the current iteration position to the beginning of the text.
+ /// </summary>
+ /// <param name="newText">The text to analyze.</param>
+ public override void SetText(string newText)
+ {
+ text = newText;
+ currentBoundaryIndex = 0;
+ m_start = 0;
+ m_end = newText.Length;
+
+ LoadBoundaries(m_start, m_end);
+ }
+
+ public override void SetText(CharacterIterator newText)
+ {
+ text = newText.GetTextAsString();
+ currentBoundaryIndex = 0;
+ m_start = newText.BeginIndex;
+ m_end = newText.EndIndex;
+
+ LoadBoundaries(m_start, m_end);
+ }
+
+ private void LoadBoundaries(int start, int end)
+ {
+ //boundaries = new List<int>();
+
+ IEnumerable<Icu.Boundary> icuBoundaries;
+ string offsetText = text.Substring(start, end - start);
+
+
+ if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+ {
+ // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+ icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+ }
+ else
+ {
+ if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+ {
+ // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+ offsetText = offsetText.Replace("\n", " ");
+ // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+ // begin with capital letters.
+ offsetText = CapitalizeFirst(offsetText);
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+ }
+
+ boundaries = icuBoundaries
+ .Select(t => new[] { t.Start + start, t.End + start })
+ .SelectMany(b => b)
+ .Distinct()
+ .ToList();
+ }
+
+ /// <summary>
+ /// Returns true if the specified character offset is a text boundary.
+ /// </summary>
+ /// <param name="offset">the character offset to check.</param>
+ /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+ public override bool IsBoundary(int offset)
+ {
+ CheckOffset(offset);
+ return boundaries.Contains(offset);
+ }
+
+ private int ReturnCurrent()
+ {
+ if (boundaries.Count > 0)
+ {
+ return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
+ ? boundaries[currentBoundaryIndex]
+ : DONE;
+ }
+
+ // If there are no boundaries, we must return the start offset
+ return m_start;
+ }
+
+ /// <summary>
+ /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
+ /// where it doesn't correctly break sentences unless they begin with a capital letter.
+ /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator
+ /// code changed to remove calls to this method.
+ /// </summary>
+ public static string CapitalizeFirst(string s)
+ {
+ bool isNewSentence = true;
+ var result = new StringBuilder(s.Length);
+ for (int i = 0; i < s.Length; i++)
+ {
+ if (isNewSentence && char.IsLetter(s[i]))
+ {
+ result.Append(char.ToUpper(s[i]));
+ isNewSentence = false;
+ }
+ else
+ result.Append(s[i]);
+
+ if (s[i] == '!' || s[i] == '?' || s[i] == '.')
+ {
+ isNewSentence = true;
+ }
+ }
+
+ return result.ToString();
+ }
+ }
+}
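GetLowestIndexGreaterThan and GetHighestIndexLessThan above rely on List<T>.BinarySearch returning the bitwise complement of the insertion point when the value is absent, so ~index yields the index of the next larger boundary. A small demonstration (not from the commit):

    using System;
    using System.Collections.Generic;

    class BinarySearchDemo
    {
        static void Main()
        {
            var boundaries = new List<int> { 0, 4, 9, 15 };

            Console.WriteLine(boundaries.BinarySearch(9));   // 2: exact match
            int miss = boundaries.BinarySearch(7);           // negative: 7 is absent
            Console.WriteLine(~miss);                        // 2: index of next larger value (9)
            Console.WriteLine(~miss - 1);                    // 1: index of next smaller value (4)
        }
    }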
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 4e4a0e9..610125f 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -42,6 +42,9 @@
<Reference Include="System.Xml.Linq" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="..\IcuBreakIterator.cs">
+ <Link>IcuBreakIterator.cs</Link>
+ </Compile>
<Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
<Compile Include="Analysis\Bg\BulgarianStemFilter.cs" />
<Compile Include="Analysis\Bg\BulgarianStemFilterFactory.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/project.json b/src/Lucene.Net.Analysis.Common/project.json
index 9b9b2cf..02d5a79 100644
--- a/src/Lucene.Net.Analysis.Common/project.json
+++ b/src/Lucene.Net.Analysis.Common/project.json
@@ -16,6 +16,9 @@
]
},
"compile": {
+ "includeFiles": [
+ "../IcuBreakIterator.cs"
+ ],
"exclude": [
]
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs b/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
deleted file mode 100644
index 72e89b0..0000000
--- a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
+++ /dev/null
@@ -1,368 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Search
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
- /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
- /// provides methods to move forward, reverse, and randomly through a set of text breaks
- /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
- /// </summary>
- // LUCENENET specific type
- internal class IcuBreakIterator : BreakIterator
- {
- private readonly Icu.Locale locale;
- private readonly Icu.BreakIterator.UBreakIteratorType type;
-
- private List<int> boundaries = new List<int>();
- private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
- private string text;
-
- /// <summary>
- /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
- /// </summary>
- protected int m_start;
-
- /// <summary>
- /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
- /// </summary>
- protected int m_end;
-
- public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
- : this(type, CultureInfo.CurrentCulture)
- {
- }
-
- public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
- {
- if (locale == null)
- throw new ArgumentNullException("locale");
- this.locale = new Icu.Locale(locale.Name);
- this.type = type;
- }
-
- /// <summary>
- /// Sets the current iteration position to the beginning of the text.
- /// </summary>
- /// <returns>The offset of the beginning of the text.</returns>
- public override int First()
- {
- currentBoundaryIndex = 0;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Sets the current iteration position to the end of the text.
- /// </summary>
- /// <returns>The text's past-the-end offset.</returns>
- public override int Last()
- {
- currentBoundaryIndex = boundaries.Count - 1;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Advances the iterator either forward or backward the specified number of steps.
- /// Negative values move backward, and positive values move forward. This is
- /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
- /// </summary>
- /// <param name="n">The number of steps to move. The sign indicates the direction
- /// (negative is backwards, and positive is forwards).</param>
- /// <returns>The character offset of the boundary position n boundaries away from
- /// the current one.</returns>
- public override int Next(int n)
- {
- int result = Current;
- while (n > 0)
- {
- result = Next();
- --n;
- }
- while (n < 0)
- {
- result = Previous();
- ++n;
- }
- return result;
- }
-
- /// <summary>
- /// Advances the iterator to the next boundary position.
- /// </summary>
- /// <returns>The position of the first boundary after this one.</returns>
- public override int Next()
- {
- if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
- {
- return DONE;
- }
- currentBoundaryIndex++;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Advances the iterator backwards, to the last boundary preceding this one.
- /// </summary>
- /// <returns>The position of the last boundary position preceding this one.</returns>
- public override int Previous()
- {
- if (currentBoundaryIndex == 0 || boundaries.Count == 0)
- {
- return DONE;
- }
- currentBoundaryIndex--;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Throw <see cref="ArgumentException"/> unless begin <= offset < end.
- /// </summary>
- /// <param name="offset"></param>
- private void CheckOffset(int offset)
- {
- if (offset < m_start || offset > m_end)
- {
- throw new ArgumentException("offset out of bounds");
- }
- }
-
- /// <summary>
- /// Sets the iterator to refer to the first boundary position following
- /// the specified position.
- /// </summary>
- /// <param name="offset">The position from which to begin searching for a break position.</param>
- /// <returns>The position of the first break after the current position.</returns>
- public override int Following(int offset)
- {
- CheckOffset(offset);
-
- if (boundaries.Count == 0)
- {
- return DONE;
- }
-
- int following = GetLowestIndexGreaterThan(offset);
- if (following == -1)
- {
- currentBoundaryIndex = boundaries.Count - 1;
- return DONE;
- }
- else
- {
- currentBoundaryIndex = following;
- }
- return ReturnCurrent();
- }
-
- private int GetLowestIndexGreaterThan(int offset)
- {
- int index = boundaries.BinarySearch(offset);
- if (index < 0)
- {
- return ~index;
- }
- else if (index + 1 < boundaries.Count)
- {
- return index + 1;
- }
-
- return -1;
- }
-
- /// <summary>
- /// Sets the iterator to refer to the last boundary position before the
- /// specified position.
- /// </summary>
- /// <param name="offset">The position to begin searching for a break from.</param>
- /// <returns>The position of the last boundary before the starting position.</returns>
- public override int Preceding(int offset)
- {
- CheckOffset(offset);
-
- if (boundaries.Count == 0)
- {
- return DONE;
- }
-
- int preceeding = GetHighestIndexLessThan(offset);
- if (preceeding == -1)
- {
- currentBoundaryIndex = 0;
- return DONE;
- }
- else
- {
- currentBoundaryIndex = preceeding;
- }
- return ReturnCurrent();
- }
-
- private int GetHighestIndexLessThan(int offset)
- {
- int index = boundaries.BinarySearch(offset);
- if (index < 0)
- {
- return ~index - 1;
- }
- else
- {
- // NOTE: This is intentionally allowed to return -1 in the case
- // where index == 0. This state indicates we are before the first boundary.
- return index - 1;
- }
- }
-
- /// <summary>
- /// Returns the current iteration position.
- /// </summary>
- public override int Current
- {
- get { return ReturnCurrent(); }
- }
-
- /// <summary>
- /// Gets the text being analyzed.
- /// </summary>
- public override string Text
- {
- get
- {
- return text;
- }
- }
-
- /// <summary>
- /// Set the iterator to analyze a new piece of text. This function resets
- /// the current iteration position to the beginning of the text.
- /// </summary>
- /// <param name="newText">The text to analyze.</param>
- public override void SetText(string newText)
- {
- text = newText;
- currentBoundaryIndex = 0;
- m_start = 0;
- m_end = newText.Length;
-
- LoadBoundaries(m_start, m_end);
- }
-
- public override void SetText(CharacterIterator newText)
- {
- text = newText.GetTextAsString();
- currentBoundaryIndex = 0;
- m_start = newText.BeginIndex;
- m_end = newText.EndIndex;
-
- LoadBoundaries(m_start, m_end);
- }
-
- private void LoadBoundaries(int start, int end)
- {
- //boundaries = new List<int>();
-
- IEnumerable<Icu.Boundary> icuBoundaries;
- string offsetText = text.Substring(start, end - start);
-
-
- if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
- {
- // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
- icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
- }
- else
- {
- if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
- {
- // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
- offsetText = offsetText.Replace("\n", " ");
- // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
- // begin with capital letters.
- offsetText = CapitalizeFirst(offsetText);
- }
-
- icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
- }
-
- boundaries = icuBoundaries
- .Select(t => new[] { t.Start + start, t.End + start })
- .SelectMany(b => b)
- .Distinct()
- .ToList();
- }
-
- /// <summary>
- /// Returns true if the specified character offset is a text boundary.
- /// </summary>
- /// <param name="offset">the character offset to check.</param>
- /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
- public override bool IsBoundary(int offset)
- {
- CheckOffset(offset);
- return boundaries.Contains(offset);
- }
-
- private int ReturnCurrent()
- {
- if (boundaries.Count > 0)
- {
- return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
- ? boundaries[currentBoundaryIndex]
- : DONE;
- }
-
- // If there are no boundaries, we must return the start offset
- return m_start;
- }
-
- /// <summary>
- /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
- /// where it doesn't correctly break sentences unless they begin with a capital letter.
- /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator
- /// code changed to remove calls to this method.
- /// </summary>
- public static string CapitalizeFirst(string s)
- {
- bool isNewSentence = true;
- var result = new StringBuilder(s.Length);
- for (int i = 0; i < s.Length; i++)
- {
- if (isNewSentence && char.IsLetter(s[i]))
- {
- result.Append(char.ToUpper(s[i]));
- isNewSentence = false;
- }
- else
- result.Append(s[i]);
-
- if (s[i] == '!' || s[i] == '?' || s[i] == '.')
- {
- isNewSentence = true;
- }
- }
-
- return result.ToString();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
index fedffc8..94eb504 100644
--- a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
+++ b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
@@ -41,6 +41,9 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="..\IcuBreakIterator.cs">
+ <Link>IcuBreakIterator.cs</Link>
+ </Compile>
<Compile Include="Highlight\DefaultEncoder.cs" />
<Compile Include="Highlight\GradientFormatter.cs" />
<Compile Include="Highlight\Highlighter.cs" />
@@ -76,7 +79,6 @@
<Compile Include="PostingsHighlight\WholeBreakIterator.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="RectangularArrays.cs" />
- <Compile Include="IcuBreakIterator.cs" />
<Compile Include="VectorHighlight\BaseFragListBuilder.cs" />
<Compile Include="VectorHighlight\BaseFragmentsBuilder.cs" />
<Compile Include="VectorHighlight\BoundaryScanner.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/project.json b/src/Lucene.Net.Highlighter/project.json
index 4200936..7b59d99 100644
--- a/src/Lucene.Net.Highlighter/project.json
+++ b/src/Lucene.Net.Highlighter/project.json
@@ -6,6 +6,13 @@
"Lucene.Net.Analysis.Common": "4.8.0-alpha",
"Lucene.Net.Queries": "4.8.0-alpha"
},
+ "buildOptions": {
+ "compile": {
+ "includeFiles": [
+ "../IcuBreakIterator.cs"
+ ]
+ }
+ },
"frameworks": {
"netstandard1.5": {
"imports": "dnxcore50",
[09/14] lucenenet git commit: Lucene.Net.Tests.Analysis.Common.Miscellaneous: Added missing TestKeywordRepeatFilter tests
Posted by ni...@apache.org.
Lucene.Net.Tests.Analysis.Common.Miscellaneous: Added missing TestKeywordRepeatFilter tests
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5d556167
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5d556167
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5d556167
Branch: refs/heads/api-work
Commit: 5d556167073782c27cd50d857759b0a5b9d0fa3e
Parents: ddb054a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:48:28 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:34 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/TestKeywordRepeatFilter.cs | 46 +++++++++-----------
.../Lucene.Net.Tests.Analysis.Common.csproj | 1 +
.../project.json | 3 +-
3 files changed, 23 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
index e9674ba..ef977fd 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordRepeatFilter.cs
@@ -1,7 +1,9 @@
-\ufeffnamespace org.apache.lucene.analysis.miscellaneous
-{
+\ufeffusing NUnit.Framework;
+using System.IO;
- /*
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,31 +20,25 @@
* limitations under the License.
*/
- using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
-
-
- public class TestKeywordRepeatFilter : BaseTokenStreamTestCase
- {
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBasic() throws java.io.IOException
- public virtual void testBasic()
- {
- TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false)), "English"));
- assertTokenStreamContents(ts, new string[] {"the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
- }
-
+ using SnowballFilter = Lucene.Net.Analysis.Snowball.SnowballFilter;
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testComposition() throws java.io.IOException
- public virtual void testComposition()
- {
- TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false))), "English"));
- assertTokenStreamContents(ts, new string[] {"the", "birds", "bird", "are", "flying", "fli"}, new int[] {1,1,0,1,1,0});
- }
+ public class TestKeywordRepeatFilter : BaseTokenStreamTestCase
+ {
+ [Test]
+ public virtual void TestBasic()
+ {
+ TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false)), "English"));
+ AssertTokenStreamContents(ts, new string[] { "the", "birds", "bird", "are", "flying", "fli" }, new int[] { 1, 1, 0, 1, 1, 0 });
+ }
- }
+ [Test]
+ public virtual void TestComposition()
+ {
+ TokenStream ts = new RemoveDuplicatesTokenFilter(new SnowballFilter(new KeywordRepeatFilter(new KeywordRepeatFilter(new MockTokenizer(new StringReader("the birds are flying"), MockTokenizer.WHITESPACE, false))), "English"));
+ AssertTokenStreamContents(ts, new string[] { "the", "birds", "bird", "are", "flying", "fli" }, new int[] { 1, 1, 0, 1, 1, 0 });
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index b5587d0..0bd31c1 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -195,6 +195,7 @@
<Compile Include="Analysis\Miscellaneous\TestKeepWordFilter.cs" />
<Compile Include="Analysis\Miscellaneous\TestKeywordMarkerFilter.cs" />
<Compile Include="Analysis\Miscellaneous\TestKeywordMarkerFilterFactory.cs" />
+ <Compile Include="Analysis\Miscellaneous\TestKeywordRepeatFilter.cs" />
<Compile Include="Analysis\Miscellaneous\TestLengthFilter.cs" />
<Compile Include="Analysis\Miscellaneous\TestLengthFilterFactory.cs" />
<Compile Include="Analysis\Miscellaneous\TestLimitTokenCountAnalyzer.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d556167/src/Lucene.Net.Tests.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/project.json b/src/Lucene.Net.Tests.Analysis.Common/project.json
index b621d6e..46a3833 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Common/project.json
@@ -100,8 +100,7 @@
"excludeFiles": [
"DateTimeHelperClass.cs",
"HashMapHelperClass.cs",
- "StringHelperClass.cs",
- "Analysis/Miscellaneous/TestKeywordRepeatFilter.cs"
+ "StringHelperClass.cs"
]
}
},
[07/14] lucenenet git commit: Lucene.Net.Core.Util.Constants: Added
using statement for System.Runtime.InteropServices because it is required for
.NET core
Posted by ni...@apache.org.
Lucene.Net.Core.Util.Constants: Added using statement for System.Runtime.InteropServices because it is required for .NET core
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fd6282ea
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fd6282ea
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fd6282ea
Branch: refs/heads/api-work
Commit: fd6282ead34b4c56cf997813f40bdef5b23998ca
Parents: 9525d45
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:52:29 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:32 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Core/Util/Constants.cs | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fd6282ea/src/Lucene.Net.Core/Util/Constants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Constants.cs b/src/Lucene.Net.Core/Util/Constants.cs
index 00fbabe..01800e7 100644
--- a/src/Lucene.Net.Core/Util/Constants.cs
+++ b/src/Lucene.Net.Core/Util/Constants.cs
@@ -1,6 +1,9 @@
using Lucene.Net.Support;
using System;
using System.Reflection;
+#if NETSTANDARD
+using System.Runtime.InteropServices;
+#endif
using System.Text.RegularExpressions;
namespace Lucene.Net.Util
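The using is presumably consumed by RuntimeInformation-based platform
detection, the usual replacement for Environment.OSVersion on .NET Core. A
hedged sketch of that pattern (the type below is illustrative, not the actual
Constants class):

    using System;
    #if NETSTANDARD
    using System.Runtime.InteropServices;
    #endif

    internal static class PlatformSketch // illustrative, not Lucene.Net's Constants
    {
    #if NETSTANDARD
        // .NET Core exposes OS detection through RuntimeInformation.
        public static bool IsWindows => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
    #else
        // The full framework can keep using Environment.OSVersion.
        public static bool IsWindows => Environment.OSVersion.Platform == PlatformID.Win32NT;
    #endif
    }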
[06/14] lucenenet git commit: Removed System.Runtime.Remoting
namespace from SafeTextWriterWrapper.cs because of compile issues in .NET
core
Posted by ni...@apache.org.
Removed System.Runtime.Remoting namespace from SafeTextWriterWrapper.cs because of compile issues in .NET core
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc34ba7d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc34ba7d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc34ba7d
Branch: refs/heads/api-work
Commit: fc34ba7d3dbf9046cf3f59e9584ad7e6bdd19209
Parents: 7d12310
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:28:36 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:31 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs | 1 -
1 file changed, 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc34ba7d/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
index eb14921..0c5d7c3 100644
--- a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
+++ b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
@@ -1,6 +1,5 @@
\ufeffusing System;
using System.IO;
-using System.Runtime.Remoting;
using System.Text;
using System.Threading.Tasks;
[11/14] lucenenet git commit: Fixed several issues that were causing
the .NET Core build to fail
Posted by ni...@apache.org.
Fixed several issues that were causing the .NET Core build to fail
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bed1f16e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bed1f16e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bed1f16e
Branch: refs/heads/api-work
Commit: bed1f16e5d81495acf1c8c0f2a36cb7789bdb3ed
Parents: 5d55616
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:56:14 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:35 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Core/Index/TaskMergeScheduler.cs | 6 ++--
.../Support/SafeTextWriterWrapper.cs | 9 ++++++
src/Lucene.Net.Core/project.json | 9 +++++-
src/Lucene.Net.TestFramework/project.json | 1 +
.../project.json | 7 ++++-
.../project.json | 7 ++++-
.../project.json | 9 +++++-
.../Lucene.Net.Tests.Codecs.csproj | 4 ---
src/Lucene.Net.Tests.Codecs/project.json | 9 ++++--
src/Lucene.Net.Tests.Expressions/project.json | 11 +++++--
src/Lucene.Net.Tests.Facet/project.json | 7 ++++-
src/Lucene.Net.Tests.Grouping/project.json | 7 ++++-
src/Lucene.Net.Tests.Highlighter/project.json | 7 ++++-
src/Lucene.Net.Tests.Join/project.json | 13 ++++++--
src/Lucene.Net.Tests.Memory/project.json | 9 +++++-
src/Lucene.Net.Tests.Misc/project.json | 13 ++++++--
src/Lucene.Net.Tests.Queries/project.json | 9 +++++-
src/Lucene.Net.Tests.QueryParser/project.json | 6 ++--
src/Lucene.Net.Tests.Sandbox/project.json | 9 ++++--
src/Lucene.Net.Tests.Spatial/project.json | 31 ++++++++++++--------
src/Lucene.Net.Tests.Suggest/project.json | 7 +++++
.../Index/TestTaskMergeSchedulerExternal.cs | 6 ++--
src/Lucene.Net.Tests/project.json | 1 +
23 files changed, 150 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs b/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
index 61cec12..804e7bb 100644
--- a/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
+++ b/src/Lucene.Net.Core/Index/TaskMergeScheduler.cs
@@ -159,7 +159,7 @@ namespace Lucene.Net.Index
/// }
/// </pre>
/// </summary>
- protected internal bool Verbose
+ protected bool Verbose
{
get { return _writer != null && _writer.infoStream.IsEnabled(COMPONENT_NAME); }
}
@@ -168,7 +168,7 @@ namespace Lucene.Net.Index
/// Outputs the given message - this method assumes <seealso cref="#verbose()"/> was
/// called and returned true.
/// </summary>
- protected internal virtual void Message(string message)
+ protected virtual void Message(string message)
{
_writer.infoStream.Message(COMPONENT_NAME, message);
}
@@ -357,7 +357,7 @@ namespace Lucene.Net.Index
/// Called when an exception is hit in a background merge
/// thread
/// </summary>
- protected internal virtual void HandleMergeException(Exception exc)
+ protected virtual void HandleMergeException(Exception exc)
{
// suppressExceptions is normally only set during testing
if (suppressExceptions)
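Two notes on the hunks above: the change from protected internal to protected
narrows these members to derived types only (the extra internal accessibility
appears to be a holdover from the automated Java port), and the Verbose/Message
pair is meant to be used as a guarded pair, roughly:

    // Call pattern implied by the doc comment: gate Message behind Verbose.
    if (Verbose)
    {
        Message("merge thread: starting merge");
    }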
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
index 0c5d7c3..507bb8a 100644
--- a/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
+++ b/src/Lucene.Net.Core/Support/SafeTextWriterWrapper.cs
@@ -1,5 +1,8 @@
\ufeffusing System;
using System.IO;
+#if FEATURE_MARSHAL_BY_REF
+using System.Runtime.Remoting;
+#endif
using System.Text;
using System.Threading.Tasks;
@@ -64,15 +67,19 @@ namespace Lucene.Net.Support
}
}
+#if FEATURE_CLOSEABLE
public override void Close()
{
Run(() => textWriter.Close());
}
+#endif
+#if FEATURE_MARSHAL_BY_REF
public override ObjRef CreateObjRef(Type requestedType)
{
return Run(() => textWriter.CreateObjRef(requestedType));
}
+#endif
public override bool Equals(object obj)
{
@@ -94,10 +101,12 @@ namespace Lucene.Net.Support
return Run(() => textWriter.GetHashCode());
}
+#if FEATURE_LIFETIME_SERVICE
public override object InitializeLifetimeService()
{
return Run(() => textWriter.InitializeLifetimeService());
}
+#endif
public override string ToString()
{
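Taken together with the project.json change below, this shows the
feature-define pattern used throughout the port: a member that exists only on
the full framework is fenced behind a FEATURE_* symbol, and each target
framework defines only the symbols it can support. A condensed sketch (not the
full wrapper):

    using System.IO;
    using System.Text;

    public class SafeWriterSketch : TextWriter // condensed illustration only
    {
        public override Encoding Encoding => Encoding.UTF8;

    #if FEATURE_CLOSEABLE
        // TextWriter.Close() is absent from .NET Core 1.x, so this override
        // compiles only where the desktop build defines FEATURE_CLOSEABLE.
        public override void Close() { }
    #endif
    }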
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Core/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/project.json b/src/Lucene.Net.Core/project.json
index c0879ac..ddbdf76 100644
--- a/src/Lucene.Net.Core/project.json
+++ b/src/Lucene.Net.Core/project.json
@@ -168,7 +168,14 @@
"System.Xml.Linq": "4.0.0.0"
},
"buildOptions": {
- "define": [ "FEATURE_CLONEABLE", "FEATURE_SERIALIZABLE", "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM" ],
+ "define": [
+ "FEATURE_CLONEABLE",
+ "FEATURE_SERIALIZABLE",
+ "FEATURE_THREADPOOL_UNSAFEQUEUEWORKITEM",
+ "FEATURE_MARSHAL_BY_REF",
+ "FEATURE_CLOSEABLE",
+ "FEATURE_LIFETIME_SERVICE"
+ ],
"compile": {
"exclude": [
"Analysis/Standard/*",
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.TestFramework/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/project.json b/src/Lucene.Net.TestFramework/project.json
index 48ed0c2..d793f61 100644
--- a/src/Lucene.Net.TestFramework/project.json
+++ b/src/Lucene.Net.TestFramework/project.json
@@ -19,6 +19,7 @@
"compile": {
"excludeFiles": [
"Support/SystemProperties.cs",
+ "Util/ApiScanTestBase.cs",
"Util/AbstractBeforeAfterRule.cs",
"Util/CloseableDirectory.cs",
"Util/FailureMarker.cs",
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/project.json b/src/Lucene.Net.Tests.Analysis.Common/project.json
index 46a3833..812bc65 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Common/project.json
@@ -107,7 +107,12 @@
"frameworks": {
"netcoreapp1.0": {
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
},
"dependencies": {
"Microsoft.NETCore.App": {
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Analysis.Stempel/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Stempel/project.json b/src/Lucene.Net.Tests.Analysis.Stempel/project.json
index 018ecfd..7c6e2f9 100644
--- a/src/Lucene.Net.Tests.Analysis.Stempel/project.json
+++ b/src/Lucene.Net.Tests.Analysis.Stempel/project.json
@@ -25,7 +25,12 @@
}
},
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Classification/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Classification/project.json b/src/Lucene.Net.Tests.Classification/project.json
index a0e7c84..a8656f7 100644
--- a/src/Lucene.Net.Tests.Classification/project.json
+++ b/src/Lucene.Net.Tests.Classification/project.json
@@ -13,11 +13,18 @@
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj b/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
index 7e4c447..4000a00 100644
--- a/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
+++ b/src/Lucene.Net.Tests.Codecs/Lucene.Net.Tests.Codecs.csproj
@@ -81,10 +81,6 @@
<Project>{b2c0d749-ce34-4f62-a15e-00cb2ff5ddb3}</Project>
<Name>Lucene.Net.TestFramework</Name>
</ProjectReference>
- <ProjectReference Include="..\Lucene.Net.Tests\Lucene.Net.Tests.csproj">
- <Project>{de63db10-975f-460d-af85-572c17a91284}</Project>
- <Name>Lucene.Net.Tests</Name>
- </ProjectReference>
</ItemGroup>
<ItemGroup>
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Codecs/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Codecs/project.json b/src/Lucene.Net.Tests.Codecs/project.json
index 94d03ed..80ad56d 100644
--- a/src/Lucene.Net.Tests.Codecs/project.json
+++ b/src/Lucene.Net.Tests.Codecs/project.json
@@ -5,7 +5,7 @@
"NUnit": "3.5.0",
"Lucene.Net.Analysis.Common": "4.8.0-alpha",
"Lucene.Net.Codecs": "4.8.0-alpha",
- "Lucene.Net.Tests": "4.8.0-alpha"
+ "Lucene.Net.TestFramework": "4.8.0-alpha"
},
"testRunner": "nunit",
@@ -13,7 +13,12 @@
"frameworks": {
"netcoreapp1.0": {
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
},
"imports": "dnxcore50",
"dependencies": {
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Expressions/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Expressions/project.json b/src/Lucene.Net.Tests.Expressions/project.json
index 6b7d68d..ec5baa5 100644
--- a/src/Lucene.Net.Tests.Expressions/project.json
+++ b/src/Lucene.Net.Tests.Expressions/project.json
@@ -9,15 +9,22 @@
},
"testRunner": "nunit",
-
+
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Facet/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/project.json b/src/Lucene.Net.Tests.Facet/project.json
index 04b3527..2bed4d6 100644
--- a/src/Lucene.Net.Tests.Facet/project.json
+++ b/src/Lucene.Net.Tests.Facet/project.json
@@ -20,7 +20,12 @@
}
},
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Grouping/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/project.json b/src/Lucene.Net.Tests.Grouping/project.json
index 724369e..2cd0351 100644
--- a/src/Lucene.Net.Tests.Grouping/project.json
+++ b/src/Lucene.Net.Tests.Grouping/project.json
@@ -20,7 +20,12 @@
}
},
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Highlighter/project.json b/src/Lucene.Net.Tests.Highlighter/project.json
index 84a0104..99bfb94 100644
--- a/src/Lucene.Net.Tests.Highlighter/project.json
+++ b/src/Lucene.Net.Tests.Highlighter/project.json
@@ -19,7 +19,12 @@
"frameworks": {
"netcoreapp1.0": {
"buildOptions": {
- "define": [ "NETSTANDARD", "FEATURE_EMBEDDED_RESOURCE" ]
+ "define": [ "NETSTANDARD", "FEATURE_EMBEDDED_RESOURCE" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
},
"dependencies": {
"Microsoft.NETCore.App": {
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Join/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/project.json b/src/Lucene.Net.Tests.Join/project.json
index 9777ef4..c6d7a79 100644
--- a/src/Lucene.Net.Tests.Join/project.json
+++ b/src/Lucene.Net.Tests.Join/project.json
@@ -6,19 +6,26 @@
"Lucene.Net.Grouping": "4.8.0-alpha",
"Lucene.Net.Join": "4.8.0-alpha",
"Lucene.Net.TestFramework": "4.8.0-alpha",
- "NUnit": "3.5.0",
+ "NUnit": "3.5.0"
},
"testRunner": "nunit",
-
+
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Memory/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Memory/project.json b/src/Lucene.Net.Tests.Memory/project.json
index bd96749..5f0fb11 100644
--- a/src/Lucene.Net.Tests.Memory/project.json
+++ b/src/Lucene.Net.Tests.Memory/project.json
@@ -22,11 +22,18 @@
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Misc/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Misc/project.json b/src/Lucene.Net.Tests.Misc/project.json
index a82c280..a2f7fd2 100644
--- a/src/Lucene.Net.Tests.Misc/project.json
+++ b/src/Lucene.Net.Tests.Misc/project.json
@@ -8,19 +8,26 @@
},
"testRunner": "nunit",
-
+
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
-
+
"runtimes": {
"win7-x86": {},
"win7-x64": {}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Queries/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Queries/project.json b/src/Lucene.Net.Tests.Queries/project.json
index c78ec4f..abafeaf 100644
--- a/src/Lucene.Net.Tests.Queries/project.json
+++ b/src/Lucene.Net.Tests.Queries/project.json
@@ -12,11 +12,18 @@
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
- "dependencies": {
+ "dependencies": {
"Microsoft.NETCore.App": {
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.QueryParser/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.QueryParser/project.json b/src/Lucene.Net.Tests.QueryParser/project.json
index b83132e..b2ff80a 100644
--- a/src/Lucene.Net.Tests.QueryParser/project.json
+++ b/src/Lucene.Net.Tests.QueryParser/project.json
@@ -9,7 +9,6 @@
},
"testRunner": "nunit",
-
"frameworks": {
"netcoreapp1.0": {
"imports": "dnxcore50",
@@ -22,13 +21,14 @@
"buildOptions": {
"compile": {
"excludeFiles": [
- "Xml/TestQueryTemplateManager.cs"
+ "Xml/TestQueryTemplateManager.cs",
+ "TestApiConsistency.cs"
]
}
}
}
},
-
+
"runtimes": {
"win7-x86": {},
"win7-x64": {}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Sandbox/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Sandbox/project.json b/src/Lucene.Net.Tests.Sandbox/project.json
index 186abd5..2da3ee8 100644
--- a/src/Lucene.Net.Tests.Sandbox/project.json
+++ b/src/Lucene.Net.Tests.Sandbox/project.json
@@ -9,7 +9,7 @@
},
"buildOptions": {
"embed": {
- "includeFiles": [
+ "includeFiles": [
"Queries/fuzzyTestData.txt"
]
}
@@ -26,7 +26,12 @@
},
"imports": "dnxcore50",
"buildOptions": {
- "define": [ "FEATURE_EMBEDDED_RESOURCE" ]
+ "define": [ "FEATURE_EMBEDDED_RESOURCE" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Spatial/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Spatial/project.json b/src/Lucene.Net.Tests.Spatial/project.json
index 127bda3..6d99bf7 100644
--- a/src/Lucene.Net.Tests.Spatial/project.json
+++ b/src/Lucene.Net.Tests.Spatial/project.json
@@ -11,18 +11,18 @@
"buildOptions": {
"embed": {
"includeFiles": [
- "Test-Files/cities-Intersects-BBox.txt",
- "Test-Files/Data/countries-bbox.txt",
- "Test-Files/Data/countries-poly.txt",
- "Test-Files/Data/geonames-IE.txt",
- "Test-Files/Data/LUCENE-4464.txt",
- "Test-Files/Data/simple-bbox.txt",
- "Test-Files/Data/states-bbox.txt",
- "Test-Files/Data/states-poly.txt",
- "Test-Files/Data/world-cities-points.txt",
- "Test-Files/simple-Queries-BBox.txt",
- "Test-Files/states-Intersects-BBox.txt",
- "Test-Files/states-IsWithin-BBox.txt"
+ "Test-Files/cities-Intersects-BBox.txt",
+ "Test-Files/Data/countries-bbox.txt",
+ "Test-Files/Data/countries-poly.txt",
+ "Test-Files/Data/geonames-IE.txt",
+ "Test-Files/Data/LUCENE-4464.txt",
+ "Test-Files/Data/simple-bbox.txt",
+ "Test-Files/Data/states-bbox.txt",
+ "Test-Files/Data/states-poly.txt",
+ "Test-Files/Data/world-cities-points.txt",
+ "Test-Files/simple-Queries-BBox.txt",
+ "Test-Files/states-Intersects-BBox.txt",
+ "Test-Files/states-IsWithin-BBox.txt"
]
}
},
@@ -30,7 +30,12 @@
"netcoreapp1.0": {
"imports": [ "dnxcore50", "portable-net403+sl5+win8+wp8+wpa81" ],
"buildOptions": {
- "define": [ "NETSTANDARD" ]
+ "define": [ "NETSTANDARD" ],
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
},
"dependencies": {
"Microsoft.NETCore.App": {
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests.Suggest/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Suggest/project.json b/src/Lucene.Net.Tests.Suggest/project.json
index 24fba67..188fbd0 100644
--- a/src/Lucene.Net.Tests.Suggest/project.json
+++ b/src/Lucene.Net.Tests.Suggest/project.json
@@ -28,6 +28,13 @@
"type": "platform",
"version": "1.0.1"
}
+ },
+ "buildOptions": {
+ "compile": {
+ "excludeFiles": [
+ "TestApiConsistency.cs"
+ ]
+ }
}
}
},
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs b/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
index 960520b..f0ad954 100644
--- a/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
+++ b/src/Lucene.Net.Tests/core/Index/TestTaskMergeSchedulerExternal.cs
@@ -56,7 +56,7 @@ namespace Lucene.Net.Tests
this.OuterInstance = outerInstance;
}
- protected internal override void HandleMergeException(Exception t)
+ protected override void HandleMergeException(Exception t)
{
OuterInstance.ExcCalled = true;
}
@@ -109,7 +109,7 @@ namespace Lucene.Net.Tests
public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound)
{
MergePolicy.OneMerge merge = null;
- while ((merge = writer.NextMerge) != null)
+ while ((merge = writer.GetNextMerge()) != null)
{
if (VERBOSE)
{
@@ -119,7 +119,7 @@ namespace Lucene.Net.Tests
}
}
- public override void Dispose()
+ protected override void Dispose(bool disposing)
{
}
}
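The Dispose change above moves the test scheduler from overriding Dispose()
directly onto the standard .NET dispose pattern, in which a public non-virtual
Dispose() delegates to a protected virtual Dispose(bool). A generic sketch of
that pattern (not the actual MergeScheduler base class):

    using System;

    public class DisposableSketch : IDisposable
    {
        public void Dispose()
        {
            Dispose(true);               // release resources now
            GC.SuppressFinalize(this);   // a finalizer, if any, is now redundant
        }

        // Subclasses override this member -- as the test scheduler now does --
        // rather than Dispose() itself.
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                // release managed resources here
            }
        }
    }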
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bed1f16e/src/Lucene.Net.Tests/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/project.json b/src/Lucene.Net.Tests/project.json
index 72e021d..171c985 100644
--- a/src/Lucene.Net.Tests/project.json
+++ b/src/Lucene.Net.Tests/project.json
@@ -16,6 +16,7 @@
"core/Util/Junitcompat"
],
"excludeFiles": [
+ "core/TestApiConsistency.cs",
"core/TestMergeSchedulerExternal.cs",
"core/TestWorstCaseTestBehavior.cs",
"core/Index/TestBackwardsCompatibility.cs",
[04/14] lucenenet git commit: Updated .gitignore to exclude
NuGetPackages directory
Posted by ni...@apache.org.
Updated .gitignore to exclude NuGetPackages directory
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7d123105
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7d123105
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7d123105
Branch: refs/heads/api-work
Commit: 7d1231051da1929c1972fe6280adcbaee9641f5f
Parents: 1c87ed5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:27:32 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:30 2017 +0700
----------------------------------------------------------------------
.gitignore | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7d123105/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index b3cc01d..4ee6108 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,7 @@ packages/
*.lock.json
TestResults/
test-files/analysis/data/
+[Nn]u[Gg]et[Pp]ackages/
# NuGet v3's project.json files produces more ignoreable files
*.nuget.props
[14/14] lucenenet git commit: SWEEP: Where used,
changed Dictionary/OrderedDictionary back to LinkedHashMap,
which is a better match of the LinkedHashMap in Java.
Posted by ni...@apache.org.
SWEEP: Where used, changed Dictionary/OrderedDictionary back to LinkedHashMap, which is a better match of the LinkedHashMap in Java.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bc485b4c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bc485b4c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bc485b4c
Branch: refs/heads/api-work
Commit: bc485b4c42db371f93d90534ee30db6824baca2f
Parents: fc7b5b5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 19:41:38 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 19:41:38 2017 +0700
----------------------------------------------------------------------
.../Analysis/Hunspell/Dictionary.cs | 2 +-
.../Analysis/Util/AnalysisSPILoader.cs | 2 +-
src/Lucene.Net.Core/Index/BufferedUpdates.cs | 40 ++++++++------------
.../JS/JavascriptCompiler.cs | 2 +-
src/Lucene.Net.Facet/DrillDownQuery.cs | 2 +-
.../Simple/TestSimpleQueryParser.cs | 4 +-
6 files changed, 21 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
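For context on the sweep: Dictionary<TKey, TValue> makes no guarantee about
enumeration order, while Java's LinkedHashMap (and, presumably, the
Lucene.Net.Support port used below) enumerates entries in insertion order. A
small sketch of the difference, assuming the port matches its Java namesake:

    IDictionary<string, int> map = new LinkedHashMap<string, int>();
    map["b"] = 1;
    map["a"] = 2;
    map["c"] = 3;

    // Enumerates b, a, c -- insertion order is preserved. With a plain
    // Dictionary<string, int> the enumeration order is unspecified, which
    // silently broke code ported from Java's LinkedHashMap.
    foreach (KeyValuePair<string, int> kvp in map)
        Console.WriteLine(kvp.Key + "=" + kvp.Value);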
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index f8e3107..af966d1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -269,7 +269,7 @@ namespace Lucene.Net.Analysis.Hunspell
patterns.Add(null);
// zero strip -> 0 ord
- IDictionary<string, int?> seenStrips = new Dictionary<string, int?>();
+ IDictionary<string, int?> seenStrips = new LinkedHashMap<string, int?>();
seenStrips[""] = 0;
var reader = new StreamReader(affixStream, decoder);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
index 5797c10..03b949d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -63,7 +63,7 @@ namespace Lucene.Net.Analysis.Util
{
lock (this)
{
- IDictionary<string, Type> services = new Dictionary<string, Type>(this.services);
+ IDictionary<string, Type> services = new LinkedHashMap<string, Type>(this.services);
SPIClassIterator<S> loader = SPIClassIterator<S>.Get();
foreach (var service in loader)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Core/Index/BufferedUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/BufferedUpdates.cs b/src/Lucene.Net.Core/Index/BufferedUpdates.cs
index 764fdae..d03d298 100644
--- a/src/Lucene.Net.Core/Index/BufferedUpdates.cs
+++ b/src/Lucene.Net.Core/Index/BufferedUpdates.cs
@@ -123,25 +123,25 @@ namespace Lucene.Net.Index
internal readonly IDictionary<Term, int?> terms = new Dictionary<Term, int?>();
internal readonly IDictionary<Query, int?> queries = new Dictionary<Query, int?>();
internal readonly IList<int?> docIDs = new List<int?>();
-
+
// Map<dvField,Map<updateTerm,NumericUpdate>>
// For each field we keep an ordered list of NumericUpdates, key'd by the
- // update Term. OrderedDictionary guarantees we will later traverse the map in
+ // update Term. LinkedHashMap guarantees we will later traverse the map in
// insertion order (so that if two terms affect the same document, the last
// one that came in wins), and helps us detect faster if the same Term is
// used to update the same field multiple times (so we later traverse it
// only once).
- internal readonly IDictionary<string, OrderedDictionary> numericUpdates = new Dictionary<string, OrderedDictionary>();
+ internal readonly IDictionary<string, LinkedHashMap<Term, NumericDocValuesUpdate>> numericUpdates = new Dictionary<string, LinkedHashMap<Term, NumericDocValuesUpdate>>();
// Map<dvField,Map<updateTerm,BinaryUpdate>>
// For each field we keep an ordered list of BinaryUpdates, key'd by the
- // update Term. OrderedDictionary guarantees we will later traverse the map in
+ // update Term. LinkedHashMap guarantees we will later traverse the map in
// insertion order (so that if two terms affect the same document, the last
// one that came in wins), and helps us detect faster if the same Term is
// used to update the same field multiple times (so we later traverse it
// only once).
- internal readonly IDictionary<string, OrderedDictionary> binaryUpdates = new Dictionary<string, OrderedDictionary>();
+ internal readonly IDictionary<string, LinkedHashMap<Term, BinaryDocValuesUpdate>> binaryUpdates = new Dictionary<string, LinkedHashMap<Term, BinaryDocValuesUpdate>>();
public static readonly int MAX_INT = Convert.ToInt32(int.MaxValue);
@@ -241,21 +241,16 @@ namespace Lucene.Net.Index
public virtual void AddNumericUpdate(NumericDocValuesUpdate update, int docIDUpto)
{
- OrderedDictionary fieldUpdates = null;
+ LinkedHashMap<Term, NumericDocValuesUpdate> fieldUpdates = null;
if (!numericUpdates.TryGetValue(update.field, out fieldUpdates))
{
- fieldUpdates = new OrderedDictionary();
+ fieldUpdates = new LinkedHashMap<Term, NumericDocValuesUpdate>();
numericUpdates[update.field] = fieldUpdates;
bytesUsed.AddAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY);
}
- NumericDocValuesUpdate current = null;
- if (fieldUpdates.Contains(update.term))
- {
- current = fieldUpdates[update.term] as NumericDocValuesUpdate;
- }
-
- if (current != null && docIDUpto < current.docIDUpto)
+ NumericDocValuesUpdate current;
+ if (fieldUpdates.TryGetValue(update.term, out current) && current != null && docIDUpto < current.docIDUpto)
{
// Only record the new number if it's greater than or equal to the current
// one. this is important because if multiple threads are replacing the
@@ -265,7 +260,7 @@ namespace Lucene.Net.Index
}
update.docIDUpto = docIDUpto;
- // since it's an OrderedDictionary, we must first remove the Term entry so that
+ // since it's a LinkedHashMap, we must first remove the Term entry so that
// it's added last (we're interested in insertion-order).
if (current != null)
{
@@ -281,21 +276,16 @@ namespace Lucene.Net.Index
public virtual void AddBinaryUpdate(BinaryDocValuesUpdate update, int docIDUpto)
{
- OrderedDictionary fieldUpdates;
+ LinkedHashMap<Term, BinaryDocValuesUpdate> fieldUpdates;
if (!binaryUpdates.TryGetValue(update.field, out fieldUpdates))
{
- fieldUpdates = new OrderedDictionary();
+ fieldUpdates = new LinkedHashMap<Term, BinaryDocValuesUpdate>();
binaryUpdates[update.field] = fieldUpdates;
bytesUsed.AddAndGet(BYTES_PER_BINARY_FIELD_ENTRY);
}
- BinaryDocValuesUpdate current = null;
- if (fieldUpdates.Contains(update.term))
- {
- current = fieldUpdates[update.term] as BinaryDocValuesUpdate;
- }
-
- if (current != null && docIDUpto < current.docIDUpto)
+ BinaryDocValuesUpdate current;
+ if (fieldUpdates.TryGetValue(update.term, out current) && current != null && docIDUpto < current.docIDUpto)
{
// Only record the new number if it's greater than or equal to the current
// one. this is important because if multiple threads are replacing the
@@ -305,7 +295,7 @@ namespace Lucene.Net.Index
}
update.docIDUpto = docIDUpto;
- // since it's an OrderedDictionary, we must first remove the Term entry so that
+ // since it's a LinkedHashMap, we must first remove the Term entry so that
// it's added last (we're interested in insertion-order).
if (current != null)
{
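The remove-then-re-add step shown above is what keeps last-writer-wins
semantics on an insertion-ordered map: assigning to an existing key in a
LinkedHashMap leaves the entry at its original position, so the old entry must
be removed first for the newest update to land at the end of the traversal
order. A minimal sketch of the pattern (illustrative keys, not the actual
update types), assuming the port follows Java's LinkedHashMap semantics:

    var updates = new LinkedHashMap<string, int>();
    updates["t1"] = 1;
    updates["t2"] = 2;

    // Plain reassignment would leave "t1" first in traversal order; remove it
    // so the latest value for the term is visited last and therefore wins.
    updates.Remove("t1");
    updates["t1"] = 3;   // traversal order is now t2, t1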
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs b/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
index 550d756..d037d2d 100644
--- a/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
+++ b/src/Lucene.Net.Expressions/JS/JavascriptCompiler.cs
@@ -92,7 +92,7 @@ namespace Lucene.Net.Expressions.JS
private readonly string sourceText;
- private readonly IDictionary<string, int> externalsMap = new HashMap<string, int>();
+ private readonly IDictionary<string, int> externalsMap = new LinkedHashMap<string, int>();
private TypeBuilder dynamicType;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Facet/DrillDownQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/DrillDownQuery.cs b/src/Lucene.Net.Facet/DrillDownQuery.cs
index e222f2d..5e7d4e5 100644
--- a/src/Lucene.Net.Facet/DrillDownQuery.cs
+++ b/src/Lucene.Net.Facet/DrillDownQuery.cs
@@ -60,7 +60,7 @@ namespace Lucene.Net.Facet
private readonly FacetsConfig config;
private readonly BooleanQuery query;
- private readonly IDictionary<string, int?> drillDownDims = new Dictionary<string, int?>();
+ private readonly IDictionary<string, int?> drillDownDims = new LinkedHashMap<string, int?>();
/// <summary>
/// Used by <see cref="Clone"/>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bc485b4c/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs b/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
index 065aa82..0cd23ec 100644
--- a/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
+++ b/src/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
@@ -541,7 +541,7 @@ namespace Lucene.Net.QueryParsers.Simple
[Test]
public virtual void TestWeightedTerm()
{
- IDictionary<string, float> weights = new Dictionary<string, float>();
+ IDictionary<string, float> weights = new LinkedHashMap<string, float>();
weights["field0"] = 5f;
weights["field1"] = 10f;
@@ -562,7 +562,7 @@ namespace Lucene.Net.QueryParsers.Simple
[Test]
public virtual void TestWeightedOR()
{
- IDictionary<string, float> weights = new Dictionary<string, float>();
+ IDictionary<string, float> weights = new LinkedHashMap<string, float>();
weights["field0"] = 5f;
weights["field1"] = 10f;
[08/14] lucenenet git commit: Lucene.Net.Core.Index.IndexWriter:
Added TODO about renaming GetNextMerge() method to NextMerge()
Posted by ni...@apache.org.
Lucene.Net.Core.Index.IndexWriter: Added TODO about renaming GetNextMerge() method to NextMerge()
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ddb054a0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ddb054a0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ddb054a0
Branch: refs/heads/api-work
Commit: ddb054a0b67a4622f51312853bc2cfee298c6023
Parents: fd6282e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 15:43:43 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:33 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Core/Index/IndexWriter.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ddb054a0/src/Lucene.Net.Core/Index/IndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/IndexWriter.cs b/src/Lucene.Net.Core/Index/IndexWriter.cs
index f402271..39983c1 100644
--- a/src/Lucene.Net.Core/Index/IndexWriter.cs
+++ b/src/Lucene.Net.Core/Index/IndexWriter.cs
@@ -2451,7 +2451,7 @@ namespace Lucene.Net.Index
///
/// @lucene.experimental
/// </summary>
- public virtual MergePolicy.OneMerge GetNextMerge()
+ public virtual MergePolicy.OneMerge GetNextMerge() // LUCENENET TODO: Rename NextMerge() (consistency with iterator.Next())
{
lock (this)
{
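The TODO touches a general .NET guideline: this accessor mutates state (it
pulls the next pending merge off the queue), and property getters are expected
to be side-effect free -- a getter here would dequeue every time a debugger or
binding evaluated it. A hypothetical illustration (invented names, not the
IndexWriter API):

    using System.Collections.Generic;

    public class MergeQueueSketch
    {
        private readonly Queue<string> pending = new Queue<string>();

        // Exposed as a method, not a property, because each call consumes
        // an element; returns null when no merge is pending.
        public string GetNextMerge()
        {
            return pending.Count == 0 ? null : pending.Dequeue();
        }
    }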
[02/14] lucenenet git commit: Lucene.Net.Analysis.Common.Collation:
For now, adding [CLSCompliant(false)] to CollationAttributeFactory,
CollationKeyAnalyzer, CollationKeyFilter,
and TokenAttributes.CollatedTermAttributeImpl because they expose types from
icu.net (which is not marked CLS compliant).
Posted by ni...@apache.org.
Lucene.Net.Analysis.Common.Collation: For now, adding [CLSCompliant(false)] to CollationAttributeFactory, CollationKeyAnalyzer, CollationKeyFilter, and TokenAttributes.CollatedTermAttributeImpl because they expose types from icu.net (which is not marked CLS compliant).
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/21b3d8b7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/21b3d8b7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/21b3d8b7
Branch: refs/heads/api-work
Commit: 21b3d8b7703c321155d8388621d6b0ed120750fc
Parents: 7cd69ab
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 10:38:17 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:28 2017 +0700
----------------------------------------------------------------------
.../Collation/CollationAttributeFactory.cs | 93 ++++++++-------
.../Collation/CollationKeyAnalyzer.cs | 119 ++++++++++---------
.../Collation/CollationKeyFilter.cs | 5 +-
.../CollatedTermAttributeImpl.cs | 14 ++-
4 files changed, 122 insertions(+), 109 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
index 64687dd..d3a7b1f 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
@@ -22,51 +22,54 @@ namespace Lucene.Net.Collation
* limitations under the License.
*/
- /// <summary>
- /// <para>
- /// Converts each token into its <seealso cref="CollationKey"/>, and then
- /// encodes the bytes as an index term.
- /// </para>
- /// <para>
- /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
- /// index and query time -- CollationKeys are only comparable when produced by
- /// the same Collator. Since <seealso cref="RuleBasedCollator"/>s are not
- /// independently versioned, it is unsafe to search against stored
- /// CollationKeys unless the following are exactly the same (best practice is
- /// to store this information with the index and check that they remain the
- /// same at query time):
- /// </para>
- /// <ol>
- /// <li>JVM vendor</li>
- /// <li>JVM version, including patch version</li>
- /// <li>
- /// The language (and country and variant, if specified) of the Locale
- /// used when constructing the collator via
- /// <seealso cref="Collator#getInstance(Locale)"/>.
- /// </li>
- /// <li>
- /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
- /// </li>
- /// </ol>
- /// <para>
- /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package
- /// uses ICU4J's Collator, which makes its
- /// version available, thus allowing collation to be versioned independently
- /// from the JVM. ICUCollationAttributeFactory is also significantly faster and
- /// generates significantly shorter keys than CollationAttributeFactory. See
- /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
- /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
- /// generation timing and key length comparisons between ICU4J and
- /// java.text.Collator over several languages.
- /// </para>
- /// <para>
- /// CollationKeys generated by java.text.Collators are not compatible
- /// with those those generated by ICU Collators. Specifically, if you use
- /// CollationAttributeFactory to generate index terms, do not use
- /// ICUCollationAttributeFactory on the query side, or vice versa.
- /// </para>
- /// </summary>
- public class CollationAttributeFactory : AttributeSource.AttributeFactory
+ /// <summary>
+ /// <para>
+ /// Converts each token into its <seealso cref="CollationKey"/>, and then
+ /// encodes the bytes as an index term.
+ /// </para>
+ /// <para>
+ /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ /// index and query time -- CollationKeys are only comparable when produced by
+ /// the same Collator. Since <seealso cref="RuleBasedCollator"/>s are not
+ /// independently versioned, it is unsafe to search against stored
+ /// CollationKeys unless the following are exactly the same (best practice is
+ /// to store this information with the index and check that they remain the
+ /// same at query time):
+ /// </para>
+ /// <ol>
+ /// <li>JVM vendor</li>
+ /// <li>JVM version, including patch version</li>
+ /// <li>
+ /// The language (and country and variant, if specified) of the Locale
+ /// used when constructing the collator via
+ /// <seealso cref="Collator#getInstance(Locale)"/>.
+ /// </li>
+ /// <li>
+ /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+ /// </li>
+ /// </ol>
+ /// <para>
+ /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package
+ /// uses ICU4J's Collator, which makes its
+ /// version available, thus allowing collation to be versioned independently
+ /// from the JVM. ICUCollationAttributeFactory is also significantly faster and
+ /// generates significantly shorter keys than CollationAttributeFactory. See
+ /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+ /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+ /// generation timing and key length comparisons between ICU4J and
+ /// java.text.Collator over several languages.
+ /// </para>
+ /// <para>
+ /// CollationKeys generated by java.text.Collators are not compatible
+ /// with those generated by ICU Collators. Specifically, if you use
+ /// CollationAttributeFactory to generate index terms, do not use
+ /// ICUCollationAttributeFactory on the query side, or vice versa.
+ /// </para>
+ /// </summary>
+ // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+ // make it CLS compliant (at least the parts of it we use)
+ [System.CLSCompliant(false)]
+ public class CollationAttributeFactory : AttributeSource.AttributeFactory
{
private readonly Collator collator;
private readonly AttributeSource.AttributeFactory @delegate;
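The doc comment's warning is not Java-specific: collation sort keys are opaque
byte sequences that only compare meaningfully when produced under identical
collator settings. A small illustration using the BCL's System.Globalization
rather than the icu.net types this commit annotates:

    using System;
    using System.Globalization;

    class CollatorDemo
    {
        static void Main()
        {
            CompareInfo sv = CultureInfo.GetCultureInfo("sv-SE").CompareInfo;
            CompareInfo de = CultureInfo.GetCultureInfo("de-DE").CompareInfo;

            // Swedish collates 'ä' after 'z'; German collates it with 'a'.
            // Sort keys (CompareInfo.GetSortKey) embed these rules, so keys
            // produced by different collators must never be mixed in one index.
            Console.WriteLine(sv.Compare("äpple", "zebra") > 0); // True
            Console.WriteLine(de.Compare("äpple", "zebra") > 0); // False
        }
    }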
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
index f6db44c..b76e520 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
@@ -24,64 +24,67 @@ namespace Lucene.Net.Collation
* limitations under the License.
*/
- /// <summary>
- /// <para>
- /// Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>.
- /// </para>
- /// <para>
- /// Converts the token into its <seealso cref="java.text.CollationKey"/>, and then
- /// encodes the CollationKey either directly or with
- /// <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow
- /// it to be stored as an index term.
- /// </para>
- /// <para>
- /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
- /// index and query time -- CollationKeys are only comparable when produced by
- /// the same Collator. Since <seealso cref="java.text.RuleBasedCollator"/>s are not
- /// independently versioned, it is unsafe to search against stored
- /// CollationKeys unless the following are exactly the same (best practice is
- /// to store this information with the index and check that they remain the
- /// same at query time):
- /// </para>
- /// <ol>
- /// <li>JVM vendor</li>
- /// <li>JVM version, including patch version</li>
- /// <li>
- /// The language (and country and variant, if specified) of the Locale
- /// used when constructing the collator via
- /// <seealso cref="Collator#getInstance(java.util.Locale)"/>.
- /// </li>
- /// <li>
- /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
- /// </li>
- /// </ol>
- /// <para>
- /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package
- /// uses ICU4J's Collator, which makes its
- /// its version available, thus allowing collation to be versioned
- /// independently from the JVM. ICUCollationKeyAnalyzer is also significantly
- /// faster and generates significantly shorter keys than CollationKeyAnalyzer.
- /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
- /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
- /// generation timing and key length comparisons between ICU4J and
- /// java.text.Collator over several languages.
- /// </para>
- /// <para>
- /// CollationKeys generated by java.text.Collators are not compatible
- /// with those those generated by ICU Collators. Specifically, if you use
- /// CollationKeyAnalyzer to generate index terms, do not use
- /// ICUCollationKeyAnalyzer on the query side, or vice versa.
- /// </para>
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="Version"/>
- /// compatibility when creating CollationKeyAnalyzer:
- /// <ul>
- /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
- /// versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>.
- /// </ul>
- /// </para>
- /// </summary>
- public sealed class CollationKeyAnalyzer : Analyzer
+ /// <summary>
+ /// <para>
+ /// Configures <seealso cref="KeywordTokenizer"/> with <seealso cref="CollationAttributeFactory"/>.
+ /// </para>
+ /// <para>
+ /// Converts the token into its <seealso cref="java.text.CollationKey"/>, and then
+ /// encodes the CollationKey either directly or with
+ /// <seealso cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow
+ /// it to be stored as an index term.
+ /// </para>
+ /// <para>
+ /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
+ /// index and query time -- CollationKeys are only comparable when produced by
+ /// the same Collator. Since <seealso cref="java.text.RuleBasedCollator"/>s are not
+ /// independently versioned, it is unsafe to search against stored
+ /// CollationKeys unless the following are exactly the same (best practice is
+ /// to store this information with the index and check that they remain the
+ /// same at query time):
+ /// </para>
+ /// <ol>
+ /// <li>JVM vendor</li>
+ /// <li>JVM version, including patch version</li>
+ /// <li>
+ /// The language (and country and variant, if specified) of the Locale
+ /// used when constructing the collator via
+ /// <seealso cref="Collator#getInstance(java.util.Locale)"/>.
+ /// </li>
+ /// <li>
+ /// The collation strength used - see <seealso cref="Collator#setStrength(int)"/>
+ /// </li>
+ /// </ol>
+ /// <para>
+ /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package
+ /// uses ICU4J's Collator, which makes its
+ /// version available, thus allowing collation to be versioned
+ /// independently from the JVM. ICUCollationKeyAnalyzer is also significantly
+ /// faster and generates significantly shorter keys than CollationKeyAnalyzer.
+ /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+ /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+ /// generation timing and key length comparisons between ICU4J and
+ /// java.text.Collator over several languages.
+ /// </para>
+ /// <para>
+ /// CollationKeys generated by java.text.Collators are not compatible
+ /// with those generated by ICU Collators. Specifically, if you use
+ /// CollationKeyAnalyzer to generate index terms, do not use
+ /// ICUCollationKeyAnalyzer on the query side, or vice versa.
+ /// </para>
+ /// <a name="version"/>
+ /// <para>You must specify the required <seealso cref="Version"/>
+ /// compatibility when creating CollationKeyAnalyzer:
+ /// <ul>
+ /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+ /// versions will encode the bytes with <seealso cref="IndexableBinaryStringTools"/>.
+ /// </ul>
+ /// </para>
+ /// </summary>
+ // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+ // make it CLS compliant (at least the parts of it we use)
+ [CLSCompliant(false)]
+ public sealed class CollationKeyAnalyzer : Analyzer
{
private readonly Collator collator;
private readonly CollationAttributeFactory factory;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
index 5012e9c..6e684c1 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
@@ -70,7 +70,10 @@ namespace Lucene.Net.Collation
/// @deprecated Use <seealso cref="CollationAttributeFactory"/> instead, which encodes
/// terms directly as bytes. This filter will be removed in Lucene 5.0
[Obsolete("Use <seealso cref=\"CollationAttributeFactory\"/> instead, which encodes")]
- public sealed class CollationKeyFilter : TokenFilter
+ // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+ // make it CLS compliant (at least the parts of it we use)
+ [CLSCompliant(false)]
+ public sealed class CollationKeyFilter : TokenFilter
{
private readonly Collator collator;
private readonly ICharTermAttribute termAtt;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/21b3d8b7/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
index dc3b85e..a29a5e8 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
@@ -1,5 +1,6 @@
\ufeffusing Icu.Collation;
using Lucene.Net.Analysis.TokenAttributes;
+using System;
namespace Lucene.Net.Collation.TokenAttributes
{
@@ -20,11 +21,14 @@ namespace Lucene.Net.Collation.TokenAttributes
* limitations under the License.
*/
- /// <summary>
- /// Extension of <seealso cref="CharTermAttribute"/> that encodes the term
- /// text as a binary Unicode collation key instead of as UTF-8 bytes.
- /// </summary>
- public class CollatedTermAttributeImpl : CharTermAttribute
+ /// <summary>
+ /// Extension of <seealso cref="CharTermAttribute"/> that encodes the term
+ /// text as a binary Unicode collation key instead of as UTF-8 bytes.
+ /// </summary>
+ // LUCENENET TODO: A better option would be to contribute to the icu.net library and
+ // make it CLS compliant (at least the parts of it we use)
+ [CLSCompliant(false)]
+ public class CollatedTermAttributeImpl : CharTermAttribute
{
private readonly Collator collator;
[12/14] lucenenet git commit: Lucene.Net.Analysis.Common: Reverted
ThaiTokenizer, ThaiWordFilter,
and SegmentingTokenizerBase back to their original API and used
IcuBreakIterator as the backing BreakIterator.
Posted by ni...@apache.org.
Lucene.Net.Analysis.Common: Reverted ThaiTokenizer, ThaiWordFilter, and SegmentingTokenizerBase back to their original API and used IcuBreakIterator as the backing BreakIterator.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/506f55a6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/506f55a6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/506f55a6
Branch: refs/heads/api-work
Commit: 506f55a64a9d82e1965da077cc38c64ecd9214eb
Parents: 5a7cb17
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 16:30:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:36 2017 +0700
----------------------------------------------------------------------
.../Analysis/Th/ThaiTokenizer.cs | 126 ++++++-------------
.../Analysis/Th/ThaiWordFilter.cs | 6 +-
.../Analysis/Util/SegmentingTokenizerBase.cs | 91 +++++---------
.../Analysis/Th/TestThaiAnalyzer.cs | 9 --
.../Util/TestSegmentingTokenizerBase.cs | 15 +--
5 files changed, 76 insertions(+), 171 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index 52f6750..ae3ab1a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -1,13 +1,13 @@
-\ufeffusing Icu;
+\ufeff//using Icu;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
+using System.Globalization;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
-using BreakIterator = Icu.BreakIterator;
namespace Lucene.Net.Analysis.Th
{
@@ -43,17 +43,12 @@ namespace Lucene.Net.Analysis.Th
/// If this is false, this tokenizer will not work at all!
/// </summary>
public static readonly bool DBBI_AVAILABLE;
-
- //LUCENENET: Specifying to use Thai locale.
- private static readonly Locale LocaleThai = new Locale("th");
-
- private static readonly IEnumerable<Boundary> proto;
+ private static readonly BreakIterator proto = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new CultureInfo("th"));
static ThaiTokenizer()
{
// check that we have a working dictionary-based break iterator for thai
- proto = BreakIterator.GetWordBoundaries(LocaleThai, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22", includeSpacesAndPunctuation: false).ToArray();
- var first = proto.FirstOrDefault();
- DBBI_AVAILABLE = first != default(Boundary) && first.End == 4;
+ proto.SetText("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22");
+ DBBI_AVAILABLE = proto.IsBoundary(4);
}
private readonly ThaiWordBreaker wordBreaker;
@@ -75,18 +70,18 @@ namespace Lucene.Net.Analysis.Th
/// <summary>
/// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
public ThaiTokenizer(AttributeFactory factory, TextReader reader)
- : base(factory, reader, LocaleThai, BreakIterator.UBreakIteratorType.SENTENCE)
+ : base(factory, reader, new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, new CultureInfo("th")))
{
if (!DBBI_AVAILABLE)
{
throw new System.NotSupportedException("This JRE does not have support for Thai segmentation");
}
- wordBreaker = new ThaiWordBreaker(LocaleUS);
+ wordBreaker = new ThaiWordBreaker(new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, CultureInfo.InvariantCulture));
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
}
- protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+ protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
this.sentenceStart = sentenceStart;
this.sentenceEnd = sentenceEnd;
@@ -94,7 +89,7 @@ namespace Lucene.Net.Analysis.Th
wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
}
- protected internal override bool IncrementWord()
+ protected override bool IncrementWord()
{
int start = wordBreaker.Current();
if (start == BreakIterator.DONE)
@@ -104,7 +99,7 @@ namespace Lucene.Net.Analysis.Th
// find the next set of boundaries, skipping over non-tokens
int end = wordBreaker.Next();
- while (end != BreakIterator.DONE && !char.IsLetterOrDigit((char)Support.Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
+ while (end != BreakIterator.DONE && !char.IsLetterOrDigit((char)Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
{
start = end;
end = wordBreaker.Next();
@@ -126,137 +121,86 @@ namespace Lucene.Net.Analysis.Th
/// LUCENENET specific class to patch the behavior of the ICU BreakIterator.
/// Corrects the breaking of words by finding transitions between Thai and non-Thai
/// characters.
- ///
- /// This logic assumes that the Java BreakIterator also breaks up Thai numerals from
- /// Arabic numerals (1, 2, 3, etc.). That is, it assumes the first test below passes
- /// and the second test fails in Lucene (not attempted).
- ///
- /// ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
- /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53", "456" });
- /// AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456", new string[] { "\u0e51\u0e52\u0e53456" });
/// </summary>
internal class ThaiWordBreaker
{
- private readonly Locale locale;
- private IEnumerator<Boundary> wordBreaker;
- private int currentIndex;
+ private readonly BreakIterator wordBreaker;
private string text;
private readonly IList<int> transitions = new List<int>();
private readonly static Regex thaiPattern = new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);
- public ThaiWordBreaker(Locale locale)
+ public ThaiWordBreaker(BreakIterator wordBreaker)
{
- if (locale == null)
+ if (wordBreaker == null)
{
- throw new ArgumentNullException("locale");
+ throw new ArgumentNullException("wordBreaker");
}
-
- this.locale = locale;
- currentIndex = int.MinValue;
+ this.wordBreaker = wordBreaker;
}
public void SetText(string text)
{
this.text = text;
- wordBreaker = BreakIterator.GetWordBoundaries(locale, text, includeSpacesAndPunctuation: false).ToList().GetEnumerator();
- currentIndex = wordBreaker.MoveNext()
- ? wordBreaker.Current.Start : BreakIterator.DONE;
+ wordBreaker.SetText(text);
}
public int Current()
{
- return currentIndex;
+ if (transitions.Any())
+ {
+ return transitions.First();
+ }
+ return wordBreaker.Current;
}
public int Next()
{
- // Tracking whether a transition was returned last time
- // next was called. If that is the case, and there are no
- // transitions left, then we return the End index in the
- // wordbreaker.Current
- bool transitionReturned = false;
-
if (transitions.Any())
{
- transitionReturned = currentIndex == transitions[0];
transitions.RemoveAt(0);
}
-
if (transitions.Any())
{
- currentIndex = transitions.First();
- return currentIndex;
- }
- else if (transitionReturned)
- {
- currentIndex = wordBreaker.Current.End;
+ return transitions.First();
}
-
return GetNext();
}
private int GetNext()
{
- bool isThaiLetter = false, isNonThaiLetter = false;
+ bool isThai = false, isNonThai = false;
bool prevWasThai = false, prevWasNonThai = false;
+ int prev = wordBreaker.Current;
+ int current = wordBreaker.Next();
- int previous = currentIndex;
- int current;
-
- if (currentIndex == wordBreaker.Current.Start)
- {
- current = wordBreaker.Current.End;
- }
- else if (wordBreaker.MoveNext())
- {
- // The break iterator works by returning the start and end
- // boundary of each word it finds. Consider the two words,
- //
- if (currentIndex == wordBreaker.Current.Start)
- {
- current = wordBreaker.Current.End;
- }
- else
- {
- current = wordBreaker.Current.Start;
- }
- }
- else
- {
- current = BreakIterator.DONE;
- }
-
- if (current != BreakIterator.DONE && current - previous > 0)
+ if (current != BreakIterator.DONE && current - prev > 0)
{
// Find all of the transitions between Thai and non-Thai characters and digits
- for (int i = previous; i < current; i++)
+ for (int i = prev; i < current; i++)
{
char c = text[i];
- isThaiLetter = char.IsLetter(c) && thaiPattern.IsMatch(c.ToString());
- isNonThaiLetter = char.IsLetter(c) && !isThaiLetter;
+ isThai = char.IsLetter(c) && thaiPattern.IsMatch(c.ToString());
+ isNonThai = char.IsLetter(c) && !isThai;
- if ((prevWasThai && isNonThaiLetter) ||
- (prevWasNonThai && isThaiLetter))
+ if ((prevWasThai && isNonThai) ||
+ (prevWasNonThai && isThai))
{
transitions.Add(i);
}
// record the values for comparison with the next loop
- prevWasThai = isThaiLetter;
- prevWasNonThai = isNonThaiLetter;
+ prevWasThai = isThai;
+ prevWasNonThai = isNonThai;
}
if (transitions.Any())
{
transitions.Add(current);
- currentIndex = transitions.First();
-
- return currentIndex;
+ return transitions.First();
}
}
- currentIndex = current;
- return currentIndex;
+ return current;
}
}
}
\ No newline at end of file
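Condensed from the hunks above, the dictionary-based-break-iterator probe now works like this (a sketch assuming the IcuBreakIterator surface used in the diff):

using System.Globalization;

// "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22" ("ภาษาไทย") contains no
// spaces; only a dictionary-based word break iterator finds the boundary
// after the first four code units ("ภาษา"), which IsBoundary(4) tests.
var proto = new IcuBreakIterator(
    Icu.BreakIterator.UBreakIteratorType.WORD, new CultureInfo("th"));
proto.SetText("\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22");
bool dbbiAvailable = proto.IsBoundary(4);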
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
index 2b21033..d55733a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -1,9 +1,11 @@
-\ufeffusing Icu;
+\ufeff//using Icu;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
+using System.Globalization;
using System.Text.RegularExpressions;
namespace Lucene.Net.Analysis.Th
@@ -48,7 +50,7 @@ namespace Lucene.Net.Analysis.Th
/// If this is false, this filter will not work at all!
/// </summary>
public static readonly bool DBBI_AVAILABLE = ThaiTokenizer.DBBI_AVAILABLE;
- private readonly ThaiWordBreaker breaker = new ThaiWordBreaker(new Locale());
+ private readonly ThaiWordBreaker breaker = new ThaiWordBreaker(new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new CultureInfo("th")));
private readonly CharArrayIterator charIterator = CharArrayIterator.NewWordInstance();
private readonly bool handlePosIncr;
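Both the tokenizer and this filter now drive the same ThaiWordBreaker, whose core job is the Thai/non-Thai transition scan shown in the ThaiTokenizer diff. Pulled out as a standalone helper (an illustrative sketch, not the class itself):

using System.Collections.Generic;
using System.Text.RegularExpressions;

internal static class ThaiTransitionScanner
{
    private static readonly Regex thaiPattern =
        new Regex(@"\p{IsThai}", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // Returns the indexes in [start, end) where the text flips between
    // Thai letters and non-Thai letters. Digits and punctuation never set
    // either flag (char.IsLetter is false), so transitions are recorded
    // only between letters of different scripts.
    internal static IList<int> FindTransitions(string text, int start, int end)
    {
        var transitions = new List<int>();
        bool prevWasThai = false, prevWasNonThai = false;
        for (int i = start; i < end; i++)
        {
            char c = text[i];
            bool isThai = char.IsLetter(c) && thaiPattern.IsMatch(c.ToString());
            bool isNonThai = char.IsLetter(c) && !isThai;
            if ((prevWasThai && isNonThai) || (prevWasNonThai && isThai))
            {
                transitions.Add(i);
            }
            prevWasThai = isThai;
            prevWasNonThai = isNonThai;
        }
        return transitions;
    }
}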
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
index 502b7da..ca0b994 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -1,13 +1,8 @@
-\ufeffusing System;
-using System.Collections;
-using System.Collections.Generic;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using System;
using System.Diagnostics;
-using System.Linq;
using System.IO;
-using Icu;
-using Lucene.Net.Analysis.TokenAttributes;
-using Version = Lucene.Net.Util.LuceneVersion;
-
namespace Lucene.Net.Analysis.Util
{
@@ -28,9 +23,8 @@ namespace Lucene.Net.Analysis.Util
* limitations under the License.
*/
-
/// <summary>
- /// Breaks text into sentences with a <seealso cref="BreakIterator"/> and
+ /// Breaks text into sentences with a <see cref="BreakIterator"/> and
/// allows subclasses to decompose these sentences into words.
/// <para>
/// This can be used by subclasses that need sentence context
@@ -46,11 +40,8 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
public abstract class SegmentingTokenizerBase : Tokenizer
{
- // LUCENENET: Using Icu .NET to get Local_US
- public static readonly Locale LocaleUS = new Locale("en-US");
-
- protected internal const int BUFFERMAX = 1024;
- protected internal readonly char[] m_buffer = new char[BUFFERMAX];
+ protected const int BUFFERMAX = 1024;
+ protected readonly char[] m_buffer = new char[BUFFERMAX];
/// <summary>
/// true length of text in the buffer </summary>
private int length = 0;
@@ -59,11 +50,9 @@ namespace Lucene.Net.Analysis.Util
private int usableLength = 0;
/// <summary>
/// accumulated offset of previous buffers for this reader, for offsetAtt </summary>
- protected internal int m_offset = 0;
+ protected int m_offset = 0;
- private readonly Locale locale;
- private readonly BreakIterator.UBreakIteratorType iteratorType;
- private IEnumerator<Boundary> enumerator;
+ private readonly BreakIterator iterator;
private readonly CharArrayIterator wrapper = CharArrayIterator.NewSentenceInstance();
private readonly IOffsetAttribute offsetAtt;
@@ -77,28 +66,19 @@ namespace Lucene.Net.Analysis.Util
/// be provided to this constructor.
/// </para>
/// </summary>
- protected SegmentingTokenizerBase(TextReader reader, BreakIterator.UBreakIteratorType iteratorType)
- : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, LocaleUS, iteratorType)
- { }
-
- protected SegmentingTokenizerBase(TextReader reader, Locale locale, BreakIterator.UBreakIteratorType iteratorType)
- : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, locale, iteratorType)
- { }
+ protected SegmentingTokenizerBase(TextReader reader, BreakIterator iterator)
+ : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
+ {
+ }
/// <summary>
/// Construct a new SegmenterBase, also supplying the AttributeFactory
/// </summary>
- protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator.UBreakIteratorType iteratorType)
- : this(factory, reader, LocaleUS, iteratorType)
- { }
-
- protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, Locale locale, BreakIterator.UBreakIteratorType iteratorType)
+ protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator)
: base(factory, reader)
{
offsetAtt = AddAttribute<IOffsetAttribute>();
- this.iteratorType = iteratorType;
- this.locale = locale;
- enumerator = Enumerable.Empty<Boundary>().GetEnumerator();
+ this.iterator = iterator;
}
public override sealed bool IncrementToken()
@@ -122,7 +102,7 @@ namespace Lucene.Net.Analysis.Util
{
base.Reset();
wrapper.SetText(m_buffer, 0, 0);
- enumerator = Enumerable.Empty<Boundary>().GetEnumerator();
+ iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
length = usableLength = m_offset = 0;
}
@@ -193,16 +173,7 @@ namespace Lucene.Net.Analysis.Util
}
wrapper.SetText(m_buffer, 0, Math.Max(0, usableLength));
-
- var text = new string(wrapper.Text, wrapper.Start, wrapper.Length);
-
- if (enumerator != null)
- {
- enumerator.Dispose();
- enumerator = null;
- }
-
- enumerator = BreakIterator.GetBoundaries(iteratorType, locale, text).ToList().GetEnumerator();
+ iterator.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
}
// TODO: refactor to a shared readFully somewhere
@@ -240,15 +211,22 @@ namespace Lucene.Net.Analysis.Util
while (true)
{
- if (!enumerator.MoveNext())
+ int start = iterator.Current;
+
+ if (start == BreakIterator.DONE)
{
- return false;
+ return false; // BreakIterator exhausted
}
- var current = enumerator.Current;
+ // find the next set of boundaries
+ int end_Renamed = iterator.Next();
- SetNextSentence(current.Start, current.End);
+ if (end_Renamed == BreakIterator.DONE)
+ {
+ return false; // BreakIterator exhausted
+ }
+ SetNextSentence(start, end_Renamed);
if (IncrementWord())
{
return true;
@@ -256,23 +234,12 @@ namespace Lucene.Net.Analysis.Util
}
}
- public override void Dispose()
- {
- if (enumerator != null)
- {
- enumerator.Dispose();
- enumerator = null;
- }
-
- base.Dispose();
- }
-
/// <summary>
/// Provides the next input sentence for analysis </summary>
- protected internal abstract void SetNextSentence(int sentenceStart, int sentenceEnd);
+ protected abstract void SetNextSentence(int sentenceStart, int sentenceEnd);
/// <summary>
/// Returns true if another word is available </summary>
- protected internal abstract bool IncrementWord();
+ protected abstract bool IncrementWord();
}
}
\ No newline at end of file
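The net effect of this file's changes: the Boundary-enumerator plumbing (and the Dispose override it required) is gone, replaced by the cursor-style BreakIterator protocol. Condensed from the hunks above (a sketch; member names as in the diff):

// Advance to the next sentence: Current is the left boundary, Next()
// moves the cursor and returns the right boundary, DONE means exhausted.
private bool NextSentence()
{
    while (true)
    {
        int start = iterator.Current;
        if (start == BreakIterator.DONE)
        {
            return false; // break iterator exhausted
        }
        int end = iterator.Next();
        if (end == BreakIterator.DONE)
        {
            return false; // break iterator exhausted
        }
        SetNextSentence(start, end);
        if (IncrementWord())
        {
            return true; // a word is buffered for IncrementToken()
        }
    }
}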
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
index 3654c56..5ff98e2 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Th/TestThaiAnalyzer.cs
@@ -133,24 +133,15 @@ namespace Lucene.Net.Analysis.Th
[Test]
public virtual void TestRandomStrings()
{
- fail("This test is somehow crashing NUnit and causing it not to complete");
CheckRandomData(Random(), new ThaiAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
}
/// <summary>
/// blast some random large strings through the analyzer </summary>
///
-#if NETSTANDARD
- // NUnit does not have TimeoutAttribute for .NET Standard, so it can run for a long time.
- // https://github.com/nunit/nunit/issues/1658
- [LongRunningTest]
-#endif
[Test]
public virtual void TestRandomHugeStrings()
{
- // LUCENENET NOTE: This is NOT a long running test - it should only take a few seconds
- fail("This test is somehow crashing NUnit and causing it not to complete");
-
Random random = Random();
CheckRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
}
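With the fail() guards removed, both random-data tests run again. For a targeted check of the numeral-splitting assumption recorded in the comment this commit removes from ThaiWordBreaker, a test along these lines would apply (hypothetical; it mirrors the removed doc comment's first case):

[Test]
public virtual void TestThaiArabicNumeralTransition()
{
    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
    // Thai digits ๑๒๓ followed by Arabic digits 456 are expected to
    // split at the script transition.
    AssertAnalyzesTo(analyzer, "\u0e51\u0e52\u0e53456",
        new string[] { "\u0e51\u0e52\u0e53", "456" });
}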
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/506f55a6/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
index d717edf..86fda8f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestSegmentingTokenizerBase.cs
@@ -4,7 +4,8 @@ using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using NUnit.Framework;
-\ufeffusing System;
+using System;
+using System.Globalization;
using System.IO;
using System.Text;
using BreakIterator = Icu.BreakIterator;
@@ -130,20 +131,20 @@ namespace Lucene.Net.Analysis.Util
internal IOffsetAttribute offsetAtt;
public WholeSentenceTokenizer(TextReader reader)
- : base(reader, new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE)
+ : base(reader, new IcuBreakIterator(BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture) /*new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE*/)
{
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
}
- protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+ protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
this.sentenceStart = sentenceStart;
this.sentenceEnd = sentenceEnd;
hasSentence = true;
}
- protected internal override bool IncrementWord()
+ protected override bool IncrementWord()
{
if (hasSentence)
{
@@ -175,14 +176,14 @@ namespace Lucene.Net.Analysis.Util
internal IPositionIncrementAttribute posIncAtt;
public SentenceAndWordTokenizer(TextReader reader)
- : base(reader, new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE)
+ : base(reader, new IcuBreakIterator(BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture) /*new Locale("en-US"), BreakIterator.UBreakIteratorType.SENTENCE*/)
{
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
posIncAtt = AddAttribute<IPositionIncrementAttribute>();
}
- protected internal override void SetNextSentence(int sentenceStart, int sentenceEnd)
+ protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
this.wordStart = this.wordEnd = this.sentenceStart = sentenceStart;
this.sentenceEnd = sentenceEnd;
@@ -195,7 +196,7 @@ namespace Lucene.Net.Analysis.Util
posBoost = -1;
}
- protected internal override bool IncrementWord()
+ protected override bool IncrementWord()
{
wordStart = wordEnd;
while (wordStart < sentenceEnd)
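Under the reverted API these test tokenizers pass a ready-made BreakIterator to the base constructor rather than a Locale/iterator-type pair. The minimal subclass shape, distilled from WholeSentenceTokenizer above (a sketch with hypothetical names; token emission elided):

using System.Globalization;
using System.IO;

internal sealed class MinimalSentenceTokenizer : SegmentingTokenizerBase
{
    private bool hasSentence;
    private int sentenceStart, sentenceEnd;

    public MinimalSentenceTokenizer(TextReader reader)
        : base(reader, new IcuBreakIterator(
            Icu.BreakIterator.UBreakIteratorType.SENTENCE,
            CultureInfo.InvariantCulture))
    {
    }

    protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
    {
        this.sentenceStart = sentenceStart;
        this.sentenceEnd = sentenceEnd;
        hasSentence = true; // one pending sentence per call
    }

    protected override bool IncrementWord()
    {
        if (!hasSentence)
        {
            return false; // this sentence already consumed
        }
        hasSentence = false;
        // Copy m_buffer[sentenceStart..sentenceEnd) into the term
        // attribute and set offsets here (elided for brevity).
        return true;
    }
}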
[05/14] lucenenet git commit: Lucene.Net.Core.Util.Version: Changed .ToUpper(CultureInfo.InvariantCulture) > .ToUpperInvariant() (issue with .NET Core)
Posted by ni...@apache.org.
Lucene.Net.Core.Util.Version: Changed .ToUpper(CultureInfo.InvariantCulture) > .ToUpperInvariant() (issue with .NET Core)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9525d45f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9525d45f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9525d45f
Branch: refs/heads/api-work
Commit: 9525d45f80a11617c6e478d362165fddaaa288e1
Parents: fc34ba7
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 12:51:06 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:31 2017 +0700
----------------------------------------------------------------------
src/Lucene.Net.Core/Util/Version.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9525d45f/src/Lucene.Net.Core/Util/Version.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Version.cs b/src/Lucene.Net.Core/Util/Version.cs
index 9a9c6f9..59b4ac2 100644
--- a/src/Lucene.Net.Core/Util/Version.cs
+++ b/src/Lucene.Net.Core/Util/Version.cs
@@ -166,7 +166,7 @@ namespace Lucene.Net.Util
public static LuceneVersion ParseLeniently(string version)
{
- string parsedMatchVersion = version.ToUpper(CultureInfo.InvariantCulture);
+ string parsedMatchVersion = version.ToUpperInvariant();
LuceneVersion result;
Enum.TryParse(NumericVersion.Replace(parsedMatchVersion, "LUCENE_$1$2", 1), out result);
return result;
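The two calls produce the same result; ToUpperInvariant simply sidesteps the ToUpper(CultureInfo) overload, which (as an assumption about the ".NET Core issue" named in the subject) is missing from some .NET Core/.NET Standard surface areas. A quick illustration:

using System;
using System.Globalization;

string version = "lucene_47";
// Equivalent outputs; the second form takes no CultureInfo parameter and
// therefore compiles against the narrower .NET Core/Standard surface.
string viaCulture = version.ToUpper(CultureInfo.InvariantCulture);
string viaInvariant = version.ToUpperInvariant();
Console.WriteLine(viaCulture == viaInvariant); // True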