You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/04 20:32:20 UTC
[01/39] lucenenet git commit: Lucene.Net.Analysis.Shingle refactor:
member accessibility and documentation comments
Repository: lucenenet
Updated Branches:
refs/heads/api-work f1631b143 -> ab81d9131
Lucene.Net.Analysis.Shingle refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6ea63fd3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6ea63fd3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6ea63fd3
Branch: refs/heads/api-work
Commit: 6ea63fd329aae75e455aaf1611692022ce6738ff
Parents: f1631b1
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 08:48:22 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 08:48:22 2017 +0700
----------------------------------------------------------------------
.../Analysis/Shingle/ShingleAnalyzerWrapper.cs | 13 ++-
.../Analysis/Shingle/ShingleFilter.cs | 103 ++++++++++---------
.../Analysis/Shingle/ShingleFilterFactory.cs | 5 +-
3 files changed, 63 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6ea63fd3/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
index b3634dc..50dad96 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
@@ -29,7 +29,6 @@ namespace Lucene.Net.Analysis.Shingle
/// </summary>
public sealed class ShingleAnalyzerWrapper : AnalyzerWrapper
{
-
private readonly Analyzer @delegate;
private readonly int maxShingleSize;
private readonly int minShingleSize;
@@ -39,24 +38,24 @@ namespace Lucene.Net.Analysis.Shingle
private readonly string fillerToken;
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer)
- : this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
+ : this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
{
}
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize)
- : this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+ : this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
{
}
public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize)
- : this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, false, ShingleFilter.DEFAULT_FILLER_TOKEN)
+ : this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, false, ShingleFilter.DEFAULT_FILLER_TOKEN)
{
}
/// <summary>
- /// Creates a new ShingleAnalyzerWrapper
+ /// Creates a new <see cref="ShingleAnalyzerWrapper"/>
/// </summary>
- /// <param name="delegate"> Analyzer whose TokenStream is to be filtered </param>
+ /// <param name="delegate"> <see cref="Analyzer"/> whose <see cref="TokenStream"/> is to be filtered </param>
/// <param name="minShingleSize"> Min shingle (token ngram) size </param>
/// <param name="maxShingleSize"> Max shingle size </param>
/// <param name="tokenSeparator"> Used to separate input stream tokens in output shingles </param>
@@ -168,7 +167,7 @@ namespace Lucene.Net.Analysis.Shingle
}
}
- protected override Analyzer GetWrappedAnalyzer(string fieldName)
+ protected override sealed Analyzer GetWrappedAnalyzer(string fieldName)
{
return @delegate;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6ea63fd3/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
index 19b07a0..1153d9e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
@@ -1,10 +1,9 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
-using Lucene.Net.Analysis;
-using Lucene.Net.Util;
-using Lucene.Net.Analysis.TokenAttributes;
namespace Lucene.Net.Analysis.Shingle
{
@@ -26,7 +25,7 @@ namespace Lucene.Net.Analysis.Shingle
*/
/// <summary>
- /// <para>A ShingleFilter constructs shingles (token n-grams) from a token stream.
+ /// <para>A <see cref="ShingleFilter"/> constructs shingles (token n-grams) from a token stream.
/// In other words, it creates combinations of tokens as a single token.
///
/// </para>
@@ -148,7 +147,7 @@ namespace Lucene.Net.Analysis.Shingle
/// <summary>
/// true if no shingles have been output yet (for outputUnigramsIfNoShingles).
/// </summary>
- internal bool noShingleOutput = true;
+ private bool noShingleOutput = true;
/// <summary>
/// Holds the State after input.end() was called, so we can
@@ -164,14 +163,14 @@ namespace Lucene.Net.Analysis.Shingle
/// <summary>
- /// Constructs a ShingleFilter with the specified shingle size from the
- /// <see cref="TokenStream"/> <code>input</code>
+ /// Constructs a <see cref="ShingleFilter"/> with the specified shingle size from the
+ /// <see cref="TokenStream"/> <paramref name="input"/>
/// </summary>
/// <param name="input"> input stream </param>
/// <param name="minShingleSize"> minimum shingle size produced by the filter. </param>
/// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
public ShingleFilter(TokenStream input, int minShingleSize, int maxShingleSize)
- : base(input)
+ : base(input)
{
SetMaxShingleSize(maxShingleSize);
SetMinShingleSize(minShingleSize);
@@ -183,33 +182,33 @@ namespace Lucene.Net.Analysis.Shingle
}
/// <summary>
- /// Constructs a ShingleFilter with the specified shingle size from the
- /// <see cref="TokenStream"/> <code>input</code>
+ /// Constructs a <see cref="ShingleFilter"/> with the specified shingle size from the
+ /// <see cref="TokenStream"/> <paramref name="input"/>
/// </summary>
/// <param name="input"> input stream </param>
/// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
public ShingleFilter(TokenStream input, int maxShingleSize)
- : this(input, DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+ : this(input, DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
{
}
/// <summary>
- /// Construct a ShingleFilter with default shingle size: 2.
+ /// Construct a <see cref="ShingleFilter"/> with default shingle size: 2.
/// </summary>
/// <param name="input"> input stream </param>
public ShingleFilter(TokenStream input)
- : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+ : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
{
}
/// <summary>
- /// Construct a ShingleFilter with the specified token type for shingle tokens
+ /// Construct a <see cref="ShingleFilter"/> with the specified token type for shingle tokens
/// and the default shingle size: 2
/// </summary>
/// <param name="input"> input stream </param>
/// <param name="tokenType"> token type for shingle tokens </param>
public ShingleFilter(TokenStream input, string tokenType)
- : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+ : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
{
SetTokenType(tokenType);
}
@@ -314,7 +313,7 @@ namespace Lucene.Net.Analysis.Shingle
{
bool tokenAvailable = false;
int builtGramSize = 0;
- if (gramSize.atMinValue() || inputWindow.Count < gramSize.Value)
+ if (gramSize.AtMinValue() || inputWindow.Count < gramSize.Value)
{
ShiftInputWindow();
gramBuilder.Length = 0;
@@ -344,7 +343,7 @@ namespace Lucene.Net.Analysis.Shingle
{
if (gramNum == gramSize.Value)
{
- gramSize.advance();
+ gramSize.Advance();
}
}
else
@@ -365,7 +364,7 @@ namespace Lucene.Net.Analysis.Shingle
offsetAtt.SetOffset(offsetAtt.StartOffset, nextToken.offsetAtt.EndOffset);
posLenAtt.PositionLength = builtGramSize;
isOutputHere = true;
- gramSize.advance();
+ gramSize.Advance();
tokenAvailable = true;
}
}
@@ -377,8 +376,8 @@ namespace Lucene.Net.Analysis.Shingle
/// <summary>
/// <para>Get the next token from the input stream.
/// </para>
- /// <para>If the next token has <code>positionIncrement > 1</code>,
- /// <code>positionIncrement - 1</code> <see cref="#fillerToken"/>s are
+ /// <para>If the next token has <c>positionIncrement > 1</c>,
+ /// <c>positionIncrement - 1</c> <see cref="fillerToken"/>s are
/// inserted first.
/// </para>
/// </summary>
@@ -496,11 +495,11 @@ namespace Lucene.Net.Analysis.Shingle
}
/// <summary>
- /// <para>Fills <see cref="#inputWindow"/> with input stream tokens, if available,
+ /// <para>Fills <see cref="inputWindow"/> with input stream tokens, if available,
/// shifting to the right if the window was previously full.
/// </para>
- /// <para>Resets <see cref="#gramSize"/> to its minimum value.
- ///
+ /// <para>
+ /// Resets <see cref="gramSize"/> to its minimum value.
/// </para>
/// </summary>
/// <exception cref="IOException"> if there's a problem getting the next token </exception>
@@ -510,7 +509,7 @@ namespace Lucene.Net.Analysis.Shingle
if (inputWindow.Count > 0)
{
firstToken = inputWindow.First.Value;
- inputWindow.RemoveFirst();
+ inputWindow.RemoveFirst(); // LUCENENET TODO: Safer if we remove the .First.Value from the previous line (do this across the solution) - extension method?
}
while (inputWindow.Count < maxShingleSize)
{
@@ -539,18 +538,18 @@ namespace Lucene.Net.Analysis.Shingle
}
}
}
- if (outputUnigramsIfNoShingles && noShingleOutput && gramSize.minValue > 1 && inputWindow.Count < minShingleSize)
+ if (outputUnigramsIfNoShingles && noShingleOutput && gramSize.MinValue > 1 && inputWindow.Count < minShingleSize)
{
- gramSize.minValue = 1;
+ gramSize.MinValue = 1;
}
- gramSize.reset();
+ gramSize.Reset();
isOutputHere = false;
}
public override void Reset()
{
base.Reset();
- gramSize.reset();
+ gramSize.Reset();
inputWindow.Clear();
nextInputStreamToken = null;
isNextInputStreamToken = false;
@@ -562,7 +561,7 @@ namespace Lucene.Net.Analysis.Shingle
if (outputUnigramsIfNoShingles && !outputUnigrams)
{
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
- gramSize.minValue = minShingleSize;
+ gramSize.MinValue = minShingleSize;
}
}
@@ -570,32 +569,32 @@ namespace Lucene.Net.Analysis.Shingle
/// <summary>
/// <para>An instance of this class is used to maintain the number of input
/// stream tokens that will be used to compose the next unigram or shingle:
- /// <see cref="#gramSize"/>.
+ /// <see cref="gramSize"/>.
/// </para>
/// <para><code>gramSize</code> will take on values from the circular sequence
- /// <b>{ [ 1, ] <see cref="#minShingleSize"/> [ , ... , <see cref="#maxShingleSize"/> ] }</b>.
+ /// <b>{ [ 1, ] <see cref="minShingleSize"/> [ , ... , <see cref="maxShingleSize"/> ] }</b>.
/// </para>
/// <para>1 is included in the circular sequence only if
- /// <see cref="#outputUnigrams"/> = true.
+ /// <see cref="outputUnigrams"/> = true.
/// </para>
/// </summary>
private class CircularSequence
{
private readonly ShingleFilter outerInstance;
- internal int value;
- internal int previousValue;
- internal int minValue;
+ private int value;
+ private int previousValue;
+ private int minValue;
public CircularSequence(ShingleFilter outerInstance)
{
this.outerInstance = outerInstance;
minValue = outerInstance.outputUnigrams ? 1 : outerInstance.minShingleSize;
- reset();
+ Reset();
}
/// <returns> the current value. </returns>
- /// <seealso cref= #advance() </seealso>
+ /// <seealso cref="Advance()"/>
public virtual int Value
{
get
@@ -608,13 +607,13 @@ namespace Lucene.Net.Analysis.Shingle
/// <para>Increments this circular number's value to the next member in the
/// circular sequence
/// <code>gramSize</code> will take on values from the circular sequence
- /// <b>{ [ 1, ] <see cref="#minShingleSize"/> [ , ... , <see cref="#maxShingleSize"/> ] }</b>.
+ /// <b>{ [ 1, ] <see cref="minShingleSize"/> [ , ... , <see cref="maxShingleSize"/> ] }</b>.
/// </para>
/// <para>1 is included in the circular sequence only if
- /// <see cref="#outputUnigrams"/> = true.
+ /// <see cref="outputUnigrams"/> = true.
/// </para>
/// </summary>
- public virtual void advance()
+ public virtual void Advance()
{
previousValue = value;
if (value == 1)
@@ -623,7 +622,7 @@ namespace Lucene.Net.Analysis.Shingle
}
else if (value == outerInstance.maxShingleSize)
{
- reset();
+ Reset();
}
else
{
@@ -636,13 +635,13 @@ namespace Lucene.Net.Analysis.Shingle
/// circular sequence
/// </para>
/// <para><code>gramSize</code> will take on values from the circular sequence
- /// <b>{ [ 1, ] <see cref="#minShingleSize"/> [ , ... , <see cref="#maxShingleSize"/> ] }</b>.
+ /// <b>{ [ 1, ] <see cref="minShingleSize"/> [ , ... , <see cref="maxShingleSize"/> ] }</b>.
/// </para>
/// <para>1 is included in the circular sequence only if
- /// <see cref="#outputUnigrams"/> = true.
+ /// <see cref="outputUnigrams"/> = true.
/// </para>
/// </summary>
- public virtual void reset()
+ public virtual void Reset()
{
previousValue = value = minValue;
}
@@ -651,19 +650,19 @@ namespace Lucene.Net.Analysis.Shingle
/// <para>Returns true if the current value is the first member of the circular
/// sequence.
/// </para>
- /// <para>If <see cref="#outputUnigrams"/> = true, the first member of the circular
- /// sequence will be 1; otherwise, it will be <see cref="#minShingleSize"/>.
+ /// <para>If <see cref="outputUnigrams"/> = true, the first member of the circular
+ /// sequence will be 1; otherwise, it will be <see cref="minShingleSize"/>.
///
/// </para>
/// </summary>
/// <returns> true if the current value is the first member of the circular
/// sequence; false otherwise </returns>
- public virtual bool atMinValue()
+ public virtual bool AtMinValue()
{
return value == minValue;
}
- /// <returns> the value this instance had before the last advance() call </returns>
+ /// <returns> the value this instance had before the last <see cref="Advance()"/> call </returns>
public virtual int PreviousValue
{
get
@@ -671,6 +670,12 @@ namespace Lucene.Net.Analysis.Shingle
return previousValue;
}
}
+
+ internal virtual int MinValue // LUCENENET specific - added to encapsulate minValue field
+ {
+ get { return minValue; }
+ set { minValue = value; }
+ }
}
private class InputWindowToken
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6ea63fd3/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
index 782fb83..58889ec 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
@@ -42,8 +42,9 @@ namespace Lucene.Net.Analysis.Shingle
private readonly string fillerToken;
/// <summary>
- /// Creates a new ShingleFilterFactory </summary>
- public ShingleFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="ShingleFilterFactory"/> </summary>
+ public ShingleFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
maxShingleSize = GetInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
if (maxShingleSize < 2)
[24/39] lucenenet git commit:
Lucene.Net.Analysis.Miscellaneous.StemmerOverrideFilter refactor: BytesReader
> GetBytesReader() (returns new instance)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Miscellaneous.StemmerOverrideFilter refactor: BytesReader > GetBytesReader() (returns new instance)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/de106d90
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/de106d90
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/de106d90
Branch: refs/heads/api-work
Commit: de106d905c8c29d239bacd85d3cc27cc5218e171
Parents: 0697c7a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 00:59:11 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 00:59:11 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/StemmerOverrideFilter.cs | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/de106d90/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
index d0f6618..2eec23f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
@@ -49,7 +49,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
: base(input)
{
this.stemmerOverrideMap = stemmerOverrideMap;
- fstReader = stemmerOverrideMap.BytesReader;
+ fstReader = stemmerOverrideMap.GetBytesReader();
termAtt = AddAttribute<ICharTermAttribute>();
keywordAtt = AddAttribute<IKeywordAttribute>();
}
@@ -109,18 +109,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Returns a <see cref="FST.BytesReader"/> to pass to the <see cref="Get(char[], int, FST.Arc{BytesRef}, FST.BytesReader)"/> method.
/// </summary>
- public FST.BytesReader BytesReader // LUCENENET TODO: Change to GetBytesReader()
+ public FST.BytesReader GetBytesReader()
{
- get
+ if (fst == null)
{
- if (fst == null)
- {
- return null;
- }
- else
- {
- return fst.GetBytesReader();
- }
+ return null;
+ }
+ else
+ {
+ return fst.GetBytesReader();
}
}
[37/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed
NGram in Git
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
new file mode 100644
index 0000000..4ccecfa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
@@ -0,0 +1,278 @@
+\ufeffusing Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+using Reader = System.IO.TextReader;
+using Version = Lucene.Net.Util.LuceneVersion;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <seealso cref="EdgeNGramTokenizer"/> for correctness.
+ /// </summary>
+ public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
+ {
+ private StringReader input;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+ input = new StringReader("abcde");
+ }
+
+ [Test]
+ public virtual void TestInvalidInput()
+ {
+ bool gotException = false;
+ try
+ {
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 0, 0);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput2()
+ {
+ bool gotException = false;
+ try
+ {
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput3()
+ {
+ bool gotException = false;
+ try
+ {
+ new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, -1, 2);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestFrontUnigram()
+ {
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestBackUnigram()
+ {
+#pragma warning disable 612, 618
+ Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 1);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 }, 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestOversizedNgrams()
+ {
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6);
+ AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestFrontRangeOfNgrams()
+ {
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestBackRangeOfNgrams()
+ {
+#pragma warning disable 612, 618
+ Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, 5, false); // abcde
+ }
+
+ [Test]
+ public virtual void TestReset()
+ {
+ EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
+ tokenizer.SetReader(new StringReader("abcde"));
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ [Test]
+ public virtual void TestRandomStrings()
+ {
+ for (int i = 0; i < 10; i++)
+ {
+ int min = TestUtil.NextInt(Random(), 2, 10);
+ int max = TestUtil.NextInt(Random(), min, 20);
+
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
+ CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER, 20);
+ CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 8192);
+ }
+
+ Analyzer b = new AnalyzerAnonymousInnerClassHelper2(this);
+ CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
+ CheckRandomData(Random(), b, 100 * RANDOM_MULTIPLIER, 8192, false, false);
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly EdgeNGramTokenizerTest outerInstance;
+
+ private int min;
+ private int max;
+
+ public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenizerTest outerInstance, int min, int max)
+ {
+ this.outerInstance = outerInstance;
+ this.min = min;
+ this.max = max;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+ {
+ Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly EdgeNGramTokenizerTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenizerTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+ {
+#pragma warning disable 612, 618
+ Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, reader, Lucene43EdgeNGramTokenizer.Side.BACK, 2, 4);
+#pragma warning restore 612, 618
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ }
+
+ [Test]
+ public virtual void TestTokenizerPositions()
+ {
+#pragma warning disable 612, 618
+ Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 0, 0 }, null, null, false);
+
+ tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 1, 1 }, null, null, false);
+ }
+
+ private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
+ {
+ // LUCENENET TODO: Changed randomizing strategy - not sure if this is right...
+ //string s = RandomStrings.randomAsciiOfLength(Random(), length);
+ string s = TestUtil.RandomAnalysisString(Random(), length, true);
+ TestNGrams(minGram, maxGram, s, nonTokenChars);
+ }
+
+ private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
+ {
+ NGramTokenizerTest.TestNGrams(minGram, maxGram, s, nonTokenChars, true);
+ }
+
+ [Test]
+ public virtual void TestLargeInput()
+ {
+ // test sliding
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
+ }
+
+ [Test]
+ public virtual void TestLargeMaxGram()
+ {
+ // test sliding with maxGram > 1024
+ int minGram = TestUtil.NextInt(Random(), 1290, 1300);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
+ }
+
+ [Test]
+ public virtual void TestPreTokenization()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
+ }
+
+ [Test]
+ public virtual void TestHeavyPreTokenization()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
+ }
+
+ [Test]
+ public virtual void TestFewTokenChars()
+ {
+ char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
+ Arrays.Fill(chrs, ' ');
+ for (int i = 0; i < chrs.Length; ++i)
+ {
+ if (Random().NextDouble() < 0.1)
+ {
+ chrs[i] = 'a';
+ }
+ }
+ int minGram = TestUtil.NextInt(Random(), 1, 2);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 2);
+ TestNGrams(minGram, maxGram, new string(chrs), " ");
+ }
+
+ [Test]
+ public virtual void TestFullUTF8Range()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
+ TestNGrams(minGram, maxGram, s, "");
+ TestNGrams(minGram, maxGram, s, "abcdef");
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
new file mode 100644
index 0000000..e485fc0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
@@ -0,0 +1,249 @@
+\ufeffusing Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <seealso cref="NGramTokenFilter"/> for correctness.
+ /// </summary>
+ public class NGramTokenFilterTest : BaseTokenStreamTestCase
+ {
+ private TokenStream input;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput()
+ {
+ bool gotException = false;
+ try
+ {
+ new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput2()
+ {
+ bool gotException = false;
+ try
+ {
+ new NGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 1);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestUnigrams()
+ {
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1);
+ AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
+ }
+
+ [Test]
+ public virtual void TestBigrams()
+ {
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2);
+ AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0 });
+ }
+
+ [Test]
+ public virtual void TestNgrams()
+ {
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
+ AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
+ }
+
+ [Test]
+ public virtual void TestNgramsNoIncrement()
+ {
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
+ AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
+ }
+
+ [Test]
+ public virtual void TestOversizedNgrams()
+ {
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);
+ AssertTokenStreamContents(filter, new string[0], new int[0], new int[0]);
+ }
+
+ [Test]
+ public virtual void TestSmallTokenInStream()
+ {
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
+ AssertTokenStreamContents(filter, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 }, new int[] { 1, 2 });
+ }
+
+ [Test]
+ public virtual void TestReset()
+ {
+ WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+ NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
+ AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
+ tokenizer.SetReader(new StringReader("abcde"));
+ AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
+ }
+
+ // LUCENE-3642
+ // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
+ // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
+ // so in this case we behave like WDF, and preserve any modified offsets
+ [Test]
+ public virtual void TestInvalidOffsets()
+ {
+ Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
+ AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly NGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper(NGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
+ filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
+ return new TokenStreamComponents(tokenizer, filters);
+ }
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ [Test]
+ public virtual void TestRandomStrings()
+ {
+ for (int i = 0; i < 10; i++)
+ {
+ int min = TestUtil.NextInt(Random(), 2, 10);
+ int max = TestUtil.NextInt(Random(), min, 20);
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
+ CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly NGramTokenFilterTest outerInstance;
+
+ private int min;
+ private int max;
+
+ public AnalyzerAnonymousInnerClassHelper2(NGramTokenFilterTest outerInstance, int min, int max)
+ {
+ this.outerInstance = outerInstance;
+ this.min = min;
+ this.max = max;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
+ }
+ }
+
+ [Test]
+ public virtual void TestEmptyTerm()
+ {
+ Random random = Random();
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
+ CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly NGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper3(NGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
+ }
+ }
+
+ [Test]
+ public virtual void TestLucene43()
+ {
+#pragma warning disable 612, 618
+ NGramTokenFilter filter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" }, new int[] { 0, 1, 2, 3, 0, 1, 2 }, new int[] { 2, 3, 4, 5, 3, 4, 5 }, null, new int[] { 1, 1, 1, 1, 1, 1, 1 }, null, null, false);
+ }
+
+ [Test]
+ public virtual void TestSupplementaryCharacters()
+ {
+ string s = TestUtil.RandomUnicodeString(Random(), 10);
+ int codePointCount = s.CodePointCount(0, s.Length);
+ int minGram = TestUtil.NextInt(Random(), 1, 3);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 10);
+ TokenStream tk = new KeywordTokenizer(new StringReader(s));
+ tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
+ ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
+ IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
+ tk.Reset();
+ for (int start = 0; start < codePointCount; ++start)
+ {
+ for (int end = start + minGram; end <= Math.Min(codePointCount, start + maxGram); ++end)
+ {
+ assertTrue(tk.IncrementToken());
+ assertEquals(0, offsetAtt.StartOffset);
+ assertEquals(s.Length, offsetAtt.EndOffset);
+ int startIndex = Character.OffsetByCodePoints(s, 0, start);
+ int endIndex = Character.OffsetByCodePoints(s, 0, end);
+ assertEquals(s.Substring(startIndex, endIndex - startIndex), termAtt.ToString());
+ }
+ }
+ assertFalse(tk.IncrementToken());
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
new file mode 100644
index 0000000..2fc1356
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
@@ -0,0 +1,303 @@
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <seealso cref="NGramTokenizer"/> for correctness.
+ /// </summary>
+ public class NGramTokenizerTest : BaseTokenStreamTestCase
+ {
+ private StringReader input;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+ input = new StringReader("abcde");
+ }
+
+ [Test]
+ public virtual void TestInvalidInput()
+ {
+ bool gotException = false;
+ try
+ {
+ new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput2()
+ {
+ bool gotException = false;
+ try
+ {
+ new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestUnigrams()
+ {
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestBigrams()
+ {
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);
+ AssertTokenStreamContents(tokenizer, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestNgrams()
+ {
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 }, new int[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 }, null, null, null, 5, false); // abcde
+ }
+
+ [Test]
+ public virtual void TestOversizedNgrams()
+ {
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);
+ AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
+ }
+
+ [Test]
+ public virtual void TestReset()
+ {
+ NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
+ tokenizer.SetReader(new StringReader("abcde"));
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ [Test]
+ public virtual void TestRandomStrings()
+ {
+ for (int i = 0; i < 10; i++)
+ {
+ int min = TestUtil.NextInt(Random(), 2, 10);
+ int max = TestUtil.NextInt(Random(), min, 20);
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
+ CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
+ CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 1027);
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly NGramTokenizerTest outerInstance;
+
+ private int min;
+ private int max;
+
+ public AnalyzerAnonymousInnerClassHelper(NGramTokenizerTest outerInstance, int min, int max)
+ {
+ this.outerInstance = outerInstance;
+ this.min = min;
+ this.max = max;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ }
+
+ private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
+ {
+ //string s = RandomStrings.randomAsciiOfLength(Random(), length);
+ string s = TestUtil.RandomAnalysisString(Random(), length, true);
+ TestNGrams(minGram, maxGram, s, nonTokenChars);
+ }
+
+ private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
+ {
+ TestNGrams(minGram, maxGram, s, nonTokenChars, false);
+ }
+
+ internal static int[] toCodePoints(string s)
+ {
+ int[] codePoints = new int[Character.CodePointCount(s, 0, s.Length)];
+ for (int i = 0, j = 0; i < s.Length; ++j)
+ {
+ codePoints[j] = Character.CodePointAt(s, i);
+ i += Character.CharCount(codePoints[j]);
+ }
+ return codePoints;
+ }
+
+ internal static bool isTokenChar(string nonTokenChars, int codePoint)
+ {
+ for (int i = 0; i < nonTokenChars.Length;)
+ {
+ int cp = char.ConvertToUtf32(nonTokenChars, i);
+ if (cp == codePoint)
+ {
+ return false;
+ }
+ i += Character.CharCount(cp);
+ }
+ return true;
+ }
+
+ internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
+ {
+ // convert the string to code points
+ int[] codePoints = toCodePoints(s);
+ int[] offsets = new int[codePoints.Length + 1];
+ for (int i = 0; i < codePoints.Length; ++i)
+ {
+ offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
+ }
+ TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
+ ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
+ IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
+ IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
+ IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
+ grams.Reset();
+ for (int start = 0; start < codePoints.Length; ++start)
+ {
+ for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
+ {
+ if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
+ {
+ // not on an edge
+ goto nextGramContinue;
+ }
+ for (int j = start; j < end; ++j)
+ {
+ if (!isTokenChar(nonTokenChars, codePoints[j]))
+ {
+ goto nextGramContinue;
+ }
+ }
+ assertTrue(grams.IncrementToken());
+ assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
+ assertEquals(1, posIncAtt.PositionIncrement);
+ assertEquals(1, posLenAtt.PositionLength);
+ assertEquals(offsets[start], offsetAtt.StartOffset);
+ assertEquals(offsets[end], offsetAtt.EndOffset);
+ nextGramContinue:;
+ }
+ //nextGramBreak:;
+ }
+ assertFalse(grams.IncrementToken());
+ grams.End();
+ assertEquals(s.Length, offsetAtt.StartOffset);
+ assertEquals(s.Length, offsetAtt.EndOffset);
+ }
+
+ private class NGramTokenizerAnonymousInnerClassHelper : NGramTokenizer
+ {
+ private string nonTokenChars;
+
+ public NGramTokenizerAnonymousInnerClassHelper(LuceneVersion TEST_VERSION_CURRENT, StringReader java, int minGram, int maxGram, bool edgesOnly, string nonTokenChars)
+ : base(TEST_VERSION_CURRENT, java, minGram, maxGram, edgesOnly)
+ {
+ this.nonTokenChars = nonTokenChars;
+ }
+
+ protected override bool IsTokenChar(int chr)
+ {
+ return nonTokenChars.IndexOf((char)chr) < 0;
+ }
+ }
+
+ [Test]
+ public virtual void TestLargeInput()
+ {
+ // test sliding
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
+ }
+
+ [Test]
+ public virtual void TestLargeMaxGram()
+ {
+ // test sliding with maxGram > 1024
+ int minGram = TestUtil.NextInt(Random(), 1290, 1300);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
+ }
+
+ [Test]
+ public virtual void TestPreTokenization()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
+ }
+
+ [Test]
+ public virtual void TestHeavyPreTokenization()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
+ }
+
+ [Test]
+ public virtual void TestFewTokenChars()
+ {
+ char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
+ Arrays.Fill(chrs, ' ');
+ for (int i = 0; i < chrs.Length; ++i)
+ {
+ if (Random().NextDouble() < 0.1)
+ {
+ chrs[i] = 'a';
+ }
+ }
+ int minGram = TestUtil.NextInt(Random(), 1, 2);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 2);
+ TestNGrams(minGram, maxGram, new string(chrs), " ");
+ }
+
+ [Test]
+ public virtual void TestFullUTF8Range()
+ {
+ int minGram = TestUtil.NextInt(Random(), 1, 100);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+ string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
+ TestNGrams(minGram, maxGram, s, "");
+ TestNGrams(minGram, maxGram, s, "abcdef");
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
new file mode 100644
index 0000000..c0683a6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
@@ -0,0 +1,196 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+using Reader = System.IO.TextReader;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Simple tests to ensure the NGram filter factories are working.
+ /// </summary>
+ public class TestNGramFilters : BaseTokenStreamFactoryTestCase
+ {
+ /// <summary>
+ /// Test NGramTokenizerFactory
+ /// </summary>
+ [Test]
+ public virtual void TestNGramTokenizer()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = TokenizerFactory("NGram").Create(reader);
+ AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
+ }
+
+ /// <summary>
+ /// Test NGramTokenizerFactory with min and max gram options
+ /// </summary>
+ [Test]
+ public virtual void TestNGramTokenizer2()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = TokenizerFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(reader);
+ AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
+ }
+
+ /// <summary>
+ /// Test the NGramFilterFactory
+ /// </summary>
+ [Test]
+ public virtual void TestNGramFilter()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = TokenFilterFactory("NGram").Create(stream);
+ AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
+ }
+
+ /// <summary>
+ /// Test the NGramFilterFactory with min and max gram options
+ /// </summary>
+ [Test]
+ public virtual void TestNGramFilter2()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = TokenFilterFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(stream);
+ AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramTokenizerFactory
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramTokenizer()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = TokenizerFactory("EdgeNGram").Create(reader);
+ AssertTokenStreamContents(stream, new string[] { "t" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramTokenizerFactory with min and max gram size
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramTokenizer2()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = TokenizerFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(reader);
+ AssertTokenStreamContents(stream, new string[] { "t", "te" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramTokenizerFactory with side option
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramTokenizer3()
+ {
+ Reader reader = new StringReader("ready");
+#pragma warning disable 612, 618
+ TokenStream stream = TokenizerFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(reader);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(stream, new string[] { "y" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramFilterFactory
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramFilter()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = TokenFilterFactory("EdgeNGram").Create(stream);
+ AssertTokenStreamContents(stream, new string[] { "t" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramFilterFactory with min and max gram size
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramFilter2()
+ {
+ Reader reader = new StringReader("test");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = TokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(stream);
+ AssertTokenStreamContents(stream, new string[] { "t", "te" });
+ }
+
+ /// <summary>
+ /// Test EdgeNGramFilterFactory with side option
+ /// </summary>
+ [Test]
+ public virtual void TestEdgeNGramFilter3()
+ {
+ Reader reader = new StringReader("ready");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+#pragma warning disable 612, 618
+ stream = TokenFilterFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(stream);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(stream, new string[] { "y" });
+ }
+
+ /// <summary>
+ /// Test that bogus arguments result in exception </summary>
+ [Test]
+ public virtual void TestBogusArguments()
+ {
+ try
+ {
+ TokenizerFactory("NGram", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+
+ try
+ {
+ TokenizerFactory("EdgeNGram", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+
+ try
+ {
+ TokenFilterFactory("NGram", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+
+ try
+ {
+ TokenFilterFactory("EdgeNGram", "bogusArg", "bogusValue");
+ fail();
+ }
+ catch (System.ArgumentException expected)
+ {
+ assertTrue(expected.Message.Contains("Unknown parameters"));
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
deleted file mode 100644
index ea6fbd7..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
+++ /dev/null
@@ -1,390 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.Shingle;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tests <seealso cref="EdgeNGramTokenFilter"/> for correctness.
- /// </summary>
- public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
- {
- private TokenStream input;
-
- public override void SetUp()
- {
- base.SetUp();
- input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
- }
-
- [Test]
- public virtual void TestInvalidInput()
- {
- bool gotException = false;
- try
- {
-#pragma warning disable 612, 618
- new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 0, 0);
-#pragma warning restore 612, 618
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput2()
- {
- bool gotException = false;
- try
- {
-#pragma warning disable 612, 618
- new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 2, 1);
-#pragma warning restore 612, 618
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput3()
- {
- bool gotException = false;
- try
- {
-#pragma warning disable 612, 618
- new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, -1, 2);
-#pragma warning restore 612, 618
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestFrontUnigram()
- {
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 5 });
- }
-
- [Test]
- public virtual void TestBackUnigram()
- {
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 });
- }
-
- [Test]
- public virtual void TestOversizedNgrams()
- {
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0]);
- }
-
- [Test]
- public virtual void TestFrontRangeOfNgrams()
- {
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
- }
-
- [Test]
- public virtual void TestBackRangeOfNgrams()
- {
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, null, false);
- }
-
- [Test]
- public virtual void TestFilterPositions()
- {
- TokenStream ts = new MockTokenizer(new StringReader("abcde vwxyz"), MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "v", "vw", "vwx" }, new int[] { 0, 0, 0, 6, 6, 6 }, new int[] { 5, 5, 5, 11, 11, 11 }, null, new int[] { 1, 0, 0, 1, 0, 0 }, null, null, false);
- }
-
- private class PositionFilter : TokenFilter
- {
-
- internal readonly IPositionIncrementAttribute posIncrAtt;
- internal bool started;
-
- internal PositionFilter(TokenStream input) : base(input)
- {
- posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
- }
-
- public override sealed bool IncrementToken()
- {
- if (m_input.IncrementToken())
- {
- if (started)
- {
- posIncrAtt.PositionIncrement = 0;
- }
- else
- {
- started = true;
- }
- return true;
- }
- else
- {
- return false;
- }
- }
-
- public override void Reset()
- {
- base.Reset();
- started = false;
- }
- }
-
- [Test]
- public virtual void TestFirstTokenPositionIncrement()
- {
- TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
- ts = new PositionFilter(ts); // All but first token will get 0 position increment
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 2, 3);
-#pragma warning restore 612, 618
- // The first token "a" will not be output, since it's smaller than the mingram size of 2.
- // The second token on input to EdgeNGramTokenFilter will have position increment of 0,
- // which should be increased to 1, since this is the first output token in the stream.
- AssertTokenStreamContents(filter, new string[] { "ab", "abc" }, new int[] { 2, 2 }, new int[] { 5, 5 }, new int[] { 1, 0 });
- }
-
- [Test]
- public virtual void TestSmallTokenInStream()
- {
- input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
- }
-
- [Test]
- public virtual void TestReset()
- {
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
-#pragma warning disable 612, 618
- EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
- tokenizer.SetReader(new StringReader("abcde"));
- AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
- }
-
- // LUCENE-3642
- // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
- // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
- // so in this case we behave like WDF, and preserve any modified offsets
- [Test]
- public virtual void TestInvalidOffsets()
- {
- Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
- AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "mos", "mosf", "mosfe", "mosfel", "mosfell", "mosfells", "mosfellsb", "mosfellsba", "mosfellsbae", "mosfellsbaer" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 });
- }
-
- private class AnalyzerAnonymousInnerClassHelper : Analyzer
- {
- private readonly EdgeNGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
-#pragma warning disable 612, 618
- filters = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
-#pragma warning restore 612, 618
- return new TokenStreamComponents(tokenizer, filters);
- }
- }
-
- /// <summary>
- /// blast some random strings through the analyzer </summary>
- [Test]
- public virtual void TestRandomStrings()
- {
- for (int i = 0; i < 10; i++)
- {
- int min = TestUtil.NextInt(Random(), 2, 10);
- int max = TestUtil.NextInt(Random(), min, 20);
-
- Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
- CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER);
- }
-
- Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this);
- CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
- }
-
- private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
- {
- private readonly EdgeNGramTokenFilterTest outerInstance;
-
- private int min;
- private int max;
-
- public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenFilterTest outerInstance, int min, int max)
- {
- this.outerInstance = outerInstance;
- this.min = min;
- this.max = max;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
- {
- private readonly EdgeNGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper3(EdgeNGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
- return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
-#pragma warning restore 612, 618
- }
- }
-
- [Test]
- public virtual void TestEmptyTerm()
- {
- Random random = Random();
- Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this);
- CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
-
- Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this);
- CheckAnalysisConsistency(random, b, random.nextBoolean(), "");
- }
-
- private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
- {
- private readonly EdgeNGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper4(EdgeNGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
-#pragma warning disable 612, 618
- return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
-#pragma warning restore 612, 618
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
- {
- private readonly EdgeNGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper5(EdgeNGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
-#pragma warning disable 612, 618
- return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
-#pragma warning restore 612, 618
- }
- }
-
- [Test]
- public virtual void TestGraphs()
- {
- TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
- tk = new ShingleFilter(tk);
- tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
- AssertTokenStreamContents(tk, new string[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6, 11, 11, 14 }, new int[] { 13, 19, 19, 21 }, new int[] { 3, 1, 0, 1 }, new int[] { 2, 2, 2, 2 }, 23);
- }
-
- [Test]
- public virtual void TestSupplementaryCharacters()
- {
- string s = TestUtil.RandomUnicodeString(Random(), 10);
- int codePointCount = s.CodePointCount(0, s.Length);
- int minGram = TestUtil.NextInt(Random(), 1, 3);
- int maxGram = TestUtil.NextInt(Random(), minGram, 10);
- TokenStream tk = new KeywordTokenizer(new StringReader(s));
- tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
- ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
- IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
- tk.Reset();
- for (int i = minGram; i <= Math.Min(codePointCount, maxGram); ++i)
- {
- assertTrue(tk.IncrementToken());
- assertEquals(0, offsetAtt.StartOffset);
- assertEquals(s.Length, offsetAtt.EndOffset);
- int end = Character.OffsetByCodePoints(s, 0, i);
- assertEquals(s.Substring(0, end), termAtt.ToString());
- }
- assertFalse(tk.IncrementToken());
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
deleted file mode 100644
index 4ccecfa..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
+++ /dev/null
@@ -1,278 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-using Reader = System.IO.TextReader;
-using Version = Lucene.Net.Util.LuceneVersion;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tests <seealso cref="EdgeNGramTokenizer"/> for correctness.
- /// </summary>
- public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
- {
- private StringReader input;
-
- public override void SetUp()
- {
- base.SetUp();
- input = new StringReader("abcde");
- }
-
- [Test]
- public virtual void TestInvalidInput()
- {
- bool gotException = false;
- try
- {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 0, 0);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput2()
- {
- bool gotException = false;
- try
- {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput3()
- {
- bool gotException = false;
- try
- {
- new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, -1, 2);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestFrontUnigram()
- {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
- AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5); // abcde
- }
-
- [Test]
- public virtual void TestBackUnigram()
- {
-#pragma warning disable 612, 618
- Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 1);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 }, 5); // abcde
- }
-
- [Test]
- public virtual void TestOversizedNgrams()
- {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6);
- AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
- }
-
- [Test]
- public virtual void TestFrontRangeOfNgrams()
- {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
- }
-
- [Test]
- public virtual void TestBackRangeOfNgrams()
- {
-#pragma warning disable 612, 618
- Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, 5, false); // abcde
- }
-
- [Test]
- public virtual void TestReset()
- {
- EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
- tokenizer.SetReader(new StringReader("abcde"));
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
- }
-
- /// <summary>
- /// blast some random strings through the analyzer </summary>
- [Test]
- public virtual void TestRandomStrings()
- {
- for (int i = 0; i < 10; i++)
- {
- int min = TestUtil.NextInt(Random(), 2, 10);
- int max = TestUtil.NextInt(Random(), min, 20);
-
- Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
- CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER, 20);
- CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 8192);
- }
-
- Analyzer b = new AnalyzerAnonymousInnerClassHelper2(this);
- CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
- CheckRandomData(Random(), b, 100 * RANDOM_MULTIPLIER, 8192, false, false);
- }
-
- private class AnalyzerAnonymousInnerClassHelper : Analyzer
- {
- private readonly EdgeNGramTokenizerTest outerInstance;
-
- private int min;
- private int max;
-
- public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenizerTest outerInstance, int min, int max)
- {
- this.outerInstance = outerInstance;
- this.min = min;
- this.max = max;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
- {
- Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
- return new TokenStreamComponents(tokenizer, tokenizer);
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
- {
- private readonly EdgeNGramTokenizerTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenizerTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
- {
-#pragma warning disable 612, 618
- Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, reader, Lucene43EdgeNGramTokenizer.Side.BACK, 2, 4);
-#pragma warning restore 612, 618
- return new TokenStreamComponents(tokenizer, tokenizer);
- }
- }
-
- [Test]
- public virtual void TestTokenizerPositions()
- {
-#pragma warning disable 612, 618
- Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 0, 0 }, null, null, false);
-
- tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 1, 1 }, null, null, false);
- }
-
- private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
- {
- // LUCENENET TODO: Changed randomizing strategy - not sure if this is right...
- //string s = RandomStrings.randomAsciiOfLength(Random(), length);
- string s = TestUtil.RandomAnalysisString(Random(), length, true);
- TestNGrams(minGram, maxGram, s, nonTokenChars);
- }
-
- private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
- {
- NGramTokenizerTest.TestNGrams(minGram, maxGram, s, nonTokenChars, true);
- }
-
- [Test]
- public virtual void TestLargeInput()
- {
- // test sliding
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
- }
-
- [Test]
- public virtual void TestLargeMaxGram()
- {
- // test sliding with maxGram > 1024
- int minGram = TestUtil.NextInt(Random(), 1290, 1300);
- int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
- }
-
- [Test]
- public virtual void TestPreTokenization()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
- }
-
- [Test]
- public virtual void TestHeavyPreTokenization()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
- }
-
- [Test]
- public virtual void TestFewTokenChars()
- {
- char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
- Arrays.Fill(chrs, ' ');
- for (int i = 0; i < chrs.Length; ++i)
- {
- if (Random().NextDouble() < 0.1)
- {
- chrs[i] = 'a';
- }
- }
- int minGram = TestUtil.NextInt(Random(), 1, 2);
- int maxGram = TestUtil.NextInt(Random(), minGram, 2);
- TestNGrams(minGram, maxGram, new string(chrs), " ");
- }
-
- [Test]
- public virtual void TestFullUTF8Range()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
- TestNGrams(minGram, maxGram, s, "");
- TestNGrams(minGram, maxGram, s, "abcdef");
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
deleted file mode 100644
index e485fc0..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
+++ /dev/null
@@ -1,249 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tests <seealso cref="NGramTokenFilter"/> for correctness.
- /// </summary>
- public class NGramTokenFilterTest : BaseTokenStreamTestCase
- {
- private TokenStream input;
-
- public override void SetUp()
- {
- base.SetUp();
- input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
- }
-
- [Test]
- public virtual void TestInvalidInput()
- {
- bool gotException = false;
- try
- {
- new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput2()
- {
- bool gotException = false;
- try
- {
- new NGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 1);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestUnigrams()
- {
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1);
- AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
- }
-
- [Test]
- public virtual void TestBigrams()
- {
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2);
- AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0 });
- }
-
- [Test]
- public virtual void TestNgrams()
- {
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
- AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
- }
-
- [Test]
- public virtual void TestNgramsNoIncrement()
- {
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
- AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
- }
-
- [Test]
- public virtual void TestOversizedNgrams()
- {
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);
- AssertTokenStreamContents(filter, new string[0], new int[0], new int[0]);
- }
-
- [Test]
- public virtual void TestSmallTokenInStream()
- {
- input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
- AssertTokenStreamContents(filter, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 }, new int[] { 1, 2 });
- }
-
- [Test]
- public virtual void TestReset()
- {
- WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
- NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
- AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
- tokenizer.SetReader(new StringReader("abcde"));
- AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
- }
-
- // LUCENE-3642
- // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
- // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
- // so in this case we behave like WDF, and preserve any modified offsets
- [Test]
- public virtual void TestInvalidOffsets()
- {
- Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
- AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
- }
-
- private class AnalyzerAnonymousInnerClassHelper : Analyzer
- {
- private readonly NGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper(NGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
- filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
- return new TokenStreamComponents(tokenizer, filters);
- }
- }
-
- /// <summary>
- /// blast some random strings through the analyzer </summary>
- [Test]
- public virtual void TestRandomStrings()
- {
- for (int i = 0; i < 10; i++)
- {
- int min = TestUtil.NextInt(Random(), 2, 10);
- int max = TestUtil.NextInt(Random(), min, 20);
- Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
- CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
- {
- private readonly NGramTokenFilterTest outerInstance;
-
- private int min;
- private int max;
-
- public AnalyzerAnonymousInnerClassHelper2(NGramTokenFilterTest outerInstance, int min, int max)
- {
- this.outerInstance = outerInstance;
- this.min = min;
- this.max = max;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
- }
- }
-
- [Test]
- public virtual void TestEmptyTerm()
- {
- Random random = Random();
- Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
- CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
- }
-
- private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
- {
- private readonly NGramTokenFilterTest outerInstance;
-
- public AnalyzerAnonymousInnerClassHelper3(NGramTokenFilterTest outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new KeywordTokenizer(reader);
- return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
- }
- }
-
- [Test]
- public virtual void TestLucene43()
- {
-#pragma warning disable 612, 618
- NGramTokenFilter filter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" }, new int[] { 0, 1, 2, 3, 0, 1, 2 }, new int[] { 2, 3, 4, 5, 3, 4, 5 }, null, new int[] { 1, 1, 1, 1, 1, 1, 1 }, null, null, false);
- }
-
- [Test]
- public virtual void TestSupplementaryCharacters()
- {
- string s = TestUtil.RandomUnicodeString(Random(), 10);
- int codePointCount = s.CodePointCount(0, s.Length);
- int minGram = TestUtil.NextInt(Random(), 1, 3);
- int maxGram = TestUtil.NextInt(Random(), minGram, 10);
- TokenStream tk = new KeywordTokenizer(new StringReader(s));
- tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
- ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
- IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
- tk.Reset();
- for (int start = 0; start < codePointCount; ++start)
- {
- for (int end = start + minGram; end <= Math.Min(codePointCount, start + maxGram); ++end)
- {
- assertTrue(tk.IncrementToken());
- assertEquals(0, offsetAtt.StartOffset);
- assertEquals(s.Length, offsetAtt.EndOffset);
- int startIndex = Character.OffsetByCodePoints(s, 0, start);
- int endIndex = Character.OffsetByCodePoints(s, 0, end);
- assertEquals(s.Substring(startIndex, endIndex - startIndex), termAtt.ToString());
- }
- }
- assertFalse(tk.IncrementToken());
- }
- }
-}
\ No newline at end of file
[36/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed
NGram in Git
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
deleted file mode 100644
index 2fc1356..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ /dev/null
@@ -1,303 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tests <seealso cref="NGramTokenizer"/> for correctness.
- /// </summary>
- public class NGramTokenizerTest : BaseTokenStreamTestCase
- {
- private StringReader input;
-
- public override void SetUp()
- {
- base.SetUp();
- input = new StringReader("abcde");
- }
-
- [Test]
- public virtual void TestInvalidInput()
- {
- bool gotException = false;
- try
- {
- new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestInvalidInput2()
- {
- bool gotException = false;
- try
- {
- new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
- }
- catch (System.ArgumentException)
- {
- gotException = true;
- }
- assertTrue(gotException);
- }
-
- [Test]
- public virtual void TestUnigrams()
- {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
- }
-
- [Test]
- public virtual void TestBigrams()
- {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2);
- AssertTokenStreamContents(tokenizer, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5); // abcde
- }
-
- [Test]
- public virtual void TestNgrams()
- {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 }, new int[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 }, null, null, null, 5, false); // abcde
- }
-
- [Test]
- public virtual void TestOversizedNgrams()
- {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7);
- AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
- }
-
- [Test]
- public virtual void TestReset()
- {
- NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
- AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
- tokenizer.SetReader(new StringReader("abcde"));
- AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
- }
-
- /// <summary>
- /// blast some random strings through the analyzer </summary>
- [Test]
- public virtual void TestRandomStrings()
- {
- for (int i = 0; i < 10; i++)
- {
- int min = TestUtil.NextInt(Random(), 2, 10);
- int max = TestUtil.NextInt(Random(), min, 20);
- Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
- CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
- CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 1027);
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper : Analyzer
- {
- private readonly NGramTokenizerTest outerInstance;
-
- private int min;
- private int max;
-
- public AnalyzerAnonymousInnerClassHelper(NGramTokenizerTest outerInstance, int min, int max)
- {
- this.outerInstance = outerInstance;
- this.min = min;
- this.max = max;
- }
-
- protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
- {
- Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
- return new TokenStreamComponents(tokenizer, tokenizer);
- }
- }
-
- private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
- {
- //string s = RandomStrings.randomAsciiOfLength(Random(), length);
- string s = TestUtil.RandomAnalysisString(Random(), length, true);
- TestNGrams(minGram, maxGram, s, nonTokenChars);
- }
-
- private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
- {
- TestNGrams(minGram, maxGram, s, nonTokenChars, false);
- }
-
- internal static int[] toCodePoints(string s)
- {
- int[] codePoints = new int[Character.CodePointCount(s, 0, s.Length)];
- for (int i = 0, j = 0; i < s.Length; ++j)
- {
- codePoints[j] = Character.CodePointAt(s, i);
- i += Character.CharCount(codePoints[j]);
- }
- return codePoints;
- }
-
- internal static bool isTokenChar(string nonTokenChars, int codePoint)
- {
- for (int i = 0; i < nonTokenChars.Length;)
- {
- int cp = char.ConvertToUtf32(nonTokenChars, i);
- if (cp == codePoint)
- {
- return false;
- }
- i += Character.CharCount(cp);
- }
- return true;
- }
-
- internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
- {
- // convert the string to code points
- int[] codePoints = toCodePoints(s);
- int[] offsets = new int[codePoints.Length + 1];
- for (int i = 0; i < codePoints.Length; ++i)
- {
- offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
- }
- TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
- ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
- IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
- IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
- IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
- grams.Reset();
- for (int start = 0; start < codePoints.Length; ++start)
- {
- for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
- {
- if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]))
- {
- // not on an edge
- goto nextGramContinue;
- }
- for (int j = start; j < end; ++j)
- {
- if (!isTokenChar(nonTokenChars, codePoints[j]))
- {
- goto nextGramContinue;
- }
- }
- assertTrue(grams.IncrementToken());
- assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
- assertEquals(1, posIncAtt.PositionIncrement);
- assertEquals(1, posLenAtt.PositionLength);
- assertEquals(offsets[start], offsetAtt.StartOffset);
- assertEquals(offsets[end], offsetAtt.EndOffset);
- nextGramContinue:;
- }
- //nextGramBreak:;
- }
- assertFalse(grams.IncrementToken());
- grams.End();
- assertEquals(s.Length, offsetAtt.StartOffset);
- assertEquals(s.Length, offsetAtt.EndOffset);
- }
-
- private class NGramTokenizerAnonymousInnerClassHelper : NGramTokenizer
- {
- private string nonTokenChars;
-
- public NGramTokenizerAnonymousInnerClassHelper(LuceneVersion TEST_VERSION_CURRENT, StringReader java, int minGram, int maxGram, bool edgesOnly, string nonTokenChars)
- : base(TEST_VERSION_CURRENT, java, minGram, maxGram, edgesOnly)
- {
- this.nonTokenChars = nonTokenChars;
- }
-
- protected override bool IsTokenChar(int chr)
- {
- return nonTokenChars.IndexOf((char)chr) < 0;
- }
- }
-
- [Test]
- public virtual void TestLargeInput()
- {
- // test sliding
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
- }
-
- [Test]
- public virtual void TestLargeMaxGram()
- {
- // test sliding with maxGram > 1024
- int minGram = TestUtil.NextInt(Random(), 1290, 1300);
- int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
- }
-
- [Test]
- public virtual void TestPreTokenization()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
- }
-
- [Test]
- public virtual void TestHeavyPreTokenization()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
- }
-
- [Test]
- public virtual void TestFewTokenChars()
- {
- char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
- Arrays.Fill(chrs, ' ');
- for (int i = 0; i < chrs.Length; ++i)
- {
- if (Random().NextDouble() < 0.1)
- {
- chrs[i] = 'a';
- }
- }
- int minGram = TestUtil.NextInt(Random(), 1, 2);
- int maxGram = TestUtil.NextInt(Random(), minGram, 2);
- TestNGrams(minGram, maxGram, new string(chrs), " ");
- }
-
- [Test]
- public virtual void TestFullUTF8Range()
- {
- int minGram = TestUtil.NextInt(Random(), 1, 100);
- int maxGram = TestUtil.NextInt(Random(), minGram, 100);
- string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
- TestNGrams(minGram, maxGram, s, "");
- TestNGrams(minGram, maxGram, s, "abcdef");
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
deleted file mode 100644
index c0683a6..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
+++ /dev/null
@@ -1,196 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-using Reader = System.IO.TextReader;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Simple tests to ensure the NGram filter factories are working.
- /// </summary>
- public class TestNGramFilters : BaseTokenStreamFactoryTestCase
- {
- /// <summary>
- /// Test NGramTokenizerFactory
- /// </summary>
- [Test]
- public virtual void TestNGramTokenizer()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = TokenizerFactory("NGram").Create(reader);
- AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
- }
-
- /// <summary>
- /// Test NGramTokenizerFactory with min and max gram options
- /// </summary>
- [Test]
- public virtual void TestNGramTokenizer2()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = TokenizerFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(reader);
- AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
- }
-
- /// <summary>
- /// Test the NGramFilterFactory
- /// </summary>
- [Test]
- public virtual void TestNGramFilter()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- stream = TokenFilterFactory("NGram").Create(stream);
- AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" });
- }
-
- /// <summary>
- /// Test the NGramFilterFactory with min and max gram options
- /// </summary>
- [Test]
- public virtual void TestNGramFilter2()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- stream = TokenFilterFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(stream);
- AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" });
- }
-
- /// <summary>
- /// Test EdgeNGramTokenizerFactory
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramTokenizer()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = TokenizerFactory("EdgeNGram").Create(reader);
- AssertTokenStreamContents(stream, new string[] { "t" });
- }
-
- /// <summary>
- /// Test EdgeNGramTokenizerFactory with min and max gram size
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramTokenizer2()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = TokenizerFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(reader);
- AssertTokenStreamContents(stream, new string[] { "t", "te" });
- }
-
- /// <summary>
- /// Test EdgeNGramTokenizerFactory with side option
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramTokenizer3()
- {
- Reader reader = new StringReader("ready");
-#pragma warning disable 612, 618
- TokenStream stream = TokenizerFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(reader);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(stream, new string[] { "y" });
- }
-
- /// <summary>
- /// Test EdgeNGramFilterFactory
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramFilter()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- stream = TokenFilterFactory("EdgeNGram").Create(stream);
- AssertTokenStreamContents(stream, new string[] { "t" });
- }
-
- /// <summary>
- /// Test EdgeNGramFilterFactory with min and max gram size
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramFilter2()
- {
- Reader reader = new StringReader("test");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- stream = TokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(stream);
- AssertTokenStreamContents(stream, new string[] { "t", "te" });
- }
-
- /// <summary>
- /// Test EdgeNGramFilterFactory with side option
- /// </summary>
- [Test]
- public virtual void TestEdgeNGramFilter3()
- {
- Reader reader = new StringReader("ready");
- TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
- stream = TokenFilterFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(stream);
-#pragma warning restore 612, 618
- AssertTokenStreamContents(stream, new string[] { "y" });
- }
-
- /// <summary>
- /// Test that bogus arguments result in exception </summary>
- [Test]
- public virtual void TestBogusArguments()
- {
- try
- {
- TokenizerFactory("NGram", "bogusArg", "bogusValue");
- fail();
- }
- catch (System.ArgumentException expected)
- {
- assertTrue(expected.Message.Contains("Unknown parameters"));
- }
-
- try
- {
- TokenizerFactory("EdgeNGram", "bogusArg", "bogusValue");
- fail();
- }
- catch (System.ArgumentException expected)
- {
- assertTrue(expected.Message.Contains("Unknown parameters"));
- }
-
- try
- {
- TokenFilterFactory("NGram", "bogusArg", "bogusValue");
- fail();
- }
- catch (System.ArgumentException expected)
- {
- assertTrue(expected.Message.Contains("Unknown parameters"));
- }
-
- try
- {
- TokenFilterFactory("EdgeNGram", "bogusArg", "bogusValue");
- fail();
- }
- catch (System.ArgumentException expected)
- {
- assertTrue(expected.Message.Contains("Unknown parameters"));
- }
- }
- }
-}
\ No newline at end of file
[17/39] lucenenet git commit: Lucene.Net.Analysis: Fixed
miscellaneous documentation comment issues
Posted by ni...@apache.org.
Lucene.Net.Analysis: Fixed miscellaneous documentation comment issues
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/898a818d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/898a818d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/898a818d
Branch: refs/heads/api-work
Commit: 898a818db2ae88b0d8527e34f00da1c780a1a8fd
Parents: e67f797
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 23:04:14 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:23 2017 +0700
----------------------------------------------------------------------
.../Analysis/Br/BrazilianAnalyzer.cs | 1 +
.../Analysis/Cjk/CJKBigramFilter.cs | 8 ++
.../Analysis/El/GreekStemmer.cs | 2 +-
.../Analysis/En/KStemmer.cs | 86 +++++++++-----------
.../Analysis/En/PorterStemmer.cs | 4 +-
.../Analysis/Eu/BasqueAnalyzer.cs | 2 +-
.../Analysis/Fr/FrenchStemmer.cs | 2 +
.../Analysis/Hunspell/Dictionary.cs | 3 +
.../Analysis/Hunspell/HunspellStemFilter.cs | 1 +
.../Analysis/Hunspell/Stemmer.cs | 6 ++
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 4 +-
.../Analysis/Ngram/NGramTokenizer.cs | 4 +-
.../Analysis/Nl/DutchStemFilter.cs | 4 +-
.../Analysis/No/NorwegianLightStemFilter.cs | 4 +-
.../Analysis/No/NorwegianMinimalStemFilter.cs | 1 +
.../Analysis/Ro/RomanianAnalyzer.cs | 1 +
16 files changed, 77 insertions(+), 56 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
index b6bd791..df78414 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
@@ -112,6 +112,7 @@ namespace Lucene.Net.Analysis.Br
/// lucene compatibility version </param>
/// <param name="stopwords">
/// a stopword set </param>
+ /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
public BrazilianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
: this(matchVersion, stopwords)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
index 443ea04..9a9c707 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
@@ -107,6 +107,8 @@ namespace Lucene.Net.Analysis.Cjk
/// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int)">
/// CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)</see>
/// </summary>
+ /// <param name="in">
+ /// Input <see cref="TokenStream"/> </param>
public CJKBigramFilter(TokenStream @in)
: this(@in, HAN | HIRAGANA | KATAKANA | HANGUL)
{
@@ -116,6 +118,10 @@ namespace Lucene.Net.Analysis.Cjk
/// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int, bool)">
/// CJKBigramFilter(in, flags, false)</see>
/// </summary>
+ /// <param name="in">
+ /// Input <see cref="TokenStream"/> </param>
+ /// <param name="flags"> OR'ed set from <see cref="CJKBigramFilter.HAN"/>, <see cref="CJKBigramFilter.HIRAGANA"/>,
+ /// <see cref="CJKBigramFilter.KATAKANA"/>, <see cref="CJKBigramFilter.HANGUL"/> </param>
public CJKBigramFilter(TokenStream @in, int flags)
: this(@in, flags, false)
{
@@ -124,6 +130,8 @@ namespace Lucene.Net.Analysis.Cjk
/// <summary>
/// Create a new <see cref="CJKBigramFilter"/>, specifying which writing systems should be bigrammed,
/// and whether or not unigrams should also be output. </summary>
+ /// <param name="in">
+ /// Input <see cref="TokenStream"/> </param>
/// <param name="flags"> OR'ed set from <see cref="CJKBigramFilter.HAN"/>, <see cref="CJKBigramFilter.HIRAGANA"/>,
/// <see cref="CJKBigramFilter.KATAKANA"/>, <see cref="CJKBigramFilter.HANGUL"/> </param>
/// <param name="outputUnigrams"> true if unigrams for the selected writing systems should also be output.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
index 4934410..5c6a40a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
@@ -1000,7 +1000,7 @@ namespace Lucene.Net.Analysis.El
/// </summary>
/// <param name="s"> A char[] array that represents a word. </param>
/// <param name="len"> The length of the char[] array. </param>
- /// <param name="suffix"> A <see cref="String"/> object to check if the word given ends with these characters. </param>
+ /// <param name="suffix"> A <see cref="string"/> object to check if the word given ends with these characters. </param>
/// <returns> True if the word ends with the suffix given , false otherwise. </returns>
private bool EndsWith(char[] s, int len, string suffix)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
index 1808ced..250af5b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -700,13 +700,13 @@ namespace Lucene.Net.Analysis.En
* common
*/
- /// <summary>
- ///**
- /// YCS: this was the one place where lookup was not followed by return.
- /// So restructure it. if ((j>0)&&(lookup(word.toString())) &&
- /// !((word.CharAt(j) == 's') && (word.CharAt(j-1) == 's'))) return;
- /// ****
- /// </summary>
+
+ //**
+ // YCS: this was the one place where lookup was not followed by return.
+ // So restructure it. if ((j>0)&&(lookup(word.toString())) &&
+ // !((word.CharAt(j) == 's') && (word.CharAt(j-1) == 's'))) return;
+ // ****
+
bool tryE = j > 0 && !((word.CharAt(j) == 's') && (word.CharAt(j - 1) == 's'));
if (tryE && Lookup())
{
@@ -1877,13 +1877,13 @@ namespace Lucene.Net.Analysis.En
{
get
{
- /// <summary>
- ///*
- /// if (!lookups.contains(word.toString())) { throw new
- /// RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
- /// }
- /// **
- /// </summary>
+
+ //*
+ // if (!lookups.contains(word.toString())) { throw new
+ // RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
+ // }
+ // **
+
// lookup();
return matchedEntry != null;
}
@@ -1916,15 +1916,13 @@ namespace Lucene.Net.Analysis.En
return false;
}
- /// <summary>
- ///*
- /// caching off is normally faster if (cache == null) initializeStemHash();
- ///
- /// // now check the cache, before we copy chars to "word" if (cache != null)
- /// { String val = cache.get(term, 0, len); if (val != null) { if (val !=
- /// SAME) { result = val; return true; } return false; } }
- /// **
- /// </summary>
+ //*
+ // caching off is normally faster if (cache == null) initializeStemHash();
+ //
+ // // now check the cache, before we copy chars to "word" if (cache != null)
+ // { String val = cache.get(term, 0, len); if (val != null) { if (val !=
+ // SAME) { result = val; return true; } return false; } }
+ // **
word.Reset();
// allocate enough space so that an expansion is never needed
@@ -1942,11 +1940,11 @@ namespace Lucene.Net.Analysis.En
}
matchedEntry = null;
- /// <summary>
- ///*
- /// lookups.clear(); lookups.add(word.toString());
- /// **
- /// </summary>
+
+ //*
+ // lookups.clear(); lookups.add(word.toString());
+ // **
+
/*
* This while loop will never be executed more than one time; it is here
@@ -2053,24 +2051,20 @@ namespace Lucene.Net.Analysis.En
result = entry.root; // may be null, which means that "word" is the stem
}
- /// <summary>
- ///*
- /// caching off is normally faster if (cache != null && cache.size() <
- /// maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
- /// key, 0, len); if (result != null) { cache.put(key, result); } else {
- /// cache.put(key, word.toString()); } }
- /// **
- /// </summary>
-
- /// <summary>
- ///*
- /// if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
- /// { System.out.println("CASE:" + word.toString() + "," + new
- /// String(term,0,len));
- ///
- /// } }
- /// **
- /// </summary>
+ //*
+ // caching off is normally faster if (cache != null && cache.size() <
+ // maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
+ // key, 0, len); if (result != null) { cache.put(key, result); } else {
+ // cache.put(key, word.toString()); } }
+ // **
+
+ //*
+ // if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
+ // { System.out.println("CASE:" + word.toString() + "," + new
+ // String(term,0,len));
+ //
+ // } }
+ // **
// no entry matched means result is "word"
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
index d1119c4..707c90c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
@@ -54,7 +54,7 @@ namespace Lucene.Net.Analysis.En
///
/// The Stemmer class transforms a word into its root form. The input
/// word can be provided a character at time (by calling <see cref="Add"/>), or at once
- /// by calling one of the various <see cref="Stem"/> methods.
+ /// by calling one of the various Stem methods, such as <see cref="Stem(string)"/>.
/// </summary>
internal class PorterStemmer
{
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.En
/// <summary>
/// <see cref="Reset"/> resets the stemmer so it can stem another word. If you invoke
- /// the stemmer by calling <see cref="Add(char)"/> and then <see cref="Stem"/>, you must call <see cref="Reset"/>
+ /// the stemmer by calling <see cref="Add(char)"/> and then <see cref="Stem()"/>, you must call <see cref="Reset"/>
/// before starting another word.
/// </summary>
public virtual void Reset()
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
index d0fa82d..621c6a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Eu/BasqueAnalyzer.cs
@@ -108,7 +108,7 @@ namespace Lucene.Net.Analysis.Eu
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
index e67a7fb..2735cb0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemmer.cs
@@ -472,6 +472,8 @@ namespace Lucene.Net.Analysis.Fr
/// <param name="search"> the strings to search for suppression </param>
/// <param name="prefix"> the prefix to add to the search string to test </param>
/// <param name="without"> true if it will be deleted even without prefix found </param>
+ /// <param name="from"> the secondary source zone for search </param>
+ /// <param name="replace"> the replacement string </param>
private void DeleteButSuffixFromElseReplace(string source, string[] search, string prefix, bool without, string from, string replace)
{
if (source != null)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index 29938e5..a5276f7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -125,6 +125,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
/// <param name="affix"> <see cref="Stream"/> for reading the hunspell affix file (won't be disposed). </param>
/// <param name="dictionaries"> <see cref="Stream"/> for reading the hunspell dictionary files (won't be disposed). </param>
+ /// <param name="ignoreCase"> ignore case? </param>
/// <exception cref="IOException"> Can be thrown while reading from the <see cref="Stream"/>s </exception>
/// <exception cref="Exception"> Can be thrown if the content of the files does not meet expected formats </exception>
public Dictionary(Stream affix, IList<Stream> dictionaries, bool ignoreCase)
@@ -399,6 +400,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// <param name="conditionPattern"> <see cref="string.Format(string, object[])"/> pattern to be used to generate the condition regex
/// pattern </param>
/// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
+ /// <param name="seenStrips"></param>
/// <exception cref="IOException"> Can be thrown while reading the rule </exception>
private void ParseAffix(SortedDictionary<string, IList<char?>> affixes, string header, TextReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
{
@@ -733,6 +735,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
/// <param name="dictionaries"> <see cref="Stream"/>s to read the dictionary file through </param>
/// <param name="decoder"> <see cref="Encoding"/> used to decode the contents of the file </param>
+ /// <param name="words"></param>
/// <exception cref="IOException"> Can be thrown while reading from the file </exception>
private void ReadDictionaryFiles(IList<Stream> dictionaries, Encoding decoder, Builder<IntsRef> words)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
index 0135fad..d7f3c97 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -73,6 +73,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
/// <param name="input"> <see cref="TokenStream"/> whose tokens will be stemmed </param>
/// <param name="dictionary"> Hunspell <see cref="Dictionary"/> containing the affix rules and words that will be used to stem the tokens </param>
+ /// <param name="dedup"> remove duplicates </param>
/// <param name="longestOnly"> true if only the longest term should be output. </param>
public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly)
: base(input)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
index 60be661..3b2d0d4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -66,6 +66,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// Find the stem(s) of the provided word
/// </summary>
/// <param name="word"> Word to find the stems for </param>
+ /// <param name="length"> length </param>
/// <returns> <see cref="IList{CharsRef}"/> of stems for the word </returns>
public IList<CharsRef> Stem(char[] word, int length)
{
@@ -100,6 +101,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// Find the unique stem(s) of the provided word
/// </summary>
/// <param name="word"> Word to find the stems for </param>
+ /// <param name="length"> length </param>
/// <returns> <see cref="IList{CharsRef}"/> of stems for the word </returns>
public IList<CharsRef> UniqueStems(char[] word, int length)
{
@@ -154,6 +156,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// Generates a list of stems for the provided word
/// </summary>
/// <param name="word"> Word to generate the stems for </param>
+ /// <param name="length"> length </param>
/// <param name="previous"> previous affix that was removed (so we don't remove same one twice) </param>
/// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
/// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
@@ -354,6 +357,8 @@ namespace Lucene.Net.Analysis.Hunspell
/// so we must check dictionary form against both to add it as a stem! </param>
/// <param name="recursionDepth"> current recursion depth </param>
/// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
+ /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
+ /// this means the innermost suffix must also contain the circumfix flag. </param>
/// <returns> <see cref="IList{CharsRef}"/> of stems for the word, or an empty list if none are found </returns>
internal IList<CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
{
@@ -458,6 +463,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
/// <param name="flag"> Flag to cross check with the array of flags </param>
/// <param name="flags"> Array of flags to cross check against. Can be <c>null</c> </param>
+ /// <param name="matchEmpty"> If true, will match a zero length flags array. </param>
/// <returns> <c>true</c> if the flag is found in the array or the array is <c>null</c>, <c>false</c> otherwise </returns>
private bool HasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index f943762..59ce195 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -29,7 +29,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Efficient Lucene analyzer/tokenizer that preferably operates on a <see cref="string"/> rather than a
/// <see cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <see cref="Regex"/>
- /// (with behaviour similar to <see cref="string.Split(string)"/>),
+ /// (with behaviour similar to <see cref="string.Split(string[], StringSplitOptions)"/>),
/// and that combines the functionality of
/// <see cref="LetterTokenizer"/>,
/// <see cref="LowerCaseTokenizer"/>,
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <para>
/// If you are unsure how exactly a regular expression should look like, consider
/// prototyping by simply trying various expressions on some test texts via
- /// <see cref="string.Split(char[])"/>. Once you are satisfied, give that regex to
+ /// <see cref="string.Split(string[], StringSplitOptions)"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
index a6ce01d..b37d290 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -223,7 +223,7 @@ namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directo
// fill in remaining space
exhausted = !charUtils.Fill(charBuffer, m_input, buffer.Length - bufferEnd);
// convert to code points
- bufferEnd += charUtils.toCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
+ bufferEnd += charUtils.ToCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
}
// should we go to the next offset?
@@ -250,7 +250,7 @@ namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directo
continue;
}
- int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.Buffer, 0);
+ int length = charUtils.ToChars(buffer, bufferStart, gramSize, termAtt.Buffer, 0);
termAtt.Length = length;
posIncAtt.PositionIncrement = 1;
posLenAtt.PositionLength = 1;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
index 9c280bf..8fd66ac 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
@@ -50,13 +50,15 @@ namespace Lucene.Net.Analysis.Nl
private readonly ICharTermAttribute termAtt;
private readonly IKeywordAttribute keywordAttr;
+ /// <param name="in"> Input <see cref="TokenStream"/> </param>
public DutchStemFilter(TokenStream @in)
- : base(@in)
+ : base(@in)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
}
+ /// <param name="in"> Input <see cref="TokenStream"/> </param>
/// <param name="stemdictionary"> Dictionary of word stem pairs, that overrule the algorithm </param>
public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
: this(@in)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
index 6fb788a..9681e1e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
@@ -38,13 +38,15 @@ namespace Lucene.Net.Analysis.No
/// Calls <see cref="NorwegianLightStemFilter.NorwegianLightStemFilter(TokenStream, int)"/>
/// - NorwegianLightStemFilter(input, BOKMAAL)
/// </summary>
+ /// <param name="input"> the source <see cref="TokenStream"/> to filter </param>
public NorwegianLightStemFilter(TokenStream input)
- : this(input, NorwegianLightStemmer.BOKMAAL)
+ : this(input, NorwegianLightStemmer.BOKMAAL)
{
}
/// <summary>
/// Creates a new <see cref="NorwegianLightStemFilter"/> </summary>
+ /// <param name="input"> the source <see cref="TokenStream"/> to filter </param>
/// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
/// <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
public NorwegianLightStemFilter(TokenStream input, int flags)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
index 520425d..446bf3a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
@@ -45,6 +45,7 @@ namespace Lucene.Net.Analysis.No
/// <summary>
/// Creates a new <see cref="NorwegianLightStemFilter"/> </summary>
+ /// <param name="input"> the source <see cref="TokenStream"/> to filter </param>
/// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
/// <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
public NorwegianMinimalStemFilter(TokenStream input, int flags)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/898a818d/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
index af4161c..dca59e8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
@@ -81,6 +81,7 @@ namespace Lucene.Net.Analysis.Ro
/// <summary>
/// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public RomanianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
[16/39] lucenenet git commit: Lucene.Net.Collation refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Collation refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e67f7979
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e67f7979
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e67f7979
Branch: refs/heads/api-work
Commit: e67f79794936d68b4742f998d916c79e7dab5dcf
Parents: d390386
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 22:19:30 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:21 2017 +0700
----------------------------------------------------------------------
.../Collation/CollationAttributeFactory.cs | 55 +++++------
.../Collation/CollationKeyAnalyzer.cs | 66 ++++++-------
.../Collation/CollationKeyFilter.cs | 97 ++++++++++----------
.../Collation/CollationKeyFilterFactory.cs | 87 +++++++++---------
.../CollatedTermAttributeImpl.cs | 8 +-
5 files changed, 157 insertions(+), 156 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
index b057e7d..6e15ad1 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationAttributeFactory.cs
@@ -24,32 +24,32 @@ namespace Lucene.Net.Collation
/// <summary>
/// <para>
- /// Converts each token into its <see cref="CollationKey"/>, and then
+ /// Converts each token into its <see cref="System.Globalization.SortKey"/>, and then
/// encodes the bytes as an index term.
/// </para>
/// <para>
- /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
- /// index and query time -- CollationKeys are only comparable when produced by
- /// the same Collator. Since <see cref="RuleBasedCollator"/>s are not
+ /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+ /// index and query time -- <see cref="System.Globalization.SortKey"/>s are only comparable when produced by
+ /// the same <see cref="Collator"/>. Since <see cref="RuleBasedCollator"/>s are not
/// independently versioned, it is unsafe to search against stored
- /// CollationKeys unless the following are exactly the same (best practice is
+ /// <see cref="System.Globalization.SortKey"/>s unless the following are exactly the same (best practice is
/// to store this information with the index and check that they remain the
/// same at query time):
/// </para>
- /// <ol>
- /// <li>JVM vendor</li>
- /// <li>JVM version, including patch version</li>
- /// <li>
+ /// <list type="number">
+ /// <item>JVM vendor</item>
+ /// <item>JVM version, including patch version</item>
+ /// <item>
/// The language (and country and variant, if specified) of the Locale
/// used when constructing the collator via
- /// <see cref="Collator#getInstance(Locale)"/>.
- /// </li>
- /// <li>
- /// The collation strength used - see <see cref="Collator#setStrength(int)"/>
- /// </li>
- /// </ol>
+ /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>.
+ /// </item>
+ /// <item>
+ /// The collation strength used - see <see cref="Collator.Strength"/>
+ /// </item>
+ /// </list>
/// <para>
- /// The <code>ICUCollationAttributeFactory</code> in the analysis-icu package
+ /// The <c>ICUCollationAttributeFactory</c> in the analysis-icu package
/// uses ICU4J's Collator, which makes its
/// version available, thus allowing collation to be versioned independently
/// from the JVM. ICUCollationAttributeFactory is also significantly faster and
@@ -57,7 +57,7 @@ namespace Lucene.Net.Collation
/// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
/// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
/// generation timing and key length comparisons between ICU4J and
- /// java.text.Collator over several languages.
+ /// <see cref="Collator"/> over several languages.
/// </para>
/// <para>
/// CollationKeys generated by java.text.Collators are not compatible
@@ -75,20 +75,21 @@ namespace Lucene.Net.Collation
private readonly AttributeSource.AttributeFactory @delegate;
/// <summary>
- /// Create a CollationAttributeFactory, using
- /// <see cref="AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY"/> as the
+ /// Create a <see cref="CollationAttributeFactory"/>, using
+ /// <see cref="AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY"/> as the
/// factory for all other attributes. </summary>
- /// <param name="collator"> CollationKey generator </param>
- public CollationAttributeFactory(Collator collator) : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator)
+ /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param>
+ public CollationAttributeFactory(Collator collator)
+ : this(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, collator)
{
}
- /// <summary>
- /// Create a CollationAttributeFactory, using the supplied Attribute Factory
- /// as the factory for all other attributes. </summary>
- /// <param name="delegate"> Attribute Factory </param>
- /// <param name="collator"> CollationKey generator </param>
- public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator)
+ /// <summary>
+ /// Create a <see cref="CollationAttributeFactory"/>, using the supplied Attribute Factory
+ /// as the factory for all other attributes. </summary>
+ /// <param name="delegate"> Attribute Factory </param>
+ /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param>
+ public CollationAttributeFactory(AttributeSource.AttributeFactory @delegate, Collator collator)
{
this.@delegate = @delegate;
this.collator = collator;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
index 4eabd4d..6ebee3d 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyAnalyzer.cs
@@ -29,34 +29,34 @@ namespace Lucene.Net.Collation
/// Configures <see cref="KeywordTokenizer"/> with <see cref="CollationAttributeFactory"/>.
/// </para>
/// <para>
- /// Converts the token into its <see cref="java.text.CollationKey"/>, and then
- /// encodes the CollationKey either directly or with
- /// <see cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow
+ /// Converts the token into its <see cref="System.Globalization.SortKey"/>, and then
+ /// encodes the <see cref="System.Globalization.SortKey"/> either directly or with
+ /// <see cref="IndexableBinaryStringTools"/> (see version note below), to allow
/// it to be stored as an index term.
/// </para>
/// <para>
- /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
- /// index and query time -- CollationKeys are only comparable when produced by
- /// the same Collator. Since <see cref="java.text.RuleBasedCollator"/>s are not
+ /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+ /// index and query time -- <see cref="System.Globalization.SortKey"/> are only comparable when produced by
+ /// the same <see cref="Collator"/>. Since <c>java.text.RuleBasedCollators</c> are not
/// independently versioned, it is unsafe to search against stored
- /// CollationKeys unless the following are exactly the same (best practice is
+ /// <see cref="System.Globalization.SortKey"/> unless the following are exactly the same (best practice is
/// to store this information with the index and check that they remain the
/// same at query time):
/// </para>
- /// <ol>
- /// <li>JVM vendor</li>
- /// <li>JVM version, including patch version</li>
- /// <li>
+ /// <list type="number">
+ /// <item>JVM vendor</item>
+ /// <item>JVM version, including patch version</item>
+ /// <item>
/// The language (and country and variant, if specified) of the Locale
/// used when constructing the collator via
- /// <see cref="Collator#getInstance(java.util.Locale)"/>.
- /// </li>
- /// <li>
- /// The collation strength used - see <see cref="Collator#setStrength(int)"/>
- /// </li>
- /// </ol>
+ /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>.
+ /// </item>
+ /// <item>
+ /// The collation strength used - see <see cref="Collator.Strength"/>
+ /// </item>
+ /// </list>
/// <para>
- /// The <code>ICUCollationKeyAnalyzer</code> in the analysis-icu package
+ /// The <c>ICUCollationKeyAnalyzer</c> in the analysis-icu package
/// uses ICU4J's Collator, which makes
/// its version available, thus allowing collation to be versioned
/// independently from the JVM. ICUCollationKeyAnalyzer is also significantly
@@ -64,21 +64,20 @@ namespace Lucene.Net.Collation
/// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
/// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
/// generation timing and key length comparisons between ICU4J and
- /// java.text.Collator over several languages.
+ /// <see cref="Collator"/> over several languages.
/// </para>
/// <para>
- /// CollationKeys generated by java.text.Collators are not compatible
+ /// CollationKeys generated by <see cref="Collator"/> are not compatible
/// with those generated by ICU Collators. Specifically, if you use
/// CollationKeyAnalyzer to generate index terms, do not use
/// ICUCollationKeyAnalyzer on the query side, or vice versa.
/// </para>
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating CollationKeyAnalyzer:
- /// <ul>
- /// <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
- /// versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>.
- /// </ul>
+ /// compatibility when creating <see cref="CollationKeyAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+ /// versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>.</item>
+ /// </list>
/// </para>
/// </summary>
// LUCENENET TODO: A better option would be to contribute to the icu.net library and
@@ -90,12 +89,12 @@ namespace Lucene.Net.Collation
private readonly CollationAttributeFactory factory;
private readonly LuceneVersion matchVersion;
- /// <summary>
- /// Create a new CollationKeyAnalyzer, using the specified collator.
- /// </summary>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
- /// <param name="collator"> CollationKey generator </param>
- public CollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator)
+ /// <summary>
+ /// Create a new <see cref="CollationKeyAnalyzer"/>, using the specified collator.
+ /// </summary>
+ /// <param name="matchVersion"> See <see cref="CollationKeyAnalyzer"/> </param>
+ /// <param name="collator"> <see cref="System.Globalization.SortKey"/> generator </param>
+ public CollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator)
{
this.matchVersion = matchVersion;
this.collator = collator;
@@ -103,7 +102,8 @@ namespace Lucene.Net.Collation
}
[Obsolete("Use <seealso cref=\"CollationKeyAnalyzer#CollationKeyAnalyzer(LuceneVersion, Collator)\"/> and specify a version instead. This ctor will be removed in Lucene 5.0")]
- public CollationKeyAnalyzer(Collator collator) : this(LuceneVersion.LUCENE_31, collator)
+ public CollationKeyAnalyzer(Collator collator)
+ : this(LuceneVersion.LUCENE_31, collator)
{
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
index 477e524..ae90816 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilter.cs
@@ -23,53 +23,53 @@ namespace Lucene.Net.Collation
* limitations under the License.
*/
- /// <summary>
- /// <para>
- /// Converts each token into its <see cref="java.text.CollationKey"/>, and then
- /// encodes the CollationKey with <see cref="IndexableBinaryStringTools"/>, to allow
- /// it to be stored as an index term.
- /// </para>
- /// <para>
- /// <strong>WARNING:</strong> Make sure you use exactly the same Collator at
- /// index and query time -- CollationKeys are only comparable when produced by
- /// the same Collator. Since <see cref="java.text.RuleBasedCollator"/>s are not
- /// independently versioned, it is unsafe to search against stored
- /// CollationKeys unless the following are exactly the same (best practice is
- /// to store this information with the index and check that they remain the
- /// same at query time):
- /// </para>
- /// <ol>
- /// <li>JVM vendor</li>
- /// <li>JVM version, including patch version</li>
- /// <li>
- /// The language (and country and variant, if specified) of the Locale
- /// used when constructing the collator via
- /// <see cref="Collator#getInstance(CultureInfo)"/>.
- /// </li>
- /// <li>
- /// The collation strength used - see <see cref="Collator#setStrength(int)"/>
- /// </li>
- /// </ol>
- /// <para>
- /// The <code>ICUCollationKeyFilter</code> in the analysis-icu package
- /// uses ICU4J's Collator, which makes its
- /// version available, thus allowing collation to be versioned independently
- /// from the JVM. ICUCollationKeyFilter is also significantly faster and
- /// generates significantly shorter keys than CollationKeyFilter. See
- /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
- /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
- /// generation timing and key length comparisons between ICU4J and
- /// java.text.Collator over several languages.
- /// </para>
- /// <para>
- /// CollationKeys generated by java.text.Collators are not compatible
- /// with those those generated by ICU Collators. Specifically, if you use
- /// CollationKeyFilter to generate index terms, do not use
- /// ICUCollationKeyFilter on the query side, or vice versa.
- /// </para> </summary>
- /// @deprecated Use <see cref="CollationAttributeFactory"/> instead, which encodes
- /// terms directly as bytes. This filter will be removed in Lucene 5.0
- [Obsolete("Use <seealso cref=\"CollationAttributeFactory\"/> instead, which encodes")]
+ /// <summary>
+ /// <para>
+ /// Converts each token into its <see cref="System.Globalization.SortKey"/>, and then
+ /// encodes the <see cref="System.Globalization.SortKey"/> with <see cref="IndexableBinaryStringTools"/>, to allow
+ /// it to be stored as an index term.
+ /// </para>
+ /// <para>
+ /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
+ /// index and query time -- <see cref="System.Globalization.SortKey"/> are only comparable when produced by
+ /// the same <see cref="Collator"/>. Since <c>java.text.RuleBasedCollators</c> are not
+ /// independently versioned, it is unsafe to search against stored
+ /// <see cref="System.Globalization.SortKey"/> unless the following are exactly the same (best practice is
+ /// to store this information with the index and check that they remain the
+ /// same at query time):
+ /// </para>
+ /// <list type="number">
+ /// <item>JVM vendor</item>
+ /// <item>JVM version, including patch version</item>
+ /// <item>
+ /// The language (and country and variant, if specified) of the Locale
+ /// used when constructing the collator via
+ /// <see cref="Collator.Create(System.Globalization.CultureInfo)"/>.
+ /// </item>
+ /// <item>
+ /// The collation strength used - see <see cref="Collator.Strength"/>
+ /// </item>
+ /// </list>
+ /// <para>
+ /// The <c>ICUCollationKeyFilter</c> in the analysis-icu package
+ /// uses ICU4J's Collator, which makes its
+ /// version available, thus allowing collation to be versioned independently
+ /// from the JVM. ICUCollationKeyFilter is also significantly faster and
+ /// generates significantly shorter keys than CollationKeyFilter. See
+ /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
+ /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
+ /// generation timing and key length comparisons between ICU4J and
+ /// java.text.Collator over several languages.
+ /// </para>
+ /// <para>
+ /// CollationKeys generated by java.text.Collators are not compatible
+ /// with those generated by ICU Collators. Specifically, if you use
+ /// CollationKeyFilter to generate index terms, do not use
+ /// ICUCollationKeyFilter on the query side, or vice versa.
+ /// </para> </summary>
+ /// @deprecated Use <see cref="CollationAttributeFactory"/> instead, which encodes
+ /// terms directly as bytes. This filter will be removed in Lucene 5.0
+ [Obsolete("Use CollationAttributeFactory instead, which encodes terms directly as bytes. This filter will be removed in Lucene 5.0.")]
// LUCENENET TODO: A better option would be to contribute to the icu.net library and
// make it CLS compliant (at least the parts of it we use)
[CLSCompliant(false)]
@@ -80,7 +80,8 @@ namespace Lucene.Net.Collation
/// <param name="input"> Source token stream </param>
/// <param name="collator"> CollationKey generator </param>
- public CollationKeyFilter(TokenStream input, Collator collator) : base(input)
+ public CollationKeyFilter(TokenStream input, Collator collator)
+ : base(input)
{
this.collator = collator;
this.termAtt = this.AddAttribute<ICharTermAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
index d5e53a1..45bb1e1 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/CollationKeyFilterFactory.cs
@@ -7,7 +7,6 @@ using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
-using System.Linq;
using System.Text;
namespace Lucene.Net.Collation
@@ -29,49 +28,49 @@ namespace Lucene.Net.Collation
* limitations under the License.
*/
- /// <summary>
- /// Factory for <see cref="CollationKeyFilter"/>.
- /// <para>
- /// This factory can be created in two ways:
- /// <ul>
- /// <li>Based upon a system collator associated with a Locale.</li>
- /// <li>Based upon a tailored ruleset.</li>
- /// </ul>
- /// </para>
- /// <para>
- /// Using a System collator:
- /// <ul>
- /// <li>language: ISO-639 language code (mandatory)</li>
- /// <li>country: ISO-3166 country code (optional)</li>
- /// <li>variant: vendor or browser-specific code (optional)</li>
- /// <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)</li>
- /// <li>decomposition: 'no','canonical', or 'full' (optional)</li>
- /// </ul>
- /// </para>
- /// <para>
- /// Using a Tailored ruleset:
- /// <ul>
- /// <li>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</li>
- /// <li>strength: 'primary','secondary','tertiary', or 'identical' (optional)</li>
- /// <li>decomposition: 'no','canonical', or 'full' (optional)</li>
- /// </ul>
- ///
- /// <pre class="prettyprint" >
- /// <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.KeywordTokenizerFactory"/>
- /// <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
- /// </analyzer>
- /// </fieldType></code>
- ///
- /// </para>
- /// </summary>
- /// <see cref="Collator"></seealso>
- /// <see cref="CultureInfo"></seealso>
- /// <see cref="RuleBasedCollator">
- /// @since solr 3.1 </seealso>
- /// @deprecated use <see cref="CollationKeyAnalyzer"/> instead.
- [Obsolete("use <seealso cref=\"CollationKeyAnalyzer\"/> instead.")]
+ /// <summary>
+ /// Factory for <see cref="CollationKeyFilter"/>.
+ /// <para>
+ /// This factory can be created in two ways:
+ /// <list type="bullet">
+ /// <item>Based upon a system collator associated with a <see cref="System.Globalization.CultureInfo"/>.</item>
+ /// <item>Based upon a tailored ruleset.</item>
+ /// </list>
+ /// </para>
+ /// <para>
+ /// Using a System collator:
+ /// <list type="bullet">
+ /// <item>language: ISO-639 language code (mandatory)</item>
+ /// <item>country: ISO-3166 country code (optional)</item>
+ /// <item>variant: vendor or browser-specific code (optional)</item>
+ /// <item>strength: 'primary','secondary','tertiary', or 'identical' (optional)</item>
+ /// <item>decomposition: 'no','canonical', or 'full' (optional)</item>
+ /// </list>
+ /// </para>
+ /// <para>
+ /// Using a Tailored ruleset:
+ /// <list type="bullet">
+ /// <item>custom: UTF-8 text file containing rules supported by RuleBasedCollator (mandatory)</item>
+ /// <item>strength: 'primary','secondary','tertiary', or 'identical' (optional)</item>
+ /// <item>decomposition: 'no','canonical', or 'full' (optional)</item>
+ /// </list>
+ ///
+ /// <code>
+ /// <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.KeywordTokenizerFactory"/>
+ /// <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ ///
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Collator"/>
+ /// <seealso cref="CultureInfo"/>
+ /// <seealso cref="RuleBasedCollator"/>
+ /// @since solr 3.1
+ /// @deprecated use <see cref="CollationKeyAnalyzer"/> instead.
+ [Obsolete("use <seealso cref=\"CollationKeyAnalyzer\"/> instead.")]
public class CollationKeyFilterFactory : TokenFilterFactory, IMultiTermAwareComponent, IResourceLoaderAware
{
private Collator collator;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e67f7979/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
index ceebafb..05333ed 100644
--- a/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Collation/TokenAttributes/CollatedTermAttributeImpl.cs
@@ -33,13 +33,13 @@ namespace Lucene.Net.Collation.TokenAttributes
private readonly Collator collator;
/// <summary>
- /// Create a new CollatedTermAttributeImpl </summary>
+ /// Create a new <see cref="CollatedTermAttributeImpl"/> </summary>
/// <param name="collator"> Collation key generator </param>
public CollatedTermAttributeImpl(Collator collator)
{
- // clone in case JRE doesn't properly sync,
- // or to reduce contention in case they do
- this.collator = collator;
+ // clone in case JRE doesn't properly sync,
+ // or to reduce contention in case they do
+ this.collator = (Collator)collator.Clone();
}
public override void FillBytesRef()
[11/39] lucenenet git commit: Fixed issue with PatternParser not
compiling on .NET Core because of a missing overload on StreamReader
Posted by ni...@apache.org.
Fixed issue with PatternParser not compiling on .NET Core because of a missing overload on StreamReader
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/dc21329c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/dc21329c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/dc21329c
Branch: refs/heads/api-work
Commit: dc21329cd56791eaf38b06c52e4153498df91361
Parents: 7cf215b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 16:13:06 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:16 2017 +0700
----------------------------------------------------------------------
.../Analysis/Compound/Hyphenation/PatternParser.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dc21329c/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
index 9b9f226..9bbe391 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -99,7 +99,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
var xmlReaderSettings = GetXmlReaderSettings();
// LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
- using (var src = XmlReader.Create(new StreamReader(path, encoding), xmlReaderSettings))
+ using (var src = XmlReader.Create(new StreamReader(new FileStream(path, FileMode.Open), encoding), xmlReaderSettings))
{
Parse(src);
}
[29/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed Count property
which was formerly size() - we already have a Length property. And since
StringBuilder uses Length, that one is preferred.
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed Count property which was formerly size() - we already have a Length property. And since StringBuilder uses Length, that one is preferred.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4e209cdc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4e209cdc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4e209cdc
Branch: refs/heads/api-work
Commit: 4e209cdc56eb94bf641629af36faf1c2641a3be6
Parents: e3efbd0
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:32:15 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:32:15 2017 +0700
----------------------------------------------------------------------
.../Analysis/En/KStemmer.cs | 2 +-
.../Analysis/Util/OpenStringBuilder.cs | 19 ++++++++++---------
2 files changed, 11 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4e209cdc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
index 250af5b..9173a2a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -778,7 +778,7 @@ namespace Lucene.Net.Analysis.En
// thisLookup); } else { // System.out.println("new lookup:" + thisLookup);
// }
- matchedEntry = dict_ht.Get(word.Array, 0, word.Count);
+ matchedEntry = dict_ht.Get(word.Array, 0, word.Length);
return matchedEntry != null;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4e209cdc/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index fc73055..b930b3f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -69,11 +69,12 @@ namespace Lucene.Net.Analysis.Util
}
}
- // LUCENENE TODO: Change to Length (StringBuilder uses Length in .NET)
- public virtual int Count // LUCENENET NOTE: This was size() in Lucene.
- {
- get{ return m_len; }
- }
+ // LUCENENE NOTE: This is essentially a duplicate of Length (except that property can be set).
+ // .NET uses Length for StringBuilder anyway, so that property is preferable to this one.
+ //public virtual int Count // LUCENENET NOTE: This was size() in Lucene.
+ //{
+ // get{ return m_len; }
+ //}
public virtual int Capacity
{
@@ -142,7 +143,7 @@ namespace Lucene.Net.Analysis.Util
protected virtual void Resize(int len)
{
char[] newbuf = new char[Math.Max(m_buf.Length << 1, len)];
- System.Array.Copy(m_buf, 0, newbuf, 0, Count);
+ System.Array.Copy(m_buf, 0, newbuf, 0, Length);
m_buf = newbuf;
}
@@ -202,14 +203,14 @@ namespace Lucene.Net.Analysis.Util
public virtual char[] ToCharArray()
{
- char[] newbuf = new char[Count];
- System.Array.Copy(m_buf, 0, newbuf, 0, Count);
+ char[] newbuf = new char[Length];
+ System.Array.Copy(m_buf, 0, newbuf, 0, Length);
return newbuf;
}
public override string ToString()
{
- return new string(m_buf, 0, Count);
+ return new string(m_buf, 0, Length);
}
}
}
\ No newline at end of file
[38/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed
NGram in Git
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
deleted file mode 100644
index 8cf8172..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
+++ /dev/null
@@ -1,245 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using System;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tokenizes the given token into n-grams of given size(s).
- /// <para>
- /// This <see cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
- /// </para>
- /// <para>As of Lucene 4.4, this filter does not support
- /// <see cref="Side.BACK"/> (you can use <see cref="Reverse.ReverseStringFilter"/> up-front and
- /// afterward to get the same behavior), handles supplementary characters
- /// correctly and does not update offsets anymore.
- /// </para>
- /// </summary>
- public sealed class EdgeNGramTokenFilter : TokenFilter
- {
- public const Side DEFAULT_SIDE = Side.FRONT;
- public const int DEFAULT_MAX_GRAM_SIZE = 1;
- public const int DEFAULT_MIN_GRAM_SIZE = 1;
-
- /// <summary>
- /// Specifies which side of the input the n-gram should be generated from </summary>
- public enum Side
- {
- /// <summary>
- /// Get the n-gram from the front of the input </summary>
- FRONT,
-
- /// <summary>
- /// Get the n-gram from the end of the input </summary>
- [System.Obsolete]
- BACK,
- }
-
- /// <summary>
- /// Get the appropriate <see cref="Side"/> from a string
- /// </summary>
- public static Side GetSide(string sideName)
- {
- Side result;
- if (!Enum.TryParse(sideName, true, out result))
- {
- result = Side.FRONT;
- }
- return result;
- }
-
- private readonly LuceneVersion version;
- private readonly CharacterUtils charUtils;
- private readonly int minGram;
- private readonly int maxGram;
- private Side side;
- private char[] curTermBuffer;
- private int curTermLength;
- private int curCodePointCount;
- private int curGramSize;
- private int tokStart;
- private int tokEnd; // only used if the length changed before this filter
- private bool updateOffsets; // never if the length changed before this filter
- private int savePosIncr;
- private int savePosLen;
-
- private readonly ICharTermAttribute termAtt;
- private readonly IOffsetAttribute offsetAtt;
- private readonly IPositionIncrementAttribute posIncrAtt;
- private readonly IPositionLengthAttribute posLenAtt;
-
- /// <summary>
- /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
- /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
- : base(input)
- {
-
- //if (version == null)
- //{
- // throw new System.ArgumentException("version must not be null");
- //}
-
- if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
- {
- throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
- }
-
- if (!Enum.IsDefined(typeof(Side), side))
- {
- throw new System.ArgumentException("sideLabel must be either front or back");
- }
-
- if (minGram < 1)
- {
- throw new System.ArgumentException("minGram must be greater than zero");
- }
-
- if (minGram > maxGram)
- {
- throw new System.ArgumentException("minGram must not be greater than maxGram");
- }
-
- this.version = version;
- this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
- this.minGram = minGram;
- this.maxGram = maxGram;
- this.side = side;
-
- this.termAtt = AddAttribute<ICharTermAttribute>();
- this.offsetAtt = AddAttribute<IOffsetAttribute>();
- this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
- this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
- }
-
- /// <summary>
- /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
- /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, string sideLabel, int minGram, int maxGram)
- : this(version, input, GetSide(sideLabel), minGram, maxGram)
- {
- }
-
- /// <summary>
- /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
-#pragma warning disable 612, 618
- : this(version, input, Side.FRONT, minGram, maxGram)
-#pragma warning restore 612, 618
- {
- }
-
- public override sealed bool IncrementToken()
- {
- while (true)
- {
- if (curTermBuffer == null)
- {
- if (!m_input.IncrementToken())
- {
- return false;
- }
- else
- {
- curTermBuffer = (char[])termAtt.Buffer.Clone();
- curTermLength = termAtt.Length;
- curCodePointCount = charUtils.CodePointCount(termAtt.ToString());
- curGramSize = minGram;
- tokStart = offsetAtt.StartOffset;
- tokEnd = offsetAtt.EndOffset;
-#pragma warning disable 612, 618
- if (version.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- // Never update offsets
- updateOffsets = false;
- }
- else
- {
- // if length by start + end offsets doesn't match the term text then assume
- // this is a synonym and don't adjust the offsets.
- updateOffsets = (tokStart + curTermLength) == tokEnd;
- }
- savePosIncr += posIncrAtt.PositionIncrement;
- savePosLen = posLenAtt.PositionLength;
- }
- }
- if (curGramSize <= maxGram) // if we have hit the end of our n-gram size range, quit
- {
- if (curGramSize <= curCodePointCount) // if the remaining input is too short, we can't generate any n-grams
- {
- // grab gramSize chars from front or back
- int start = side == Side.FRONT ? 0 : charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
- int end = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
- ClearAttributes();
- if (updateOffsets)
- {
- offsetAtt.SetOffset(tokStart + start, tokStart + end);
- }
- else
- {
- offsetAtt.SetOffset(tokStart, tokEnd);
- }
- // first ngram gets increment, others don't
- if (curGramSize == minGram)
- {
- posIncrAtt.PositionIncrement = savePosIncr;
- savePosIncr = 0;
- }
- else
- {
- posIncrAtt.PositionIncrement = 0;
- }
- posLenAtt.PositionLength = savePosLen;
- termAtt.CopyBuffer(curTermBuffer, start, end - start);
- curGramSize++;
- return true;
- }
- }
- curTermBuffer = null;
- }
- }
-
- public override void Reset()
- {
- base.Reset();
- curTermBuffer = null;
- savePosIncr = 0;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
deleted file mode 100644
index ed2cb3d..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
+++ /dev/null
@@ -1,72 +0,0 @@
-\ufeffusing Lucene.Net.Util;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tokenizes the input from an edge into n-grams of given size(s).
- /// <para>
- /// This <see cref="Tokenizer"/> create n-grams from the beginning edge or ending edge of a input token.
- /// </para>
- /// <para>As of Lucene 4.4, this tokenizer
- /// <list type="bullet">
- /// <item>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage</item>
- /// <item>doesn't trim the input,</item>
- /// <item>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones</item>
- /// <item>doesn't support backward n-grams anymore.</item>
- /// <item>supports <see cref="Util.CharTokenizer.IsTokenChar(int)"/> pre-tokenization,</item>
- /// <item>correctly handles supplementary characters.</item>
- /// </list>
- /// </para>
- /// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
- /// to use the old behavior through <see cref="Lucene43EdgeNGramTokenizer"/>.
- /// </para>
- /// </summary>
- public class EdgeNGramTokenizer : NGramTokenizer
- {
- public const int DEFAULT_MAX_GRAM_SIZE = 1;
- public const int DEFAULT_MIN_GRAM_SIZE = 1;
-
- /// <summary>
- /// Creates <see cref="EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
- : base(version, input, minGram, maxGram, true)
- {
- }
-
- /// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)
- : base(version, factory, input, minGram, maxGram, true)
- {
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
deleted file mode 100644
index 00325f5..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using System;
-using System.Collections.Generic;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Creates new instances of <see cref="EdgeNGramTokenizer"/>.
- /// <code>
- /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="1" maxGramSize="1"/>
- /// </analyzer>
- /// </fieldType></code>
- /// </summary>
- public class EdgeNGramTokenizerFactory : TokenizerFactory
- {
- private readonly int maxGramSize;
- private readonly int minGramSize;
- private readonly string side;
-
- /// <summary>
- /// Creates a new <see cref="EdgeNGramTokenizerFactory"/> </summary>
- public EdgeNGramTokenizerFactory(IDictionary<string, string> args) : base(args)
- {
- minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
- maxGramSize = GetInt(args, "maxGramSize", EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
- side = Get(args, "side", EdgeNGramTokenFilter.Side.FRONT.ToString());
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
- {
-#pragma warning disable 612, 618
- if (m_luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- EdgeNGramTokenFilter.Side sideEnum;
- if (!Enum.TryParse(this.side, true, out sideEnum))
- {
- throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
- }
- return new EdgeNGramTokenizer(m_luceneMatchVersion, input, minGramSize, maxGramSize);
- }
- else
- {
-#pragma warning disable 612, 618
- return new Lucene43EdgeNGramTokenizer(m_luceneMatchVersion, input, side, minGramSize, maxGramSize);
-#pragma warning restore 612, 618
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
deleted file mode 100644
index 4dadbed..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ /dev/null
@@ -1,297 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Util;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Old version of <see cref="EdgeNGramTokenizer"/> which doesn't handle correctly
- /// supplementary characters.
- /// </summary>
- [Obsolete]
- public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
- {
- public const Side DEFAULT_SIDE = Side.FRONT;
- public const int DEFAULT_MAX_GRAM_SIZE = 1;
- public const int DEFAULT_MIN_GRAM_SIZE = 1;
-
- private ICharTermAttribute termAtt;
- private IOffsetAttribute offsetAtt;
- private IPositionIncrementAttribute posIncrAtt;
-
- /// <summary>
- /// Specifies which side of the input the n-gram should be generated from </summary>
- public enum Side
- {
- /// <summary>
- /// Get the n-gram from the front of the input </summary>
- FRONT,
-
- /// <summary>
- /// Get the n-gram from the end of the input </summary>
- BACK,
- }
-
- // Get the appropriate Side from a string
- public static Side GetSide(string sideName)
- {
- Side result;
- if (!Enum.TryParse(sideName, true, out result))
- {
- result = Side.FRONT;
- }
- return result;
- }
-
- private int minGram;
- private int maxGram;
- private int gramSize;
- private Side side;
- private bool started;
- private int inLen; // length of the input AFTER trim()
- private int charsRead; // length of the input
- private string inStr;
-
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, Side side, int minGram, int maxGram)
- : base(input)
- {
- Init(version, side, minGram, maxGram);
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, Side side, int minGram, int maxGram)
- : base(factory, input)
- {
- Init(version, side, minGram, maxGram);
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, string sideLabel, int minGram, int maxGram)
- : this(version, input, GetSide(sideLabel), minGram, maxGram)
- {
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- [Obsolete]
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram)
- : this(version, factory, input, GetSide(sideLabel), minGram, maxGram)
- {
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
- : this(version, input, Side.FRONT, minGram, maxGram)
- {
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
- /// </summary>
- /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
- /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
- : this(version, factory, input, Side.FRONT, minGram, maxGram)
- {
- }
-
- private void Init(LuceneVersion version, Side side, int minGram, int maxGram)
- {
- //if (version == null)
- //{
- // throw new System.ArgumentException("version must not be null");
- //}
-
- if (!Enum.IsDefined(typeof(Side), side))
- {
- throw new System.ArgumentException("sideLabel must be either front or back");
- }
-
- if (minGram < 1)
- {
- throw new System.ArgumentException("minGram must be greater than zero");
- }
-
- if (minGram > maxGram)
- {
- throw new System.ArgumentException("minGram must not be greater than maxGram");
- }
-
- if (version.OnOrAfter(LuceneVersion.LUCENE_44))
- {
- if (side == Side.BACK)
- {
- throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
- }
- }
- else
- {
- maxGram = Math.Min(maxGram, 1024);
- }
-
- this.minGram = minGram;
- this.maxGram = maxGram;
- this.side = side;
- this.termAtt = AddAttribute<ICharTermAttribute>();
- this.offsetAtt = AddAttribute<IOffsetAttribute>();
- this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
- }
-
- /// <summary>
- /// Returns the next token in the stream, or null at EOS. </summary>
- public override bool IncrementToken()
- {
- ClearAttributes();
- // if we are just starting, read the whole input
- if (!started)
- {
- started = true;
- gramSize = minGram;
- int limit = side == Side.FRONT ? maxGram : 1024;
- char[] chars = new char[Math.Min(1024, limit)];
- charsRead = 0;
- // TODO: refactor to a shared readFully somewhere:
- bool exhausted = false;
- while (charsRead < limit)
- {
- int inc = m_input.Read(chars, charsRead, chars.Length - charsRead);
- if (inc <= 0)
- {
- exhausted = true;
- break;
- }
- charsRead += inc;
- if (charsRead == chars.Length && charsRead < limit)
- {
- chars = ArrayUtil.Grow(chars);
- }
- }
-
- inStr = new string(chars, 0, charsRead);
- inStr = inStr.Trim();
-
- if (!exhausted)
- {
- // Read extra throwaway chars so that on end() we
- // report the correct offset:
- var throwaway = new char[1024];
- while (true)
- {
- int inc = m_input.Read(throwaway, 0, throwaway.Length);
- if (inc <= 0)
- {
- break;
- }
- charsRead += inc;
- }
- }
-
- inLen = inStr.Length;
- if (inLen == 0)
- {
- return false;
- }
- posIncrAtt.PositionIncrement = 1;
- }
- else
- {
- posIncrAtt.PositionIncrement = 0;
- }
-
- // if the remaining input is too short, we can't generate any n-grams
- if (gramSize > inLen)
- {
- return false;
- }
-
- // if we have hit the end of our n-gram size range, quit
- if (gramSize > maxGram || gramSize > inLen)
- {
- return false;
- }
-
- // grab gramSize chars from front or back
- int start = side == Side.FRONT ? 0 : inLen - gramSize;
- int end = start + gramSize;
- termAtt.SetEmpty().Append(inStr, start, end);
- offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
- gramSize++;
- return true;
- }
-
- public override void End()
- {
- base.End();
- // set final offset
- int finalOffset = CorrectOffset(charsRead);
- this.offsetAtt.SetOffset(finalOffset, finalOffset);
- }
-
- public override void Reset()
- {
- base.Reset();
- started = false;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
deleted file mode 100644
index b806345..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
+++ /dev/null
@@ -1,173 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Old broken version of <see cref="NGramTokenizer"/>.
- /// </summary>
- [Obsolete]
- public sealed class Lucene43NGramTokenizer : Tokenizer
- {
- public const int DEFAULT_MIN_NGRAM_SIZE = 1;
- public const int DEFAULT_MAX_NGRAM_SIZE = 2;
-
- private int minGram, maxGram;
- private int gramSize;
- private int pos;
- private int inLen; // length of the input AFTER trim()
- private int charsRead; // length of the input
- private string inStr;
- private bool started;
-
- private ICharTermAttribute termAtt;
- private IOffsetAttribute offsetAtt;
-
- /// <summary>
- /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public Lucene43NGramTokenizer(TextReader input, int minGram, int maxGram)
- : base(input)
- {
- Init(minGram, maxGram);
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
- /// <param name="factory"> <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public Lucene43NGramTokenizer(AttributeFactory factory, TextReader input, int minGram, int maxGram)
- : base(factory, input)
- {
- Init(minGram, maxGram);
- }
-
- /// <summary>
- /// Creates <see cref="Lucene43NGramTokenizer"/> with default min and max n-grams. </summary>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- public Lucene43NGramTokenizer(TextReader input)
- : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
- {
- }
-
- private void Init(int minGram, int maxGram)
- {
- if (minGram < 1)
- {
- throw new System.ArgumentException("minGram must be greater than zero");
- }
- if (minGram > maxGram)
- {
- throw new System.ArgumentException("minGram must not be greater than maxGram");
- }
- this.minGram = minGram;
- this.maxGram = maxGram;
- termAtt = AddAttribute<ICharTermAttribute>();
- offsetAtt = AddAttribute<IOffsetAttribute>();
- }
-
- /// <summary>
- /// Returns the next token in the stream, or null at EOS. </summary>
- public override bool IncrementToken()
- {
- ClearAttributes();
- if (!started)
- {
- started = true;
- gramSize = minGram;
- char[] chars = new char[1024];
- charsRead = 0;
- // TODO: refactor to a shared readFully somewhere:
- while (charsRead < chars.Length)
- {
- int inc = m_input.Read(chars, charsRead, chars.Length - charsRead);
- if (inc == -1)
- {
- break;
- }
- charsRead += inc;
- }
- inStr = (new string(chars, 0, charsRead)).Trim(); // remove any trailing empty strings
-
- if (charsRead == chars.Length)
- {
- // Read extra throwaway chars so that on end() we
- // report the correct offset:
- var throwaway = new char[1024];
- while (true)
- {
- int inc = m_input.Read(throwaway, 0, throwaway.Length);
- if (inc == -1)
- {
- break;
- }
- charsRead += inc;
- }
- }
-
- inLen = inStr.Length;
- if (inLen == 0)
- {
- return false;
- }
- }
-
- if (pos + gramSize > inLen) // if we hit the end of the string
- {
- pos = 0; // reset to beginning of string
- gramSize++; // increase n-gram size
- if (gramSize > maxGram) // we are done
- {
- return false;
- }
- if (pos + gramSize > inLen)
- {
- return false;
- }
- }
-
- int oldPos = pos;
- pos++;
- termAtt.SetEmpty().Append(inStr, oldPos, oldPos + gramSize);
- offsetAtt.SetOffset(CorrectOffset(oldPos), CorrectOffset(oldPos + gramSize));
- return true;
- }
-
- public override void End()
- {
- base.End();
- // set final offset
- int finalOffset = CorrectOffset(charsRead);
- this.offsetAtt.SetOffset(finalOffset, finalOffset);
- }
-
- public override void Reset()
- {
- base.Reset();
- started = false;
- pos = 0;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
deleted file mode 100644
index ca1d0bc..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Util;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Factory for <see cref="NGramTokenFilter"/>.
- /// <code>
- /// <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/>
- /// </analyzer>
- /// </fieldType></code>
- /// </summary>
- public class NGramFilterFactory : TokenFilterFactory
- {
- private readonly int maxGramSize;
- private readonly int minGramSize;
-
- /// <summary>
- /// Creates a new <see cref="NGramFilterFactory"/> </summary>
- public NGramFilterFactory(IDictionary<string, string> args)
- : base(args)
- {
- minGramSize = GetInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
- maxGramSize = GetInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override TokenStream Create(TokenStream input)
- {
- return new NGramTokenFilter(m_luceneMatchVersion, input, minGramSize, maxGramSize);
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
deleted file mode 100644
index f1c82c5..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ /dev/null
@@ -1,252 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tokenizes the input into n-grams of the given size(s).
- /// <para>You must specify the required <see cref="LuceneVersion"/> compatibility when
- /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filters:
- /// <list type="bullet">
- /// <item>handles supplementary characters correctly,</item>
- /// <item>emits all n-grams for the same token at the same position,</item>
- /// <item>does not modify offsets,</item>
- /// <item>sorts n-grams by their offset in the original token first, then
- /// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
- /// "c").</item>
- /// </list>
- /// </para>
- /// <para>You can make this filter use the old behavior by providing a version <
- /// <see cref="LuceneVersion.LUCENE_44"/> in the constructor but this is not recommended as
- /// it will lead to broken <see cref="TokenStream"/>s that will cause highlighting
- /// bugs.
- /// </para>
- /// <para>If you were using this <see cref="TokenFilter"/> to perform partial highlighting,
- /// this won't work anymore since this filter doesn't update offsets. You should
- /// modify your analysis chain to use <see cref="NGramTokenizer"/>, and potentially
- /// override <see cref="NGramTokenizer.IsTokenChar(int)"/> to perform pre-tokenization.
- /// </para>
- /// </summary>
- public sealed class NGramTokenFilter : TokenFilter
- {
- public const int DEFAULT_MIN_NGRAM_SIZE = 1;
- public const int DEFAULT_MAX_NGRAM_SIZE = 2;
-
- private readonly int minGram, maxGram;
-
- private char[] curTermBuffer;
- private int curTermLength;
- private int curCodePointCount;
- private int curGramSize;
- private int curPos;
- private int curPosInc, curPosLen;
- private int tokStart;
- private int tokEnd;
- private bool hasIllegalOffsets; // only if the length changed before this filter
-
- private readonly LuceneVersion version;
- private readonly CharacterUtils charUtils;
- private readonly ICharTermAttribute termAtt;
- private readonly IPositionIncrementAttribute posIncAtt;
- private readonly IPositionLengthAttribute posLenAtt;
- private readonly IOffsetAttribute offsetAtt;
-
- /// <summary>
- /// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams. </summary>
- /// <param name="version"> Lucene version to enable correct position increments.
- /// See <see cref="NGramTokenFilter"/> for details. </param>
- /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
- : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
- {
- this.version = version;
- this.charUtils = version.OnOrAfter(
-#pragma warning disable 612, 618
- LuceneVersion.LUCENE_44) ?
-#pragma warning restore 612, 618
- CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
- if (minGram < 1)
- {
- throw new System.ArgumentException("minGram must be greater than zero");
- }
- if (minGram > maxGram)
- {
- throw new System.ArgumentException("minGram must not be greater than maxGram");
- }
- this.minGram = minGram;
- this.maxGram = maxGram;
-#pragma warning disable 612, 618
- if (version.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- posIncAtt = AddAttribute<IPositionIncrementAttribute>();
- posLenAtt = AddAttribute<IPositionLengthAttribute>();
- }
- else
- {
- posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
- posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
- }
- termAtt = AddAttribute<ICharTermAttribute>();
- offsetAtt = AddAttribute<IOffsetAttribute>();
- }
-
- private class PositionIncrementAttributeAnonymousInnerClassHelper : PositionIncrementAttribute
- {
- private readonly NGramTokenFilter outerInstance;
-
- public PositionIncrementAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- public override int PositionIncrement
- {
- set
- {
- }
- get
- {
- return 0;
- }
- }
- }
-
- private class PositionLengthAttributeAnonymousInnerClassHelper : PositionLengthAttribute
- {
- private readonly NGramTokenFilter outerInstance;
-
- public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
- {
- this.outerInstance = outerInstance;
- }
-
- public override int PositionLength
- {
- set
- {
- }
- get
- {
- return 0;
- }
- }
- }
-
- /// <summary>
- /// Creates <see cref="NGramTokenFilter"/> with default min and max n-grams. </summary>
- /// <param name="version"> Lucene version to enable correct position increments.
- /// See <see cref="NGramTokenFilter"/> for details. </param>
- /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
- public NGramTokenFilter(LuceneVersion version, TokenStream input)
- : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
- {
- }
-
- /// <summary>
- /// Returns the next token in the stream, or null at EOS.
- /// </summary>
- public override sealed bool IncrementToken()
- {
- while (true)
- {
- if (curTermBuffer == null)
- {
- if (!m_input.IncrementToken())
- {
- return false;
- }
- else
- {
- curTermBuffer = (char[])termAtt.Buffer.Clone();
- curTermLength = termAtt.Length;
- curCodePointCount = charUtils.CodePointCount(termAtt.ToString());
- curGramSize = minGram;
- curPos = 0;
- curPosInc = posIncAtt.PositionIncrement;
- curPosLen = posLenAtt.PositionLength;
- tokStart = offsetAtt.StartOffset;
- tokEnd = offsetAtt.EndOffset;
- // if length by start + end offsets doesn't match the term text then assume
- // this is a synonym and don't adjust the offsets.
- hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
- }
- }
-#pragma warning disable 612, 618
- if (version.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount)
- {
- ++curPos;
- curGramSize = minGram;
- }
- if ((curPos + curGramSize) <= curCodePointCount)
- {
- ClearAttributes();
- int start = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
- int end = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
- termAtt.CopyBuffer(curTermBuffer, start, end - start);
- posIncAtt.PositionIncrement = curPosInc;
- curPosInc = 0;
- posLenAtt.PositionLength = curPosLen;
- offsetAtt.SetOffset(tokStart, tokEnd);
- curGramSize++;
- return true;
- }
- }
- else
- {
- while (curGramSize <= maxGram)
- {
- while (curPos + curGramSize <= curTermLength) // while there is input
- {
- ClearAttributes();
- termAtt.CopyBuffer(curTermBuffer, curPos, curGramSize);
- if (hasIllegalOffsets)
- {
- offsetAtt.SetOffset(tokStart, tokEnd);
- }
- else
- {
- offsetAtt.SetOffset(tokStart + curPos, tokStart + curPos + curGramSize);
- }
- curPos++;
- return true;
- }
- curGramSize++; // increase n-gram size
- curPos = 0;
- }
- }
- curTermBuffer = null;
- }
- }
-
- public override void Reset()
- {
- base.Reset();
- curTermBuffer = null;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
deleted file mode 100644
index b1845c8..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ /dev/null
@@ -1,319 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using System;
-using System.Diagnostics;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Tokenizes the input into n-grams of the given size(s).
- /// <para>On the contrary to <see cref="NGramTokenFilter"/>, this class sets offsets so
- /// that characters between startOffset and endOffset in the original stream are
- /// the same as the term chars.
- /// </para>
- /// <para>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
- /// <list type="table">
- /// <listheader>
- /// <term>Term</term>
- /// <term>Position increment</term>
- /// <term>Position length</term>
- /// <term>Offsets</term>
- /// </listheader>
- /// <item>
- /// <term>ab</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[0,2[</term>
- /// </item>
- /// <item>
- /// <term>abc</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[0,3[</term>
- /// </item>
- /// <item>
- /// <term>bc</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[1,3[</term>
- /// </item>
- /// <item>
- /// <term>bcd</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[1,4[</term>
- /// </item>
- /// <item>
- /// <term>cd</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[2,4[</term>
- /// </item>
- /// <item>
- /// <term>cde</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[2,5[</term>
- /// </item>
- /// <item>
- /// <term>de</term>
- /// <term>1</term>
- /// <term>1</term>
- /// <term>[3,5[</term>
- /// </item>
- /// </list>
- /// </para>
- /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:
- /// <list type="bullet">
- /// <item>tokenize in a streaming fashion to support streams which are larger
- /// than 1024 chars (limit of the previous version),</item>
- /// <item>count grams based on unicode code points instead of java chars (and
- /// never split in the middle of surrogate pairs),</item>
- /// <item>give the ability to pre-tokenize the stream (<see cref="IsTokenChar(int)"/>)
- /// before computing n-grams.</item>
- /// </list>
- /// </para>
- /// <para>Additionally, this class doesn't trim trailing whitespaces and emits
- /// tokens in a different order, tokens are now emitted by increasing start
- /// offsets while they used to be emitted by increasing lengths (which prevented
- /// from supporting large input streams).
- /// </para>
- /// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
- /// to use the old behavior through <see cref="Lucene43NGramTokenizer"/>.
- /// </para>
- /// </summary>
- // non-sealed to allow for overriding IsTokenChar, but all other methods should be sealed
- public class NGramTokenizer : Tokenizer
- {
- public const int DEFAULT_MIN_NGRAM_SIZE = 1;
- public const int DEFAULT_MAX_NGRAM_SIZE = 2;
-
- private CharacterUtils charUtils;
- private CharacterUtils.CharacterBuffer charBuffer;
- private int[] buffer; // like charBuffer, but converted to code points
- private int bufferStart, bufferEnd; // remaining slice in buffer
- private int offset;
- private int gramSize;
- private int minGram, maxGram;
- private bool exhausted;
- private int lastCheckedChar; // last offset in the buffer that we checked
- private int lastNonTokenChar; // last offset that we found to not be a token char
- private bool edgesOnly; // leading edges n-grams only
-
- private ICharTermAttribute termAtt;
- private IPositionIncrementAttribute posIncAtt;
- private IPositionLengthAttribute posLenAtt;
- private IOffsetAttribute offsetAtt;
-
- internal NGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram, bool edgesOnly)
- : base(input)
- {
- Init(version, minGram, maxGram, edgesOnly);
- }
-
- /// <summary>
- /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility version </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public NGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
- : this(version, input, minGram, maxGram, false)
- {
- }
-
- internal NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram, bool edgesOnly)
- : base(factory, input)
- {
- Init(version, minGram, maxGram, edgesOnly);
- }
-
- /// <summary>
- /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility version </param>
- /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- /// <param name="minGram"> the smallest n-gram to generate </param>
- /// <param name="maxGram"> the largest n-gram to generate </param>
- public NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
- : this(version, factory, input, minGram, maxGram, false)
- {
- }
-
- /// <summary>
- /// Creates <see cref="NGramTokenizer"/> with default min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility version </param>
- /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
- public NGramTokenizer(LuceneVersion version, TextReader input)
- : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
- {
- }
-
- private void Init(LuceneVersion version, int minGram, int maxGram, bool edgesOnly)
- {
-#pragma warning disable 612, 618
- if (!version.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
- }
-#pragma warning disable 612, 618
- charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ?
-#pragma warning restore 612, 618
- CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
- if (minGram < 1)
- {
- throw new System.ArgumentException("minGram must be greater than zero");
- }
- if (minGram > maxGram)
- {
- throw new System.ArgumentException("minGram must not be greater than maxGram");
- }
- termAtt = AddAttribute<ICharTermAttribute>();
- posIncAtt = AddAttribute<IPositionIncrementAttribute>();
- posLenAtt = AddAttribute<IPositionLengthAttribute>();
- offsetAtt = AddAttribute<IOffsetAttribute>();
- this.minGram = minGram;
- this.maxGram = maxGram;
- this.edgesOnly = edgesOnly;
- charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
- buffer = new int[charBuffer.Buffer.Length];
-
- // Make the term att large enough
- termAtt.ResizeBuffer(2 * maxGram);
- }
-
- public override sealed bool IncrementToken()
- {
- ClearAttributes();
-
- // termination of this loop is guaranteed by the fact that every iteration
- // either advances the buffer (calls consumes()) or increases gramSize
- while (true)
- {
- // compact
- if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted)
- {
- Array.Copy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
- bufferEnd -= bufferStart;
- lastCheckedChar -= bufferStart;
- lastNonTokenChar -= bufferStart;
- bufferStart = 0;
-
- // fill in remaining space
- exhausted = !charUtils.Fill(charBuffer, m_input, buffer.Length - bufferEnd);
- // convert to code points
- bufferEnd += charUtils.ToCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
- }
-
- // should we go to the next offset?
- if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd)
- {
- if (bufferStart + 1 + minGram > bufferEnd)
- {
- Debug.Assert(exhausted);
- return false;
- }
- Consume();
- gramSize = minGram;
- }
-
- UpdateLastNonTokenChar();
-
- // retry if the token to be emitted was going to not only contain token chars
- bool termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
- bool isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
- if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar)
- {
- Consume();
- gramSize = minGram;
- continue;
- }
-
- int length = charUtils.ToChars(buffer, bufferStart, gramSize, termAtt.Buffer, 0);
- termAtt.Length = length;
- posIncAtt.PositionIncrement = 1;
- posLenAtt.PositionLength = 1;
- offsetAtt.SetOffset(CorrectOffset(offset), CorrectOffset(offset + length));
- ++gramSize;
- return true;
- }
- }
-
- private void UpdateLastNonTokenChar()
- {
- int termEnd = bufferStart + gramSize - 1;
- if (termEnd > lastCheckedChar)
- {
- for (int i = termEnd; i > lastCheckedChar; --i)
- {
- if (!IsTokenChar(buffer[i]))
- {
- lastNonTokenChar = i;
- break;
- }
- }
- lastCheckedChar = termEnd;
- }
- }
-
- /// <summary>
- /// Consume one code point. </summary>
- private void Consume()
- {
- offset += Character.CharCount(buffer[bufferStart++]);
- }
-
- /// <summary>
- /// Only collect characters which satisfy this condition. </summary>
- protected virtual bool IsTokenChar(int chr)
- {
- return true;
- }
-
- public override sealed void End()
- {
- base.End();
- Debug.Assert(bufferStart <= bufferEnd);
- int endOffset = offset;
- for (int i = bufferStart; i < bufferEnd; ++i)
- {
- endOffset += Character.CharCount(buffer[i]);
- }
- endOffset = CorrectOffset(endOffset);
- // set final offset
- offsetAtt.SetOffset(endOffset, endOffset);
- }
-
- public override sealed void Reset()
- {
- base.Reset();
- bufferStart = bufferEnd = buffer.Length;
- lastNonTokenChar = lastCheckedChar = bufferStart - 1;
- offset = 0;
- gramSize = minGram;
- exhausted = false;
- charBuffer.Reset();
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
deleted file mode 100644
index cf25b65..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
+++ /dev/null
@@ -1,70 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using System.Collections.Generic;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Factory for <see cref="NGramTokenizer"/>.
- /// <code>
- /// <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
- /// </analyzer>
- /// </fieldType></code>
- /// </summary>
- public class NGramTokenizerFactory : TokenizerFactory
- {
- private readonly int maxGramSize;
- private readonly int minGramSize;
-
- /// <summary>
- /// Creates a new <see cref="NGramTokenizerFactory"/> </summary>
- public NGramTokenizerFactory(IDictionary<string, string> args)
- : base(args)
- {
- minGramSize = GetInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
- maxGramSize = GetInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- /// <summary>
- /// Creates the <see cref="TokenStream"/> of n-grams from the given <see cref="TextReader"/> and <see cref="AttributeSource.AttributeFactory"/>. </summary>
- public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
- {
-#pragma warning disable 612, 618
- if (m_luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
-#pragma warning restore 612, 618
- {
- return new NGramTokenizer(m_luceneMatchVersion, factory, input, minGramSize, maxGramSize);
- }
- else
- {
-#pragma warning disable 612, 618
- return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
-#pragma warning restore 612, 618
- }
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilterTest.cs
new file mode 100644
index 0000000..ea6fbd7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilterTest.cs
@@ -0,0 +1,390 @@
+\ufeffusing Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Shingle;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <seealso cref="EdgeNGramTokenFilter"/> for correctness.
+ /// </summary>
+ public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
+ {
+ private TokenStream input;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+ input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput()
+ {
+ bool gotException = false;
+ try
+ {
+#pragma warning disable 612, 618
+ new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 0, 0);
+#pragma warning restore 612, 618
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput2()
+ {
+ bool gotException = false;
+ try
+ {
+#pragma warning disable 612, 618
+ new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 2, 1);
+#pragma warning restore 612, 618
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestInvalidInput3()
+ {
+ bool gotException = false;
+ try
+ {
+#pragma warning disable 612, 618
+ new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, -1, 2);
+#pragma warning restore 612, 618
+ }
+ catch (System.ArgumentException)
+ {
+ gotException = true;
+ }
+ assertTrue(gotException);
+ }
+
+ [Test]
+ public virtual void TestFrontUnigram()
+ {
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 5 });
+ }
+
+ [Test]
+ public virtual void TestBackUnigram()
+ {
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 });
+ }
+
+ [Test]
+ public virtual void TestOversizedNgrams()
+ {
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0]);
+ }
+
+ [Test]
+ public virtual void TestFrontRangeOfNgrams()
+ {
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
+ }
+
+ [Test]
+ public virtual void TestBackRangeOfNgrams()
+ {
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, null, false);
+ }
+
+ [Test]
+ public virtual void TestFilterPositions()
+ {
+ TokenStream ts = new MockTokenizer(new StringReader("abcde vwxyz"), MockTokenizer.WHITESPACE, false);
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "v", "vw", "vwx" }, new int[] { 0, 0, 0, 6, 6, 6 }, new int[] { 5, 5, 5, 11, 11, 11 }, null, new int[] { 1, 0, 0, 1, 0, 0 }, null, null, false);
+ }
+
+ private class PositionFilter : TokenFilter
+ {
+
+ internal readonly IPositionIncrementAttribute posIncrAtt;
+ internal bool started;
+
+ internal PositionFilter(TokenStream input) : base(input)
+ {
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ public override sealed bool IncrementToken()
+ {
+ if (m_input.IncrementToken())
+ {
+ if (started)
+ {
+ posIncrAtt.PositionIncrement = 0;
+ }
+ else
+ {
+ started = true;
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ started = false;
+ }
+ }
+
+ [Test]
+ public virtual void TestFirstTokenPositionIncrement()
+ {
+ TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
+ ts = new PositionFilter(ts); // All but first token will get 0 position increment
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 2, 3);
+#pragma warning restore 612, 618
+ // The first token "a" will not be output, since it's smaller than the mingram size of 2.
+ // The second token on input to EdgeNGramTokenFilter will have position increment of 0,
+ // which should be increased to 1, since this is the first output token in the stream.
+ AssertTokenStreamContents(filter, new string[] { "ab", "abc" }, new int[] { 2, 2 }, new int[] { 5, 5 }, new int[] { 1, 0 });
+ }
+
+ [Test]
+ public virtual void TestSmallTokenInStream()
+ {
+ input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(tokenizer, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
+ }
+
+ [Test]
+ public virtual void TestReset()
+ {
+ WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+#pragma warning disable 612, 618
+ EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
+#pragma warning restore 612, 618
+ AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
+ tokenizer.SetReader(new StringReader("abcde"));
+ AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
+ }
+
+ // LUCENE-3642
+ // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
+ // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
+ // so in this case we behave like WDF, and preserve any modified offsets
+ [Test]
+ public virtual void TestInvalidOffsets()
+ {
+ Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
+ AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "mos", "mosf", "mosfe", "mosfel", "mosfell", "mosfells", "mosfellsb", "mosfellsba", "mosfellsbae", "mosfellsbaer" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 });
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly EdgeNGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
+#pragma warning disable 612, 618
+ filters = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
+#pragma warning restore 612, 618
+ return new TokenStreamComponents(tokenizer, filters);
+ }
+ }
+
+ /// <summary>
+ /// blast some random strings through the analyzer </summary>
+ [Test]
+ public virtual void TestRandomStrings()
+ {
+ for (int i = 0; i < 10; i++)
+ {
+ int min = TestUtil.NextInt(Random(), 2, 10);
+ int max = TestUtil.NextInt(Random(), min, 20);
+
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
+ CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER);
+ }
+
+ Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this);
+ CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+ {
+ private readonly EdgeNGramTokenFilterTest outerInstance;
+
+ private int min;
+ private int max;
+
+ public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenFilterTest outerInstance, int min, int max)
+ {
+ this.outerInstance = outerInstance;
+ this.min = min;
+ this.max = max;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+ {
+ private readonly EdgeNGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper3(EdgeNGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+#pragma warning disable 612, 618
+ return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
+#pragma warning restore 612, 618
+ }
+ }
+
+ [Test]
+ public virtual void TestEmptyTerm()
+ {
+ Random random = Random();
+ Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this);
+ CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
+
+ Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this);
+ CheckAnalysisConsistency(random, b, random.nextBoolean(), "");
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+ {
+ private readonly EdgeNGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper4(EdgeNGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+#pragma warning disable 612, 618
+ return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
+#pragma warning restore 612, 618
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+ {
+ private readonly EdgeNGramTokenFilterTest outerInstance;
+
+ public AnalyzerAnonymousInnerClassHelper5(EdgeNGramTokenFilterTest outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = new KeywordTokenizer(reader);
+#pragma warning disable 612, 618
+ return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
+#pragma warning restore 612, 618
+ }
+ }
+
+ [Test]
+ public virtual void TestGraphs()
+ {
+ TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
+ tk = new ShingleFilter(tk);
+ tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
+ AssertTokenStreamContents(tk, new string[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6, 11, 11, 14 }, new int[] { 13, 19, 19, 21 }, new int[] { 3, 1, 0, 1 }, new int[] { 2, 2, 2, 2 }, 23);
+ }
+
+ [Test]
+ public virtual void TestSupplementaryCharacters()
+ {
+ string s = TestUtil.RandomUnicodeString(Random(), 10);
+ int codePointCount = s.CodePointCount(0, s.Length);
+ int minGram = TestUtil.NextInt(Random(), 1, 3);
+ int maxGram = TestUtil.NextInt(Random(), minGram, 10);
+ TokenStream tk = new KeywordTokenizer(new StringReader(s));
+ tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
+ ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
+ IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
+ tk.Reset();
+ for (int i = minGram; i <= Math.Min(codePointCount, maxGram); ++i)
+ {
+ assertTrue(tk.IncrementToken());
+ assertEquals(0, offsetAtt.StartOffset);
+ assertEquals(s.Length, offsetAtt.EndOffset);
+ int end = Character.OffsetByCodePoints(s, 0, i);
+ assertEquals(s.Substring(0, end), termAtt.ToString());
+ }
+ assertFalse(tk.IncrementToken());
+ }
+ }
+}
\ No newline at end of file
[13/39] lucenenet git commit: Lucene.Net.Analysis.Util
(CharArrayIterator + CharArrayMap + CharArraySet) refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util (CharArrayIterator + CharArrayMap + CharArraySet) refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b19aee50
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b19aee50
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b19aee50
Branch: refs/heads/api-work
Commit: b19aee50367acf30f8e678e2430875c2a0467cb7
Parents: 47155b3
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 18:30:43 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:18 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/CharArrayIterator.cs | 13 +-
.../Analysis/Util/CharArrayMap.cs | 322 +++++++++++--------
.../Analysis/Util/CharArraySet.cs | 83 ++---
3 files changed, 232 insertions(+), 186 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b19aee50/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
index e50e87e..b5a3456 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
@@ -1,7 +1,6 @@
\ufeffusing Lucene.Net.Support;
\ufeffusing System;
using System.Diagnostics.CodeAnalysis;
-using Icu;
namespace Lucene.Net.Analysis.Util
{
@@ -26,7 +25,7 @@ namespace Lucene.Net.Analysis.Util
/// A CharacterIterator used internally for use with <see cref="BreakIterator"/>
/// @lucene.internal
/// </summary>
- public abstract class CharArrayIterator : CharacterIterator
+ public abstract class CharArrayIterator : CharacterIterator // LUCENENET TODO: Since the only purpose of this class is to work around Java bugs, is this class really needed?
{
private char[] array;
private int start;
@@ -83,7 +82,7 @@ namespace Lucene.Net.Analysis.Util
}
}
- protected internal abstract char JreBugWorkaround(char ch);
+ protected abstract char JreBugWorkaround(char ch);
public override char First()
@@ -170,7 +169,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Create a new CharArrayIterator that works around JRE bugs
- /// in a manner suitable for <see cref="BreakIterator#getSentenceInstance()"/>
+ /// in a manner suitable for <c>BreakIterator#getSentenceInstance()</c>
/// </summary>
public static CharArrayIterator NewSentenceInstance()
{
@@ -180,7 +179,7 @@ namespace Lucene.Net.Analysis.Util
private class CharArrayIteratorAnonymousInnerClassHelper2 : CharArrayIterator
{
// no bugs
- protected internal override char JreBugWorkaround(char ch)
+ protected override char JreBugWorkaround(char ch)
{
return ch;
}
@@ -188,7 +187,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Create a new CharArrayIterator that works around JRE bugs
- /// in a manner suitable for <see cref="BreakIterator#getWordInstance()"/>
+ /// in a manner suitable for <c>BreakIterator#getWordInstance()</c>
/// </summary>
public static CharArrayIterator NewWordInstance()
{
@@ -198,7 +197,7 @@ namespace Lucene.Net.Analysis.Util
private class CharArrayIteratorAnonymousInnerClassHelper4 : CharArrayIterator
{
// no bugs
- protected internal override char JreBugWorkaround(char ch)
+ protected override char JreBugWorkaround(char ch)
{
return ch;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b19aee50/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index b6e2e36..f8a8ad2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -72,13 +72,13 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// LUCENENET SPECIFIC type used to act as a placeholder. Since <c>null</c>
/// means that our value is not populated, we need an instance of something
- /// to indicate it is. Using an instance of <see cref="V"/> would only work if
+ /// to indicate it is. Using an instance of <typeparamref name="TValue"/> would only work if
/// we could constrain it with the new() constraint, which isn't possible because
/// some types such as <see cref="string"/> don't have a default constructor.
/// So, this is a workaround that allows any type regardless of the type of constructor.
///
/// <para>
- /// Note also that we gain the ability to use value types for <see cref="V"/>, but
+ /// Note also that we gain the ability to use value types for <typeparamref name="TValue"/>, but
/// also create a difference in behavior from Java Lucene where the actual values
/// returned could be <c>null</c>.
/// </para>
@@ -102,11 +102,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Create map with enough capacity to hold startSize terms
+ /// Create map with enough capacity to hold <paramref name="startSize"/> terms
/// </summary>
/// <param name="matchVersion">
- /// compatibility match version see <a href="#version">Version
- /// note</a> above for details. </param>
+ /// lucene compatibility version - see <see cref="CharArrayMap{TValue}"/> for details. </param>
/// <param name="startSize">
/// the initial capacity </param>
/// <param name="ignoreCase">
@@ -133,7 +132,7 @@ namespace Lucene.Net.Analysis.Util
/// compatibility match version see <a href="#version">Version
/// note</a> above for details. </param>
/// <param name="c">
- /// a map (<see cref="IDictionary{string, V}"/>) whose mappings to be copied </param>
+ /// a map (<see cref="T:IDictionary{string, V}"/>) whose mappings to be copied </param>
/// <param name="ignoreCase">
/// <c>false</c> if and only if the set should be case sensitive;
/// otherwise <c>true</c>. </param>
@@ -160,11 +159,11 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Adds the <see cref="KeyValuePair{string, V}.Value"/> for the passed in <see cref="KeyValuePair{string, V}.Key"/>.
- /// Note that the <see cref="KeyValuePair{string, V}"/> instance is not added to the dictionary.
+ /// Adds the <see cref="T:KeyValuePair{string, V}.Value"/> for the passed in <see cref="T:KeyValuePair{string, V}.Key"/>.
+ /// Note that the <see cref="T:KeyValuePair{string, V}"/> instance is not added to the dictionary.
/// </summary>
- /// <param name="item">A <see cref="KeyValuePair{string, V}"/> whose <see cref="KeyValuePair{string, V}.Value"/>
- /// will be added for the corresponding <see cref="KeyValuePair{string, V}.Key"/>. </param>
+ /// <param name="item">A <see cref="T:KeyValuePair{string, V}"/> whose <see cref="T:KeyValuePair{string, V}.Value"/>
+ /// will be added for the corresponding <see cref="T:KeyValuePair{string, V}.Key"/>. </param>
public virtual void Add(KeyValuePair<string, TValue> item)
{
Add(item.Key, item.Value);
@@ -209,12 +208,12 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Copies all items in the current dictionary the <paramref name="array"/> starting at the <see cref="arrayIndex"/>.
+ /// Copies all items in the current dictionary the <paramref name="array"/> starting at the <paramref name="arrayIndex"/>.
/// The array is assumed to already be dimensioned to fit the elements in this dictionary; otherwise a <see cref="ArgumentOutOfRangeException"/>
/// will be thrown.
/// </summary>
/// <param name="array">The array to copy the items into.</param>
- /// <param name="arrayIndex">A 32-bit integer that represents the index in <see cref="array"/> at which copying begins.</param>
+ /// <param name="arrayIndex">A 32-bit integer that represents the index in <paramref name="array"/> at which copying begins.</param>
public virtual void CopyTo(KeyValuePair<string, TValue>[] array, int arrayIndex)
{
var iter = (EntryIterator)EntrySet().GetEnumerator();
@@ -240,7 +239,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// <c>true</c> if the <paramref name="length"/> chars of <paramref name="text"/> starting at <paramref name="offset"/>
- /// are in the <see cref="KeySet"/>
+ /// are in the <see cref="Keys"/>
/// </summary>
public virtual bool ContainsKey(char[] text, int offset, int length)
{
@@ -248,7 +247,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// <c>true</c> if the entire <see cref="KeySet"/> is the same as the
+ /// <c>true</c> if the entire <see cref="Keys"/> is the same as the
/// <paramref name="text"/> <see cref="T:char[]"/> being passed in;
/// otherwise <c>false</c>.
/// </summary>
@@ -258,7 +257,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// <c>true</c> if the <paramref name="text"/> <see cref="string"/> is in the <see cref="KeySet"/>;
+ /// <c>true</c> if the <paramref name="text"/> <see cref="string"/> is in the <see cref="Keys"/>;
/// otherwise <c>false</c>
/// </summary>
public virtual bool ContainsKey(string text)
@@ -267,17 +266,17 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// <c>true</c> if the <paramref name="text"/> <see cref="ICharSequence"/> is in the <see cref="KeySet"/>;
+ /// <c>true</c> if the <paramref name="text"/> <see cref="ICharSequence"/> is in the <see cref="Keys"/>;
/// otherwise <c>false</c>
/// </summary>
- public virtual bool ContainsKey(ICharSequence cs)
+ public virtual bool ContainsKey(ICharSequence text)
{
- return keys[GetSlot(cs)] != null;
+ return keys[GetSlot(text)] != null;
}
/// <summary>
- /// <c>true</c> if the <paramref name="o"/> <see cref="object.ToString()"/> is in the <see cref="KeySet"/>;
+ /// <c>true</c> if the <paramref name="o"/> <see cref="object.ToString()"/> is in the <see cref="Keys"/>;
/// otherwise <c>false</c>
/// </summary>
public virtual bool ContainsKey(object o)
@@ -318,18 +317,18 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// returns the value of the mapping of the chars inside this <see cref="ICharSequence"/>
/// </summary>
- public virtual TValue Get(ICharSequence cs)
+ public virtual TValue Get(ICharSequence text)
{
- var value = values[GetSlot(cs)];
+ var value = values[GetSlot(text)];
return (value != null) ? value.Value : default(TValue);
}
/// <summary>
/// returns the value of the mapping of the chars inside this <see cref="string"/>
/// </summary>
- public virtual TValue Get(string cs)
+ public virtual TValue Get(string text)
{
- var value = values[GetSlot(cs)];
+ var value = values[GetSlot(text)];
return (value != null) ? value.Value : default(TValue);
}
@@ -490,10 +489,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// LUCENENET specific. Centralizes the logic between <see cref="Put"/>
+ /// LUCENENET specific. Centralizes the logic between Put()
/// implementations that accept a value and those that don't. This value is
/// so we know whether or not the value was set, since we can't reliably do
- /// a check for <c>null</c> on the <see cref="V"/> type.
+ /// a check for <c>null</c> on the <typeparamref name="TValue"/> type.
/// </summary>
private MapValue PutImpl(char[] text, MapValue value)
{
@@ -523,7 +522,7 @@ namespace Lucene.Net.Analysis.Util
#region PutAll
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{char[],TValue}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IDictionary{char[],TValue}"/>'s
/// entries, and calls this map's <see cref="Put(char[], TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">A dictionary of values to add/update in the current map.</param>
@@ -536,7 +535,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{string,TValue}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IDictionary{string,TValue}"/>'s
/// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">A dictionary of values to add/update in the current map.</param>
@@ -549,7 +548,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{ICharSequence,TValue}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IDictionary{ICharSequence,TValue}"/>'s
/// entries, and calls this map's <see cref="Put(ICharSequence, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">A dictionary of values to add/update in the current map.</param>
@@ -562,7 +561,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{object,TValue}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IDictionary{object,TValue}"/>'s
/// entries, and calls this map's <see cref="Put(object, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">A dictionary of values to add/update in the current map.</param>
@@ -575,7 +574,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{char[],TValue}}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IEnumerable{KeyValuePair{char[],TValue}}"/>'s
/// entries, and calls this map's <see cref="Put(char[], TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">The values to add/update in the current map.</param>
@@ -588,7 +587,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{string,TValue}}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IEnumerable{KeyValuePair{string,TValue}}"/>'s
/// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">The values to add/update in the current map.</param>
@@ -601,7 +600,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{ICharSequence,TValue}}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IEnumerable{KeyValuePair{ICharSequence,TValue}}"/>'s
/// entries, and calls this map's <see cref="Put(ICharSequence, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">The values to add/update in the current map.</param>
@@ -614,7 +613,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{object,TValue}}"/>'s
+ /// This implementation enumerates over the specified <see cref="T:IEnumerable{KeyValuePair{object,TValue}}"/>'s
/// entries, and calls this map's <see cref="Put(object, TValue)"/> operation once for each entry.
/// </summary>
/// <param name="collection">The values to add/update in the current map.</param>
@@ -750,7 +749,7 @@ namespace Lucene.Net.Analysis.Util
/// LUCENENET Specific - test for value equality similar to how it is done in Java
/// </summary>
/// <param name="obj">Another dictionary to test the values of</param>
- /// <returns><c>true</c> if the given object is an <see cref="IDictionary{object, V}"/> that contains
+ /// <returns><c>true</c> if the given object is an <see cref="T:IDictionary{object, V}"/> that contains
/// the same key value pairs as the current map</returns>
public override bool Equals(object obj)
{
@@ -1456,7 +1455,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// helper for CharArraySet to not produce endless recursion
+ /// helper for <see cref="CharArraySet"/> to not produce endless recursion
/// </summary>
#if !NETSTANDARD
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
@@ -1640,7 +1639,7 @@ namespace Lucene.Net.Analysis.Util
/// public EntrySet_ class so efficient methods are exposed to users
///
/// NOTE: In .NET this was renamed to EntrySet_ because it conflicted with the
- /// method EntrySet(). Since there is also an extension method named <see cref="IDictionary{K,V}.EntrySet()"/>
+ /// method EntrySet(). Since there is also an extension method named <see cref="T:IDictionary{K,V}.EntrySet()"/>
/// that this class needs to override, changing the name of the method was not
/// possible because the extension method would produce incorrect results if it were
/// inadvertently called, leading to hard-to-diagnose bugs.
@@ -1767,7 +1766,7 @@ namespace Lucene.Net.Analysis.Util
return EMPTY_MAP;
}
- // LUCENENET: Moved UnmodifyableCharArraymap to CharArrayMap class
+ // LUCENENET: Moved UnmodifiableCharArraymap to CharArrayMap class
// LUCENENET: Moved EmptyCharArrayMap to CharArrayMap class
}
@@ -1783,8 +1782,8 @@ namespace Lucene.Net.Analysis.Util
bool ContainsKey(char[] text, int offset, int length);
bool ContainsKey(char[] text);
bool ContainsKey(object o);
- bool ContainsKey(string cs);
- bool ContainsKey(ICharSequence cs);
+ bool ContainsKey(string text);
+ bool ContainsKey(ICharSequence text);
int Count { get; }
LuceneVersion MatchVersion { get; }
ICollection<string> OriginalKeySet { get; }
@@ -1807,8 +1806,8 @@ namespace Lucene.Net.Analysis.Util
/// <b>Note:</b> If you intend to create a copy of another <see cref="CharArrayMap{TValue}"/> where
/// the <see cref="LuceneVersion"/> of the source map differs from its copy
/// <see cref="CharArrayMap{TValue}.CharArrayMap(LuceneVersion, IDictionary{string, TValue}, bool)"/> should be used instead.
- /// The <see cref="Copy(LuceneVersion, IDictionary{string, TValue})"/> will preserve the <see cref="LuceneVersion"/> of the
- /// source map it is an instance of <see cref="CharArrayMap{TValue}"/>.
+ /// The <see cref="Copy{TValue}(LuceneVersion, IDictionary{string, TValue})"/> will preserve the <see cref="LuceneVersion"/> of the
+ /// source map if it is an instance of <see cref="CharArrayMap{TValue}"/>.
/// </para>
/// </summary>
/// <param name="matchVersion">
@@ -1879,7 +1878,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Used by <see cref="CharArraySet"/> to create an <see cref="UnmodifiableCharArrayMap{TValue}"/> instance
- /// without knowing the type of <see cref="TValue"/>.
+ /// without knowing the type of <typeparamref name="TValue"/>.
/// </summary>
internal static ICharArrayMap UnmodifiableMap<TValue>(ICharArrayMap map)
{
@@ -2017,8 +2016,8 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Empty <see cref="CharArrayMap{V}.UnmodifiableCharArrayMap"/> optimized for speed.
- /// Contains checks will always return <code>false</code> or throw
+ /// Empty <see cref="UnmodifiableCharArrayMap{V}"/> optimized for speed.
+ /// Contains checks will always return <c>false</c> or throw
/// NPE if necessary.
/// </summary>
internal class EmptyCharArrayMap<V> : UnmodifiableCharArrayMap<V>
@@ -2048,11 +2047,11 @@ namespace Lucene.Net.Analysis.Util
return false;
}
- public override bool ContainsKey(ICharSequence cs)
+ public override bool ContainsKey(ICharSequence text)
{
- if (cs == null)
+ if (text == null)
{
- throw new ArgumentNullException("cs");
+ throw new ArgumentNullException("text");
}
return false;
}
@@ -2084,11 +2083,11 @@ namespace Lucene.Net.Analysis.Util
return default(V);
}
- public override V Get(ICharSequence cs)
+ public override V Get(ICharSequence text)
{
- if (cs == null)
+ if (text == null)
{
- throw new ArgumentNullException("cs");
+ throw new ArgumentNullException("text");
}
return default(V);
}
@@ -2461,183 +2460,197 @@ namespace Lucene.Net.Analysis.Util
#region PutAll
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{bool,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<bool, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<bool, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{byte,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<byte, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<byte, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{char,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<char, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<char, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put("" + kvp.Key, kvp.Value);
}
}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IDictionary{Decimal,TValue}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
- ///// <param name="collection">A dictionary of values to add/update in the current map.</param>
- //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<decimal, TValue> collection)
+ ///// <param name="map">this map</param>
+ ///// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<decimal, TValue> dictionary)
//{
- // foreach (var kvp in collection)
+ // foreach (var kvp in dictionary)
// {
// map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
// }
//}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IDictionary{double,TValue}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
- ///// <param name="collection">A dictionary of values to add/update in the current map.</param>
- //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<double, TValue> collection)
+ ///// <param name="map">this map</param>
+ ///// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<double, TValue> dictionary)
//{
- // foreach (var kvp in collection)
+ // foreach (var kvp in dictionary)
// {
// map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
// }
//}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IDictionary{float,TValue}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
- ///// <param name="collection">A dictionary of values to add/update in the current map.</param>
- //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<float, TValue> collection)
+ ///// <param name="map">this map</param>
+ ///// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ //public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<float, TValue> dictionary)
//{
- // foreach (var kvp in collection)
+ // foreach (var kvp in dictionary)
// {
// map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
// }
//}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{int,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<int, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<int, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{long,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<long, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<long, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{sbyte,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
[CLSCompliant(false)]
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<sbyte, TValue> collection)
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<sbyte, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{short,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<short, TValue> collection)
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<short, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{uint,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
[CLSCompliant(false)]
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<uint, TValue> collection)
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<uint, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{ulong,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
[CLSCompliant(false)]
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<ulong, TValue> collection)
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<ulong, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IDictionary{ushort,TValue}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="dictionary"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
- /// <param name="collection">A dictionary of values to add/update in the current map.</param>
+ /// <param name="map">this map</param>
+ /// <param name="dictionary">A dictionary of values to add/update in the current map.</param>
[CLSCompliant(false)]
- public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<ushort, TValue> collection)
+ public static void PutAll<TValue>(this CharArrayMap<TValue> map, IDictionary<ushort, TValue> dictionary)
{
- foreach (var kvp in collection)
+ foreach (var kvp in dictionary)
{
map.Put(kvp.Key.ToString(CultureInfo.InvariantCulture), kvp.Value);
}
}
-
+
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{bool,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<bool, TValue>> collection)
{
@@ -2648,9 +2661,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{byte,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<byte, TValue>> collection)
{
@@ -2661,9 +2675,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{char,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<char, TValue>> collection)
{
@@ -2674,9 +2689,10 @@ namespace Lucene.Net.Analysis.Util
}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{Decimal,TValue}}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="collection"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="collection">The values to add/update in the current map.</param>
//public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<decimal, TValue>> collection)
//{
@@ -2687,9 +2703,10 @@ namespace Lucene.Net.Analysis.Util
//}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{double,TValue}}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="collection"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="collection">The values to add/update in the current map.</param>
//public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<double, TValue>> collection)
//{
@@ -2700,9 +2717,10 @@ namespace Lucene.Net.Analysis.Util
//}
///// <summary>
- ///// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{float,TValue}}"/>'s
- ///// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ ///// This implementation enumerates over the specified <paramref name="collection"/>'s
+ ///// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="collection">The values to add/update in the current map.</param>
//public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<float, TValue>> collection)
//{
@@ -2713,9 +2731,10 @@ namespace Lucene.Net.Analysis.Util
//}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{int,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<int, TValue>> collection)
{
@@ -2726,9 +2745,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{long,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<long, TValue>> collection)
{
@@ -2739,9 +2759,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{sbyte,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
[CLSCompliant(false)]
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<sbyte, TValue>> collection)
@@ -2753,9 +2774,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{short,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<short, TValue>> collection)
{
@@ -2766,9 +2788,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{uint,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
[CLSCompliant(false)]
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<uint, TValue>> collection)
@@ -2780,9 +2803,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{ulong,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
[CLSCompliant(false)]
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<ulong, TValue>> collection)
@@ -2794,9 +2818,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// This implementation enumerates over the specified <see cref="IEnumerable{KeyValuePair{ushort,TValue}}"/>'s
- /// entries, and calls this map's <see cref="Put(string, TValue)"/> operation once for each entry.
+ /// This implementation enumerates over the specified <paramref name="collection"/>'s
+ /// entries, and calls this map's <see cref="CharArrayMap{TValue}.Put(string, TValue)"/> operation once for each entry.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="collection">The values to add/update in the current map.</param>
[CLSCompliant(false)]
public static void PutAll<TValue>(this CharArrayMap<TValue> map, IEnumerable<KeyValuePair<ushort, TValue>> collection)
@@ -2814,6 +2839,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2827,6 +2853,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2840,6 +2867,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2853,6 +2881,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Gets the value associated with the specified key.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="key">The key of the value to get.</param>
///// <param name="value">When this method returns, contains the value associated with the specified key,
///// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2866,6 +2895,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Gets the value associated with the specified key.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="key">The key of the value to get.</param>
///// <param name="value">When this method returns, contains the value associated with the specified key,
///// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2879,6 +2909,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Gets the value associated with the specified key.
///// </summary>
+ ///// <param name="map">this map</param>
///// <param name="key">The key of the value to get.</param>
///// <param name="value">When this method returns, contains the value associated with the specified key,
///// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2892,6 +2923,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2905,6 +2937,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2918,6 +2951,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2932,6 +2966,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2945,6 +2980,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2959,6 +2995,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
@@ -2973,6 +3010,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Gets the value associated with the specified key.
/// </summary>
+ /// <param name="map">this map</param>
/// <param name="key">The key of the value to get.</param>
/// <param name="value">When this method returns, contains the value associated with the specified key,
/// if the key is found; otherwise, the default value for the type of the value parameter.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b19aee50/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index 9ac42ba..97bab79 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -70,8 +70,7 @@ namespace Lucene.Net.Analysis.Util
/// Create set with enough capacity to hold <paramref name="startSize"/> terms
/// </summary>
/// <param name="matchVersion">
- /// compatibility match version see <a href="#version">Version
- /// note</a> above for details. </param>
+ /// compatibility match version see <see cref="CharArraySet"/> for details. </param>
/// <param name="startSize">
/// the initial capacity </param>
/// <param name="ignoreCase">
@@ -86,8 +85,7 @@ namespace Lucene.Net.Analysis.Util
/// Creates a set from a collection of objects.
/// </summary>
/// <param name="matchVersion">
- /// compatibility match version see <a href="#version">Version
- /// note</a> above for details. </param>
+ /// compatibility match version see <see cref="CharArraySet"/> for details. </param>
/// <param name="c">
/// a collection whose elements to be placed into the set </param>
/// <param name="ignoreCase">
@@ -108,7 +106,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Clears all entries in this set. This method is supported for reusing, but not <see cref="ICollection{Object}.Remove(string)"/>.
+ /// Clears all entries in this set. This method is supported for reusing, but not <see cref="M:ICollection{string}.Remove(string)"/>.
/// </summary>
public virtual void Clear()
{
@@ -173,7 +171,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="ICharSequence"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public virtual bool Add(ICharSequence text)
{
return map.Put(text);
@@ -182,7 +180,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="string"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public virtual bool Add(string text)
{
return map.Put(text);
@@ -193,7 +191,7 @@ namespace Lucene.Net.Analysis.Util
/// If <c>ignoreCase</c> is true for this <see cref="CharArraySet"/>, the text array will be directly modified.
/// The user should never modify this text array after calling this method.
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public virtual bool Add(char[] text)
{
return map.Put(text);
@@ -252,14 +250,13 @@ namespace Lucene.Net.Analysis.Util
/// <para>
/// <b>Note:</b> If you intend to create a copy of another <see cref="CharArraySet"/> where
/// the <see cref="LuceneVersion"/> of the source set differs from its copy
- /// <see cref="CharArraySet(LuceneVersion, IEnumerable{string}, bool)"/> should be used instead.
- /// The <see cref="Copy{T}(LuceneVersion, IEnumerable{T})"/> will preserve the <see cref="LuceneVersion"/> of the
+ /// <see cref="CharArraySet.CharArraySet(LuceneVersion, ICollection{string}, bool)"/> should be used instead.
+ /// The <see cref="Copy{T}(LuceneVersion, ICollection{T})"/> will preserve the <see cref="LuceneVersion"/> of the
+ /// source set if it is an instance of <see cref="CharArraySet"/>.
/// </para>
/// </summary>
/// <param name="matchVersion">
- /// compatibility match version see <a href="#version">Version
- /// note</a> above for details. This argument will be ignored if the
+ /// compatibility match version. This argument will be ignored if the
/// given set is a <see cref="CharArraySet"/>. </param>
/// <param name="set">
/// a set to copy </param>
@@ -529,7 +526,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a subset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the <see cref="CharArraySet"/> object is a subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsSubsetOf(IEnumerable<string> other)
{
if (other == null)
@@ -561,7 +558,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a subset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the <see cref="CharArraySet"/> object is a subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsSubsetOf<T>(IEnumerable<T> other)
{
if (other == null)
@@ -584,7 +581,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a superset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object is a superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsSupersetOf(IEnumerable<string> other)
{
if (other == null)
@@ -611,7 +608,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a superset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object is a superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsSupersetOf<T>(IEnumerable<T> other)
{
if (other == null)
@@ -630,7 +627,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a proper subset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the <see cref="CharArraySet"/> object is a proper subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a proper subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsProperSubsetOf(IEnumerable<string> other)
{
if (other == null)
@@ -666,7 +663,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a proper subset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the <see cref="CharArraySet"/> object is a proper subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a proper subset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsProperSubsetOf<T>(IEnumerable<T> other)
{
if (other == null)
@@ -690,7 +687,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a proper superset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object is a proper superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a proper superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsProperSupersetOf(IEnumerable<string> other)
{
if (other == null)
@@ -727,7 +724,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether a <see cref="CharArraySet"/> object is a proper superset of the specified collection.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object is a proper superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object is a proper superset of <paramref name="other"/>; otherwise, <c>false</c>.</returns>
public virtual bool IsProperSupersetOf<T>(IEnumerable<T> other)
{
if (other == null)
@@ -752,7 +749,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether the current <see cref="CharArraySet"/> object and a specified collection share common elements.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object and <paramref name="other"/> share at least one common element; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if the <see cref="CharArraySet"/> object and <paramref name="other"/> share at least one common element; otherwise, <c>false</c>.</returns>
public virtual bool Overlaps(IEnumerable<string> other)
{
if (other == null)
@@ -776,7 +773,7 @@ namespace Lucene.Net.Analysis.Util
/// Determines whether the current <see cref="CharArraySet"/> object and a specified collection share common elements.
/// </summary>
/// <param name="other">The collection to compare to the current <see cref="CharArraySet"/> object.</param>
- /// <returns><c>true</c> if the HashSet<T> object and <paramref name="other"/> share at least one common element; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> object and <paramref name="other"/> share at least one common element; otherwise, <c>false</c>.</returns>
public virtual bool Overlaps<T>(IEnumerable<T> other)
{
if (other == null)
@@ -801,7 +798,7 @@ namespace Lucene.Net.Analysis.Util
/// in the specified collection.
/// </summary>
/// <param name="other">collection to be checked for containment in this collection</param>
- /// <returns><c>true</c> if this collection contains all of the elements in the specified collection; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> contains all of the elements in the specified collection; otherwise, <c>false</c>.</returns>
public virtual bool ContainsAll(IEnumerable<string> other)
{
foreach (var local in other)
@@ -819,7 +816,7 @@ namespace Lucene.Net.Analysis.Util
/// in the specified collection.
/// </summary>
/// <param name="other">collection to be checked for containment in this collection</param>
- /// <returns><c>true</c> if this collection contains all of the elements in the specified collection; otherwise, <c>false</c>.</returns>
+ /// <returns><c>true</c> if this <see cref="CharArraySet"/> contains all of the elements in the specified collection; otherwise, <c>false</c>.</returns>
public virtual bool ContainsAll<T>(IEnumerable<T> other)
{
foreach (var local in other)
@@ -874,7 +871,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="bool"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, bool text)
{
return set.map.Put(text.ToString());
@@ -883,7 +880,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="byte"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, byte text)
{
return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -892,7 +889,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="char"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, char text)
{
return set.map.Put("" + text);
@@ -901,7 +898,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Add this <see cref="decimal"/> into the set
///// </summary>
- ///// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ ///// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
//public static bool Add(this CharArraySet set, decimal text)
//{
// return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -910,7 +907,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Add this <see cref="double"/> into the set
///// </summary>
- ///// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ ///// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
//public static bool Add(this CharArraySet set, double text)
//{
// return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -919,7 +916,7 @@ namespace Lucene.Net.Analysis.Util
///// <summary>
///// Add this <see cref="float"/> into the set
///// </summary>
- ///// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ ///// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
//public static bool Add(this CharArraySet set, float text)
//{
// return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -928,7 +925,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="int"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, int text)
{
return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -937,7 +934,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="long"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, long text)
{
return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -946,7 +943,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="sbyte"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
[CLSCompliant(false)]
public static bool Add(this CharArraySet set, sbyte text)
{
@@ -956,7 +953,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="short"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
public static bool Add(this CharArraySet set, short text)
{
return set.map.Put(text.ToString(CultureInfo.InvariantCulture));
@@ -965,7 +962,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="uint"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
[CLSCompliant(false)]
public static bool Add(this CharArraySet set, uint text)
{
@@ -975,7 +972,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="ulong"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
[CLSCompliant(false)]
public static bool Add(this CharArraySet set, ulong text)
{
@@ -985,7 +982,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Add this <see cref="ushort"/> into the set
/// </summary>
- /// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
+ /// <returns><c>true</c> if <paramref name="text"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
[CLSCompliant(false)]
public static bool Add(this CharArraySet set, ushort text)
{
@@ -1112,6 +1109,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
public static bool UnionWith(this CharArraySet set, IEnumerable<byte> other)
@@ -1139,6 +1137,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
public static bool UnionWith(this CharArraySet set, IEnumerable<char> other)
@@ -1166,6 +1165,7 @@ namespace Lucene.Net.Analysis.Util
///// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
///// in itself, the specified collection, or both.
///// </summary>
+ ///// <param name="set">this <see cref="CharArraySet"/></param>
///// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
///// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
//public static bool UnionWith(this CharArraySet set, IEnumerable<decimal> other)
@@ -1193,6 +1193,7 @@ namespace Lucene.Net.Analysis.Util
///// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
///// in itself, the specified collection, or both.
///// </summary>
+ ///// <param name="set">this <see cref="CharArraySet"/></param>
///// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
///// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
//public static bool UnionWith(this CharArraySet set, IEnumerable<double> other)
@@ -1220,6 +1221,7 @@ namespace Lucene.Net.Analysis.Util
///// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
///// in itself, the specified collection, or both.
///// </summary>
+ ///// <param name="set">this <see cref="CharArraySet"/></param>
///// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
///// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
//public static bool UnionWith(this CharArraySet set, IEnumerable<float> other)
@@ -1247,6 +1249,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
public static bool UnionWith(this CharArraySet set, IEnumerable<int> other)
@@ -1274,6 +1277,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
public static bool UnionWith(this CharArraySet set, IEnumerable<long> other)
@@ -1301,6 +1305,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
[CLSCompliant(false)]
@@ -1329,6 +1334,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
public static bool UnionWith(this CharArraySet set, IEnumerable<short> other)
@@ -1356,6 +1362,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
[CLSCompliant(false)]
@@ -1384,6 +1391,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
[CLSCompliant(false)]
@@ -1412,6 +1420,7 @@ namespace Lucene.Net.Analysis.Util
/// Modifies the current <see cref="CharArraySet"/> to contain all elements that are present
/// in itself, the specified collection, or both.
/// </summary>
+ /// <param name="set">this <see cref="CharArraySet"/></param>
/// <param name="other">The collection whose elements should be merged into the <see cref="CharArraySet"/>.</param>
/// <returns><c>true</c> if this <see cref="CharArraySet"/> changed as a result of the call</returns>
[CLSCompliant(false)]
[18/39] lucenenet git commit: Lucene.Net.Analysis.Common/Tartarus
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RussianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RussianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RussianStemmer.cs
index 75ef579..9ad695c 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RussianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RussianStemmer.cs
@@ -174,7 +174,7 @@
{
I_p2 = other.I_p2;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -193,7 +193,7 @@
{
do
{
- if (!(in_grouping(g_v, 1072, 1103)))
+ if (!(InGrouping(g_v, 1072, 1103)))
{
goto lab2;
}
@@ -214,7 +214,7 @@
{
do
{
- if (!(out_grouping(g_v, 1072, 1103)))
+ if (!(OutGrouping(g_v, 1072, 1103)))
{
goto lab4;
}
@@ -233,7 +233,7 @@
{
do
{
- if (!(in_grouping(g_v, 1072, 1103)))
+ if (!(InGrouping(g_v, 1072, 1103)))
{
goto lab6;
}
@@ -252,7 +252,7 @@
{
do
{
- if (!(out_grouping(g_v, 1072, 1103)))
+ if (!(OutGrouping(g_v, 1072, 1103)))
{
goto lab8;
}
@@ -291,7 +291,7 @@
// [, line 72
m_ket = m_cursor;
// substring, line 72
- among_var = find_among_b(a_0, 9);
+ among_var = FindAmongB(a_0, 9);
if (among_var == 0)
{
return false;
@@ -311,7 +311,7 @@
do
{
// literal, line 76
- if (!(eq_s_b(1, "\u0430")))
+ if (!(Eq_S_B(1, "\u0430")))
{
goto lab1;
}
@@ -320,19 +320,19 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 76
- if (!(eq_s_b(1, "\u044F")))
+ if (!(Eq_S_B(1, "\u044F")))
{
return false;
}
} while (false);
lab0:
// delete, line 76
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 83
// delete, line 83
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -345,7 +345,7 @@
// [, line 88
m_ket = m_cursor;
// substring, line 88
- among_var = find_among_b(a_1, 26);
+ among_var = FindAmongB(a_1, 26);
if (among_var == 0)
{
return false;
@@ -359,7 +359,7 @@
case 1:
// (, line 97
// delete, line 97
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -384,7 +384,7 @@
// [, line 110
m_ket = m_cursor;
// substring, line 110
- among_var = find_among_b(a_2, 8);
+ among_var = FindAmongB(a_2, 8);
if (among_var == 0)
{
m_cursor = m_limit - v_1;
@@ -406,7 +406,7 @@
do
{
// literal, line 115
- if (!(eq_s_b(1, "\u0430")))
+ if (!(Eq_S_B(1, "\u0430")))
{
goto lab2;
}
@@ -415,7 +415,7 @@
lab2:
m_cursor = m_limit - v_2;
// literal, line 115
- if (!(eq_s_b(1, "\u044F")))
+ if (!(Eq_S_B(1, "\u044F")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -423,12 +423,12 @@
} while (false);
lab1:
// delete, line 115
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 122
// delete, line 122
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -443,7 +443,7 @@
// [, line 129
m_ket = m_cursor;
// substring, line 129
- among_var = find_among_b(a_3, 2);
+ among_var = FindAmongB(a_3, 2);
if (among_var == 0)
{
return false;
@@ -457,7 +457,7 @@
case 1:
// (, line 132
// delete, line 132
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -471,7 +471,7 @@
// [, line 137
m_ket = m_cursor;
// substring, line 137
- among_var = find_among_b(a_4, 46);
+ among_var = FindAmongB(a_4, 46);
if (among_var == 0)
{
return false;
@@ -491,7 +491,7 @@
do
{
// literal, line 143
- if (!(eq_s_b(1, "\u0430")))
+ if (!(Eq_S_B(1, "\u0430")))
{
goto lab1;
}
@@ -500,19 +500,19 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 143
- if (!(eq_s_b(1, "\u044F")))
+ if (!(Eq_S_B(1, "\u044F")))
{
return false;
}
} while (false);
lab0:
// delete, line 143
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 151
// delete, line 151
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -525,7 +525,7 @@
// [, line 160
m_ket = m_cursor;
// substring, line 160
- among_var = find_among_b(a_5, 36);
+ among_var = FindAmongB(a_5, 36);
if (among_var == 0)
{
return false;
@@ -539,7 +539,7 @@
case 1:
// (, line 167
// delete, line 167
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -552,7 +552,7 @@
// [, line 176
m_ket = m_cursor;
// substring, line 176
- among_var = find_among_b(a_6, 2);
+ among_var = FindAmongB(a_6, 2);
if (among_var == 0)
{
return false;
@@ -571,7 +571,7 @@
case 1:
// (, line 179
// delete, line 179
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -584,7 +584,7 @@
// [, line 184
m_ket = m_cursor;
// substring, line 184
- among_var = find_among_b(a_7, 4);
+ among_var = FindAmongB(a_7, 4);
if (among_var == 0)
{
return false;
@@ -598,38 +598,38 @@
case 1:
// (, line 188
// delete, line 188
- slice_del();
+ SliceDel();
// [, line 189
m_ket = m_cursor;
// literal, line 189
- if (!(eq_s_b(1, "\u043D")))
+ if (!(Eq_S_B(1, "\u043D")))
{
return false;
}
// ], line 189
m_bra = m_cursor;
// literal, line 189
- if (!(eq_s_b(1, "\u043D")))
+ if (!(Eq_S_B(1, "\u043D")))
{
return false;
}
// delete, line 189
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 192
// literal, line 192
- if (!(eq_s_b(1, "\u043D")))
+ if (!(Eq_S_B(1, "\u043D")))
{
return false;
}
// delete, line 192
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 194
// delete, line 194
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -754,7 +754,7 @@
// [, line 209
m_ket = m_cursor;
// literal, line 209
- if (!(eq_s_b(1, "\u0438")))
+ if (!(Eq_S_B(1, "\u0438")))
{
m_cursor = m_limit - v_8;
goto lab8;
@@ -762,7 +762,7 @@
// ], line 209
m_bra = m_cursor;
// delete, line 209
- slice_del();
+ SliceDel();
} while (false);
lab8:
// do, line 212
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SpanishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SpanishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SpanishStemmer.cs
index 7cdd686..bc825a4 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SpanishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SpanishStemmer.cs
@@ -251,7 +251,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -277,7 +277,7 @@
do
{
// (, line 38
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab2;
}
@@ -288,7 +288,7 @@
do
{
// (, line 38
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab4;
}
@@ -297,7 +297,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab6;
}
@@ -316,7 +316,7 @@
lab4:
m_cursor = v_3;
// (, line 38
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab2;
}
@@ -325,7 +325,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab8;
}
@@ -346,7 +346,7 @@
lab2:
m_cursor = v_2;
// (, line 40
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab0;
}
@@ -357,7 +357,7 @@
do
{
// (, line 40
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab10;
}
@@ -366,7 +366,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab12;
}
@@ -385,7 +385,7 @@
lab10:
m_cursor = v_6;
// (, line 40
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab0;
}
@@ -414,7 +414,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab15;
}
@@ -433,7 +433,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab17;
}
@@ -454,7 +454,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab19;
}
@@ -473,7 +473,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab21;
}
@@ -509,7 +509,7 @@
// [, line 50
m_bra = m_cursor;
// substring, line 50
- among_var = find_among(a_0, 6);
+ among_var = FindAmong(a_0, 6);
if (among_var == 0)
{
goto lab1;
@@ -523,27 +523,27 @@
case 1:
// (, line 51
// <-, line 51
- slice_from("a");
+ SliceFrom("a");
break;
case 2:
// (, line 52
// <-, line 52
- slice_from("e");
+ SliceFrom("e");
break;
case 3:
// (, line 53
// <-, line 53
- slice_from("i");
+ SliceFrom("i");
break;
case 4:
// (, line 54
// <-, line 54
- slice_from("o");
+ SliceFrom("o");
break;
case 5:
// (, line 55
// <-, line 55
- slice_from("u");
+ SliceFrom("u");
break;
case 6:
// (, line 57
@@ -608,14 +608,14 @@
// [, line 68
m_ket = m_cursor;
// substring, line 68
- if (find_among_b(a_1, 13) == 0)
+ if (FindAmongB(a_1, 13) == 0)
{
return false;
}
// ], line 68
m_bra = m_cursor;
// substring, line 72
- among_var = find_among_b(a_2, 11);
+ among_var = FindAmongB(a_2, 11);
if (among_var == 0)
{
return false;
@@ -634,50 +634,50 @@
// ], line 73
m_bra = m_cursor;
// <-, line 73
- slice_from("iendo");
+ SliceFrom("iendo");
break;
case 2:
// (, line 74
// ], line 74
m_bra = m_cursor;
// <-, line 74
- slice_from("ando");
+ SliceFrom("ando");
break;
case 3:
// (, line 75
// ], line 75
m_bra = m_cursor;
// <-, line 75
- slice_from("ar");
+ SliceFrom("ar");
break;
case 4:
// (, line 76
// ], line 76
m_bra = m_cursor;
// <-, line 76
- slice_from("er");
+ SliceFrom("er");
break;
case 5:
// (, line 77
// ], line 77
m_bra = m_cursor;
// <-, line 77
- slice_from("ir");
+ SliceFrom("ir");
break;
case 6:
// (, line 81
// delete, line 81
- slice_del();
+ SliceDel();
break;
case 7:
// (, line 82
// literal, line 82
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
return false;
}
// delete, line 82
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -695,7 +695,7 @@
// [, line 87
m_ket = m_cursor;
// substring, line 87
- among_var = find_among_b(a_6, 46);
+ among_var = FindAmongB(a_6, 46);
if (among_var == 0)
{
return false;
@@ -714,7 +714,7 @@
return false;
}
// delete, line 99
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 104
@@ -724,7 +724,7 @@
return false;
}
// delete, line 105
- slice_del();
+ SliceDel();
// try, line 106
v_1 = m_limit - m_cursor;
do
@@ -733,7 +733,7 @@
// [, line 106
m_ket = m_cursor;
// literal, line 106
- if (!(eq_s_b(2, "ic")))
+ if (!(Eq_S_B(2, "ic")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -747,7 +747,7 @@
goto lab0;
}
// delete, line 106
- slice_del();
+ SliceDel();
} while (false);
lab0:
break;
@@ -759,7 +759,7 @@
return false;
}
// <-, line 111
- slice_from("log");
+ SliceFrom("log");
break;
case 4:
// (, line 114
@@ -769,7 +769,7 @@
return false;
}
// <-, line 115
- slice_from("u");
+ SliceFrom("u");
break;
case 5:
// (, line 118
@@ -779,7 +779,7 @@
return false;
}
// <-, line 119
- slice_from("ente");
+ SliceFrom("ente");
break;
case 6:
// (, line 122
@@ -789,7 +789,7 @@
return false;
}
// delete, line 123
- slice_del();
+ SliceDel();
// try, line 124
v_2 = m_limit - m_cursor;
do
@@ -798,7 +798,7 @@
// [, line 125
m_ket = m_cursor;
// substring, line 125
- among_var = find_among_b(a_3, 4);
+ among_var = FindAmongB(a_3, 4);
if (among_var == 0)
{
m_cursor = m_limit - v_2;
@@ -813,7 +813,7 @@
goto lab1;
}
// delete, line 125
- slice_del();
+ SliceDel();
switch (among_var)
{
case 0:
@@ -824,7 +824,7 @@
// [, line 126
m_ket = m_cursor;
// literal, line 126
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_2;
goto lab1;
@@ -838,7 +838,7 @@
goto lab1;
}
// delete, line 126
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -852,7 +852,7 @@
return false;
}
// delete, line 135
- slice_del();
+ SliceDel();
// try, line 136
v_3 = m_limit - m_cursor;
do
@@ -861,7 +861,7 @@
// [, line 137
m_ket = m_cursor;
// substring, line 137
- among_var = find_among_b(a_4, 3);
+ among_var = FindAmongB(a_4, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_3;
@@ -883,7 +883,7 @@
goto lab2;
}
// delete, line 140
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -897,7 +897,7 @@
return false;
}
// delete, line 147
- slice_del();
+ SliceDel();
// try, line 148
v_4 = m_limit - m_cursor;
do
@@ -906,7 +906,7 @@
// [, line 149
m_ket = m_cursor;
// substring, line 149
- among_var = find_among_b(a_5, 3);
+ among_var = FindAmongB(a_5, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_4;
@@ -928,7 +928,7 @@
goto lab3;
}
// delete, line 152
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -942,7 +942,7 @@
return false;
}
// delete, line 159
- slice_del();
+ SliceDel();
// try, line 160
v_5 = m_limit - m_cursor;
do
@@ -951,7 +951,7 @@
// [, line 161
m_ket = m_cursor;
// literal, line 161
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_5;
goto lab4;
@@ -965,7 +965,7 @@
goto lab4;
}
// delete, line 161
- slice_del();
+ SliceDel();
} while (false);
lab4:
break;
@@ -994,7 +994,7 @@
// [, line 168
m_ket = m_cursor;
// substring, line 168
- among_var = find_among_b(a_7, 12);
+ among_var = FindAmongB(a_7, 12);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -1010,12 +1010,12 @@
case 1:
// (, line 171
// literal, line 171
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
return false;
}
// delete, line 171
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1044,7 +1044,7 @@
// [, line 176
m_ket = m_cursor;
// substring, line 176
- among_var = find_among_b(a_8, 96);
+ among_var = FindAmongB(a_8, 96);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -1065,7 +1065,7 @@
{
// (, line 179
// literal, line 179
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
m_cursor = m_limit - v_3;
goto lab0;
@@ -1073,7 +1073,7 @@
// test, line 179
v_4 = m_limit - m_cursor;
// literal, line 179
- if (!(eq_s_b(1, "g")))
+ if (!(Eq_S_B(1, "g")))
{
m_cursor = m_limit - v_3;
goto lab0;
@@ -1084,12 +1084,12 @@
// ], line 179
m_bra = m_cursor;
// delete, line 179
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 200
// delete, line 200
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1104,7 +1104,7 @@
// [, line 205
m_ket = m_cursor;
// substring, line 205
- among_var = find_among_b(a_9, 8);
+ among_var = FindAmongB(a_9, 8);
if (among_var == 0)
{
return false;
@@ -1123,7 +1123,7 @@
return false;
}
// delete, line 208
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 210
@@ -1133,7 +1133,7 @@
return false;
}
// delete, line 210
- slice_del();
+ SliceDel();
// try, line 210
v_1 = m_limit - m_cursor;
do
@@ -1142,7 +1142,7 @@
// [, line 210
m_ket = m_cursor;
// literal, line 210
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -1152,7 +1152,7 @@
// test, line 210
v_2 = m_limit - m_cursor;
// literal, line 210
- if (!(eq_s_b(1, "g")))
+ if (!(Eq_S_B(1, "g")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -1165,7 +1165,7 @@
goto lab0;
}
// delete, line 210
- slice_del();
+ SliceDel();
} while (false);
lab0:
break;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SwedishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SwedishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SwedishStemmer.cs
index f934edf..706d563 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SwedishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/SwedishStemmer.cs
@@ -77,7 +77,7 @@
{
I_x = other.I_x;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -107,7 +107,7 @@
v_2 = m_cursor;
do
{
- if (!(in_grouping(g_v, 97, 246)))
+ if (!(InGrouping(g_v, 97, 246)))
{
goto lab1;
}
@@ -128,7 +128,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 246)))
+ if (!(OutGrouping(g_v, 97, 246)))
{
goto lab3;
}
@@ -179,7 +179,7 @@
// [, line 37
m_ket = m_cursor;
// substring, line 37
- among_var = find_among_b(a_0, 37);
+ among_var = FindAmongB(a_0, 37);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -195,16 +195,16 @@
case 1:
// (, line 44
// delete, line 44
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 46
- if (!(in_grouping_b(g_s_ending, 98, 121)))
+ if (!(InGroupingB(g_s_ending, 98, 121)))
{
return false;
}
// delete, line 46
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -230,7 +230,7 @@
// and, line 52
v_3 = m_limit - m_cursor;
// among, line 51
- if (find_among_b(a_1, 7) == 0)
+ if (FindAmongB(a_1, 7) == 0)
{
m_limit_backward = v_2;
return false;
@@ -249,7 +249,7 @@
// ], line 52
m_bra = m_cursor;
// delete, line 52
- slice_del();
+ SliceDel();
m_limit_backward = v_2;
return true;
}
@@ -274,7 +274,7 @@
// [, line 56
m_ket = m_cursor;
// substring, line 56
- among_var = find_among_b(a_2, 5);
+ among_var = FindAmongB(a_2, 5);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -290,17 +290,17 @@
case 1:
// (, line 57
// delete, line 57
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 58
// <-, line 58
- slice_from("l\u00F6s");
+ SliceFrom("l\u00F6s");
break;
case 3:
// (, line 59
// <-, line 59
- slice_from("full");
+ SliceFrom("full");
break;
}
m_limit_backward = v_2;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/TurkishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/TurkishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/TurkishStemmer.cs
index 72c3969..3dd337a 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/TurkishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/TurkishStemmer.cs
@@ -223,7 +223,7 @@
{
B_continue_stemming_noun_suffixes = other.B_continue_stemming_noun_suffixes;
I_strlen = other.I_strlen;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_check_vowel_harmony()
@@ -251,7 +251,7 @@
v_2 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab1;
}
@@ -276,7 +276,7 @@
{
// (, line 116
// literal, line 116
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
goto lab3;
}
@@ -286,7 +286,7 @@
v_4 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_vowel1, 97, 305)))
+ if (!(InGroupingB(g_vowel1, 97, 305)))
{
goto lab5;
}
@@ -310,7 +310,7 @@
{
// (, line 117
// literal, line 117
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab6;
}
@@ -322,7 +322,7 @@
do
{
- if (!(in_grouping_b(g_vowel2, 101, 252)))
+ if (!(InGroupingB(g_vowel2, 101, 252)))
{
goto lab8;
}
@@ -347,7 +347,7 @@
{
// (, line 118
// literal, line 118
- if (!(eq_s_b(1, "\u0131")))
+ if (!(Eq_S_B(1, "\u0131")))
{
goto lab9;
}
@@ -359,7 +359,7 @@
do
{
- if (!(in_grouping_b(g_vowel3, 97, 305)))
+ if (!(InGroupingB(g_vowel3, 97, 305)))
{
goto lab11;
}
@@ -384,7 +384,7 @@
{
// (, line 119
// literal, line 119
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab12;
}
@@ -396,7 +396,7 @@
do
{
- if (!(in_grouping_b(g_vowel4, 101, 105)))
+ if (!(InGroupingB(g_vowel4, 101, 105)))
{
goto lab14;
}
@@ -420,7 +420,7 @@
{
// (, line 120
// literal, line 120
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab15;
}
@@ -432,7 +432,7 @@
do
{
- if (!(in_grouping_b(g_vowel5, 111, 117)))
+ if (!(InGroupingB(g_vowel5, 111, 117)))
{
goto lab17;
}
@@ -456,7 +456,7 @@
{
// (, line 121
// literal, line 121
- if (!(eq_s_b(1, "\u00F6")))
+ if (!(Eq_S_B(1, "\u00F6")))
{
goto lab18;
}
@@ -468,7 +468,7 @@
do
{
- if (!(in_grouping_b(g_vowel6, 246, 252)))
+ if (!(InGroupingB(g_vowel6, 246, 252)))
{
goto lab20;
}
@@ -492,7 +492,7 @@
{
// (, line 122
// literal, line 122
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab21;
}
@@ -504,7 +504,7 @@
do
{
- if (!(in_grouping_b(g_vowel5, 111, 117)))
+ if (!(InGroupingB(g_vowel5, 111, 117)))
{
goto lab23;
}
@@ -526,7 +526,7 @@
m_cursor = m_limit - v_3;
// (, line 123
// literal, line 123
- if (!(eq_s_b(1, "\u00FC")))
+ if (!(Eq_S_B(1, "\u00FC")))
{
return false;
}
@@ -538,7 +538,7 @@
do
{
- if (!(in_grouping_b(g_vowel6, 246, 252)))
+ if (!(InGroupingB(g_vowel6, 246, 252)))
{
goto lab25;
}
@@ -582,7 +582,7 @@
// test, line 133
v_2 = m_limit - m_cursor;
// literal, line 133
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab1;
}
@@ -596,7 +596,7 @@
// (, line 133
// test, line 133
v_3 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab1;
}
@@ -617,7 +617,7 @@
// test, line 135
v_5 = m_limit - m_cursor;
// literal, line 135
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab2;
}
@@ -639,7 +639,7 @@
// (, line 135
// test, line 135
v_7 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
return false;
}
@@ -673,7 +673,7 @@
// test, line 144
v_2 = m_limit - m_cursor;
// literal, line 144
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab1;
}
@@ -687,7 +687,7 @@
// (, line 144
// test, line 144
v_3 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab1;
}
@@ -708,7 +708,7 @@
// test, line 146
v_5 = m_limit - m_cursor;
// literal, line 146
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab2;
}
@@ -730,7 +730,7 @@
// (, line 146
// test, line 146
v_7 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
return false;
}
@@ -764,7 +764,7 @@
// test, line 154
v_2 = m_limit - m_cursor;
// literal, line 154
- if (!(eq_s_b(1, "y")))
+ if (!(Eq_S_B(1, "y")))
{
goto lab1;
}
@@ -778,7 +778,7 @@
// (, line 154
// test, line 154
v_3 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab1;
}
@@ -799,7 +799,7 @@
// test, line 156
v_5 = m_limit - m_cursor;
// literal, line 156
- if (!(eq_s_b(1, "y")))
+ if (!(Eq_S_B(1, "y")))
{
goto lab2;
}
@@ -821,7 +821,7 @@
// (, line 156
// test, line 156
v_7 = m_limit - m_cursor;
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
return false;
}
@@ -852,7 +852,7 @@
// (, line 160
// test, line 160
v_2 = m_limit - m_cursor;
- if (!(in_grouping_b(g_U, 105, 305)))
+ if (!(InGroupingB(g_U, 105, 305)))
{
goto lab1;
}
@@ -866,7 +866,7 @@
// (, line 160
// test, line 160
v_3 = m_limit - m_cursor;
- if (!(out_grouping_b(g_vowel, 97, 305)))
+ if (!(OutGroupingB(g_vowel, 97, 305)))
{
goto lab1;
}
@@ -886,7 +886,7 @@
// (, line 162
// test, line 162
v_5 = m_limit - m_cursor;
- if (!(in_grouping_b(g_U, 105, 305)))
+ if (!(InGroupingB(g_U, 105, 305)))
{
goto lab2;
}
@@ -908,7 +908,7 @@
// (, line 162
// test, line 162
v_7 = m_limit - m_cursor;
- if (!(out_grouping_b(g_vowel, 97, 305)))
+ if (!(OutGroupingB(g_vowel, 97, 305)))
{
return false;
}
@@ -923,7 +923,7 @@
{
// (, line 166
// among, line 167
- if (find_among_b(a_0, 10) == 0)
+ if (FindAmongB(a_0, 10) == 0)
{
return false;
}
@@ -944,7 +944,7 @@
{
return false;
}
- if (!(in_grouping_b(g_U, 105, 305)))
+ if (!(InGroupingB(g_U, 105, 305)))
{
return false;
}
@@ -961,7 +961,7 @@
{
// (, line 178
// among, line 179
- if (find_among_b(a_1, 2) == 0)
+ if (FindAmongB(a_1, 2) == 0)
{
return false;
}
@@ -976,7 +976,7 @@
{
return false;
}
- if (!(in_grouping_b(g_U, 105, 305)))
+ if (!(InGroupingB(g_U, 105, 305)))
{
return false;
}
@@ -998,7 +998,7 @@
return false;
}
// among, line 190
- if (find_among_b(a_2, 4) == 0)
+ if (FindAmongB(a_2, 4) == 0)
{
return false;
}
@@ -1014,7 +1014,7 @@
return false;
}
// among, line 195
- if (find_among_b(a_3, 4) == 0)
+ if (FindAmongB(a_3, 4) == 0)
{
return false;
}
@@ -1036,7 +1036,7 @@
return false;
}
// among, line 201
- if (find_among_b(a_4, 2) == 0)
+ if (FindAmongB(a_4, 2) == 0)
{
return false;
}
@@ -1058,7 +1058,7 @@
return false;
}
// among, line 207
- if (find_among_b(a_5, 2) == 0)
+ if (FindAmongB(a_5, 2) == 0)
{
return false;
}
@@ -1074,7 +1074,7 @@
return false;
}
// among, line 212
- if (find_among_b(a_6, 4) == 0)
+ if (FindAmongB(a_6, 4) == 0)
{
return false;
}
@@ -1090,7 +1090,7 @@
return false;
}
// among, line 217
- if (find_among_b(a_7, 2) == 0)
+ if (FindAmongB(a_7, 2) == 0)
{
return false;
}
@@ -1106,7 +1106,7 @@
return false;
}
// among, line 222
- if (find_among_b(a_8, 4) == 0)
+ if (FindAmongB(a_8, 4) == 0)
{
return false;
}
@@ -1122,7 +1122,7 @@
return false;
}
// among, line 227
- if (find_among_b(a_9, 2) == 0)
+ if (FindAmongB(a_9, 2) == 0)
{
return false;
}
@@ -1138,7 +1138,7 @@
return false;
}
// among, line 232
- if (find_among_b(a_10, 2) == 0)
+ if (FindAmongB(a_10, 2) == 0)
{
return false;
}
@@ -1155,7 +1155,7 @@
{
// (, line 236
// literal, line 237
- if (!(eq_s_b(2, "ki")))
+ if (!(Eq_S_B(2, "ki")))
{
return false;
}
@@ -1171,7 +1171,7 @@
return false;
}
// among, line 242
- if (find_among_b(a_11, 2) == 0)
+ if (FindAmongB(a_11, 2) == 0)
{
return false;
}
@@ -1193,7 +1193,7 @@
return false;
}
// among, line 248
- if (find_among_b(a_12, 4) == 0)
+ if (FindAmongB(a_12, 4) == 0)
{
return false;
}
@@ -1215,7 +1215,7 @@
return false;
}
// among, line 254
- if (find_among_b(a_13, 4) == 0)
+ if (FindAmongB(a_13, 4) == 0)
{
return false;
}
@@ -1231,7 +1231,7 @@
return false;
}
// among, line 259
- if (find_among_b(a_14, 4) == 0)
+ if (FindAmongB(a_14, 4) == 0)
{
return false;
}
@@ -1248,7 +1248,7 @@
{
// (, line 263
// among, line 264
- if (find_among_b(a_15, 4) == 0)
+ if (FindAmongB(a_15, 4) == 0)
{
return false;
}
@@ -1264,7 +1264,7 @@
return false;
}
// among, line 269
- if (find_among_b(a_16, 2) == 0)
+ if (FindAmongB(a_16, 2) == 0)
{
return false;
}
@@ -1280,7 +1280,7 @@
return false;
}
// among, line 274
- if (find_among_b(a_17, 4) == 0)
+ if (FindAmongB(a_17, 4) == 0)
{
return false;
}
@@ -1296,7 +1296,7 @@
return false;
}
// among, line 279
- if (find_among_b(a_18, 8) == 0)
+ if (FindAmongB(a_18, 8) == 0)
{
return false;
}
@@ -1307,7 +1307,7 @@
{
// (, line 282
// among, line 283
- if (find_among_b(a_19, 2) == 0)
+ if (FindAmongB(a_19, 2) == 0)
{
return false;
}
@@ -1323,7 +1323,7 @@
return false;
}
// among, line 288
- if (find_among_b(a_20, 32) == 0)
+ if (FindAmongB(a_20, 32) == 0)
{
return false;
}
@@ -1340,7 +1340,7 @@
{
// (, line 296
// among, line 297
- if (find_among_b(a_21, 8) == 0)
+ if (FindAmongB(a_21, 8) == 0)
{
return false;
}
@@ -1362,7 +1362,7 @@
return false;
}
// among, line 303
- if (find_among_b(a_22, 4) == 0)
+ if (FindAmongB(a_22, 4) == 0)
{
return false;
}
@@ -1379,7 +1379,7 @@
{
// (, line 307
// literal, line 308
- if (!(eq_s_b(3, "ken")))
+ if (!(Eq_S_B(3, "ken")))
{
return false;
}
@@ -1568,7 +1568,7 @@
// ], line 319
m_bra = m_cursor;
// delete, line 319
- slice_del();
+ SliceDel();
// try, line 319
v_4 = m_limit - m_cursor;
@@ -1729,7 +1729,7 @@
// ], line 325
m_bra = m_cursor;
// delete, line 325
- slice_del();
+ SliceDel();
// try, line 325
v_8 = m_limit - m_cursor;
@@ -1759,7 +1759,7 @@
// ], line 327
m_bra = m_cursor;
// delete, line 327
- slice_del();
+ SliceDel();
// try, line 327
v_9 = m_limit - m_cursor;
@@ -1849,7 +1849,7 @@
// ], line 328
m_bra = m_cursor;
// delete, line 328
- slice_del();
+ SliceDel();
return true;
}
@@ -1892,7 +1892,7 @@
// ], line 336
m_bra = m_cursor;
// delete, line 336
- slice_del();
+ SliceDel();
// try, line 336
v_2 = m_limit - m_cursor;
@@ -1918,7 +1918,7 @@
// ], line 337
m_bra = m_cursor;
// delete, line 337
- slice_del();
+ SliceDel();
// try, line 337
v_4 = m_limit - m_cursor;
@@ -1947,7 +1947,7 @@
// ], line 339
m_bra = m_cursor;
// delete, line 339
- slice_del();
+ SliceDel();
// try, line 339
v_5 = m_limit - m_cursor;
@@ -1965,7 +1965,7 @@
// ], line 339
m_bra = m_cursor;
// delete, line 339
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 339
if (!r_stem_suffix_chain_before_ki())
{
@@ -1994,7 +1994,7 @@
// ], line 343
m_bra = m_cursor;
// delete, line 343
- slice_del();
+ SliceDel();
// try, line 343
v_6 = m_limit - m_cursor;
@@ -2020,7 +2020,7 @@
// ], line 344
m_bra = m_cursor;
// delete, line 344
- slice_del();
+ SliceDel();
goto lab9;
} while (false);
lab10:
@@ -2058,7 +2058,7 @@
// ], line 346
m_bra = m_cursor;
// delete, line 346
- slice_del();
+ SliceDel();
// try, line 346
v_9 = m_limit - m_cursor;
@@ -2076,7 +2076,7 @@
// ], line 346
m_bra = m_cursor;
// delete, line 346
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 346
if (!r_stem_suffix_chain_before_ki())
{
@@ -2128,7 +2128,7 @@
// ], line 352
m_bra = m_cursor;
// delete, line 352
- slice_del();
+ SliceDel();
goto lab15;
} while (false);
lab16:
@@ -2146,7 +2146,7 @@
// ], line 354
m_bra = m_cursor;
// delete, line 354
- slice_del();
+ SliceDel();
// try, line 354
v_11 = m_limit - m_cursor;
@@ -2164,7 +2164,7 @@
// ], line 354
m_bra = m_cursor;
// delete, line 354
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 354
if (!r_stem_suffix_chain_before_ki())
{
@@ -2239,7 +2239,7 @@
// ], line 362
m_bra = m_cursor;
// delete, line 362
- slice_del();
+ SliceDel();
// try, line 362
v_2 = m_limit - m_cursor;
@@ -2272,7 +2272,7 @@
// ], line 364
m_bra = m_cursor;
// delete, line 364
- slice_del();
+ SliceDel();
// try, line 365
v_3 = m_limit - m_cursor;
@@ -2298,7 +2298,7 @@
// ], line 366
m_bra = m_cursor;
// delete, line 366
- slice_del();
+ SliceDel();
goto lab5;
} while (false);
lab6:
@@ -2336,7 +2336,7 @@
// ], line 368
m_bra = m_cursor;
// delete, line 368
- slice_del();
+ SliceDel();
// try, line 368
v_6 = m_limit - m_cursor;
@@ -2354,7 +2354,7 @@
// ], line 368
m_bra = m_cursor;
// delete, line 368
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 368
if (!r_stem_suffix_chain_before_ki())
{
@@ -2379,7 +2379,7 @@
// ], line 370
m_bra = m_cursor;
// delete, line 370
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 370
if (!r_stem_suffix_chain_before_ki())
{
@@ -2443,7 +2443,7 @@
// ], line 376
m_bra = m_cursor;
// delete, line 376
- slice_del();
+ SliceDel();
goto lab14;
} while (false);
lab15:
@@ -2460,7 +2460,7 @@
// ], line 378
m_bra = m_cursor;
// delete, line 378
- slice_del();
+ SliceDel();
// try, line 378
v_9 = m_limit - m_cursor;
@@ -2478,7 +2478,7 @@
// ], line 378
m_bra = m_cursor;
// delete, line 378
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 378
if (!r_stem_suffix_chain_before_ki())
{
@@ -2552,7 +2552,7 @@
// ], line 384
m_bra = m_cursor;
// delete, line 384
- slice_del();
+ SliceDel();
// try, line 384
v_12 = m_limit - m_cursor;
@@ -2570,7 +2570,7 @@
// ], line 384
m_bra = m_cursor;
// delete, line 384
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 384
if (!r_stem_suffix_chain_before_ki())
{
@@ -2609,7 +2609,7 @@
// ], line 386
m_bra = m_cursor;
// delete, line 386
- slice_del();
+ SliceDel();
// try, line 386
v_13 = m_limit - m_cursor;
@@ -2636,7 +2636,7 @@
// ], line 388
m_bra = m_cursor;
// delete, line 388
- slice_del();
+ SliceDel();
// try, line 388
v_15 = m_limit - m_cursor;
@@ -2654,7 +2654,7 @@
// ], line 388
m_bra = m_cursor;
// delete, line 388
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 388
if (!r_stem_suffix_chain_before_ki())
{
@@ -2679,7 +2679,7 @@
// ], line 390
m_bra = m_cursor;
// delete, line 390
- slice_del();
+ SliceDel();
// try, line 390
v_16 = m_limit - m_cursor;
@@ -2746,7 +2746,7 @@
// ], line 396
m_bra = m_cursor;
// delete, line 396
- slice_del();
+ SliceDel();
// try, line 397
v_18 = m_limit - m_cursor;
@@ -2772,7 +2772,7 @@
// ], line 398
m_bra = m_cursor;
// delete, line 398
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 398
if (!r_stem_suffix_chain_before_ki())
{
@@ -2815,7 +2815,7 @@
// ], line 400
m_bra = m_cursor;
// delete, line 400
- slice_del();
+ SliceDel();
// try, line 400
v_21 = m_limit - m_cursor;
@@ -2833,7 +2833,7 @@
// ], line 400
m_bra = m_cursor;
// delete, line 400
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 400
if (!r_stem_suffix_chain_before_ki())
{
@@ -2874,7 +2874,7 @@
// ], line 406
m_bra = m_cursor;
// delete, line 406
- slice_del();
+ SliceDel();
goto lab0;
} while (false);
lab41:
@@ -2937,7 +2937,7 @@
// ], line 410
m_bra = m_cursor;
// delete, line 410
- slice_del();
+ SliceDel();
// try, line 410
v_23 = m_limit - m_cursor;
@@ -2964,7 +2964,7 @@
// ], line 410
m_bra = m_cursor;
// delete, line 410
- slice_del();
+ SliceDel();
// try, line 410
v_25 = m_limit - m_cursor;
@@ -2996,7 +2996,7 @@
// ], line 410
m_bra = m_cursor;
// delete, line 410
- slice_del();
+ SliceDel();
// [, line 410
m_ket = m_cursor;
// call stem_suffix_chain_before_ki, line 410
@@ -3041,7 +3041,7 @@
// ], line 412
m_bra = m_cursor;
// delete, line 412
- slice_del();
+ SliceDel();
// try, line 412
v_27 = m_limit - m_cursor;
@@ -3059,7 +3059,7 @@
// ], line 412
m_bra = m_cursor;
// delete, line 412
- slice_del();
+ SliceDel();
// call stem_suffix_chain_before_ki, line 412
if (!r_stem_suffix_chain_before_ki())
{
@@ -3080,7 +3080,7 @@
// [, line 416
m_ket = m_cursor;
// substring, line 416
- among_var = find_among_b(a_23, 4);
+ among_var = FindAmongB(a_23, 4);
if (among_var == 0)
{
return false;
@@ -3094,22 +3094,22 @@
case 1:
// (, line 417
// <-, line 417
- slice_from("p");
+ SliceFrom("p");
break;
case 2:
// (, line 418
// <-, line 418
- slice_from("\u00E7");
+ SliceFrom("\u00E7");
break;
case 3:
// (, line 419
// <-, line 419
- slice_from("t");
+ SliceFrom("t");
break;
case 4:
// (, line 420
// <-, line 420
- slice_from("k");
+ SliceFrom("k");
break;
}
return true;
@@ -3145,7 +3145,7 @@
do
{
// literal, line 431
- if (!(eq_s_b(1, "d")))
+ if (!(Eq_S_B(1, "d")))
{
goto lab1;
}
@@ -3154,7 +3154,7 @@
lab1:
m_cursor = m_limit - v_2;
// literal, line 431
- if (!(eq_s_b(1, "g")))
+ if (!(Eq_S_B(1, "g")))
{
return false;
}
@@ -3182,7 +3182,7 @@
do
{
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab5;
}
@@ -3207,7 +3207,7 @@
do
{
// literal, line 432
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
goto lab7;
}
@@ -3216,7 +3216,7 @@
lab7:
m_cursor = m_limit - v_6;
// literal, line 432
- if (!(eq_s_b(1, "\u0131")))
+ if (!(Eq_S_B(1, "\u0131")))
{
goto lab3;
}
@@ -3226,7 +3226,7 @@
// <+, line 432
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "\u0131");
+ Insert(m_cursor, m_cursor, "\u0131");
m_cursor = c;
}
goto lab2;
@@ -3249,7 +3249,7 @@
do
{
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab10;
}
@@ -3274,7 +3274,7 @@
do
{
// literal, line 434
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab12;
}
@@ -3283,7 +3283,7 @@
lab12:
m_cursor = m_limit - v_9;
// literal, line 434
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab8;
}
@@ -3293,7 +3293,7 @@
// <+, line 434
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "i");
+ Insert(m_cursor, m_cursor, "i");
m_cursor = c;
}
goto lab2;
@@ -3316,7 +3316,7 @@
do
{
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab15;
}
@@ -3341,7 +3341,7 @@
do
{
// literal, line 436
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab17;
}
@@ -3350,7 +3350,7 @@
lab17:
m_cursor = m_limit - v_12;
// literal, line 436
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab13;
}
@@ -3360,7 +3360,7 @@
// <+, line 436
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "u");
+ Insert(m_cursor, m_cursor, "u");
m_cursor = c;
}
goto lab2;
@@ -3380,7 +3380,7 @@
do
{
- if (!(in_grouping_b(g_vowel, 97, 305)))
+ if (!(InGroupingB(g_vowel, 97, 305)))
{
goto lab19;
}
@@ -3405,7 +3405,7 @@
do
{
// literal, line 438
- if (!(eq_s_b(1, "\u00F6")))
+ if (!(Eq_S_B(1, "\u00F6")))
{
goto lab21;
}
@@ -3414,7 +3414,7 @@
lab21:
m_cursor = m_limit - v_15;
// literal, line 438
- if (!(eq_s_b(1, "\u00FC")))
+ if (!(Eq_S_B(1, "\u00FC")))
{
return false;
}
@@ -3424,7 +3424,7 @@
// <+, line 438
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "\u00FC");
+ Insert(m_cursor, m_cursor, "\u00FC");
m_cursor = c;
}
} while (false);
@@ -3459,7 +3459,7 @@
do
{
- if (!(in_grouping(g_vowel, 97, 305)))
+ if (!(InGrouping(g_vowel, 97, 305)))
{
goto lab3;
}
@@ -3523,7 +3523,7 @@
do
{
// literal, line 450
- if (!(eq_s(2, "ad")))
+ if (!(Eq_S(2, "ad")))
{
goto lab3;
}
@@ -3560,7 +3560,7 @@
do
{
// literal, line 452
- if (!(eq_s(5, "soyad")))
+ if (!(Eq_S(5, "soyad")))
{
goto lab5;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/SnowballProgram.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/SnowballProgram.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/SnowballProgram.cs
index 8204316..b2636ff 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/SnowballProgram.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/SnowballProgram.cs
@@ -51,7 +51,7 @@ namespace Lucene.Net.Tartarus.Snowball
{
private static readonly object[] EMPTY_ARGS = new object[0];
- protected SnowballProgram()
+ protected SnowballProgram()
{
m_current = new char[8];
SetCurrent("");
@@ -59,10 +59,10 @@ namespace Lucene.Net.Tartarus.Snowball
public abstract bool Stem();
- /**
- * Set the current string.
- */
- public void SetCurrent(string value)
+ /// <summary>
+ /// Set the current string.
+ /// </summary>
+ public virtual void SetCurrent(string value)
{
m_current = value.ToCharArray();
m_cursor = 0;
@@ -72,20 +72,20 @@ namespace Lucene.Net.Tartarus.Snowball
m_ket = m_limit;
}
- /**
- * Get the current string.
- */
- public string Current
+ /// <summary>
+ /// Get the current string.
+ /// </summary>
+ public virtual string Current
{
get { return new string(m_current, 0, m_limit); }
}
- /**
- * Set the current string.
- * @param text character array containing input
- * @param length valid length of text.
- */
- public void SetCurrent(char[] text, int length)
+ /// <summary>
+ /// Set the current string.
+ /// </summary>
+ /// <param name="text">character array containing input</param>
+ /// <param name="length">valid length of text.</param>
+ public virtual void SetCurrent(char[] text, int length)
{
m_current = text;
m_cursor = 0;
@@ -95,33 +95,29 @@ namespace Lucene.Net.Tartarus.Snowball
m_ket = m_limit;
}
- /**
- * Get the current buffer containing the stem.
- * <p>
- * NOTE: this may be a reference to a different character array than the
- * one originally provided with setCurrent, in the exceptional case that
- * stemming produced a longer intermediate or result string.
- * </p>
- * <p>
- * It is necessary to use {@link #getCurrentBufferLength()} to determine
- * the valid length of the returned buffer. For example, many words are
- * stemmed simply by subtracting from the length to remove suffixes.
- * </p>
- * @see #getCurrentBufferLength()
- */
+ /// <summary>
+ /// Get the current buffer containing the stem.
+ /// <para/>
+ /// NOTE: this may be a reference to a different character array than the
+ /// one originally provided with setCurrent, in the exceptional case that
+ /// stemming produced a longer intermediate or result string.
+ /// <para/>
+ /// It is necessary to use <see cref="CurrentBufferLength"/> to determine
+ /// the valid length of the returned buffer. For example, many words are
+ /// stemmed simply by subtracting from the length to remove suffixes.
+ /// </summary>
+ /// <seealso cref="CurrentBufferLength"/>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
- public char[] CurrentBuffer
+ public virtual char[] CurrentBuffer
{
get { return m_current; }
}
- /**
- * Get the valid length of the character array in
- * {@link #getCurrentBuffer()}.
- * @return valid length of the array.
- */
- public int CurrentBufferLength
+ /// <summary>
+ /// Get the valid length of the character array in <seealso cref="CurrentBuffer"/>
+ /// </summary>
+ public virtual int CurrentBufferLength
{
get { return m_limit; }
}
@@ -135,7 +131,7 @@ namespace Lucene.Net.Tartarus.Snowball
protected int m_bra;
protected int m_ket;
- protected void copy_from(SnowballProgram other)
+ protected virtual void CopyFrom(SnowballProgram other)
{
m_current = other.m_current;
m_cursor = other.m_cursor;
@@ -145,7 +141,7 @@ namespace Lucene.Net.Tartarus.Snowball
m_ket = other.m_ket;
}
- protected bool in_grouping(char[] s, int min, int max)
+ protected virtual bool InGrouping(char[] s, int min, int max)
{
if (m_cursor >= m_limit) return false;
char ch = m_current[m_cursor];
@@ -156,7 +152,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool in_grouping_b(char[] s, int min, int max)
+ protected virtual bool InGroupingB(char[] s, int min, int max)
{
if (m_cursor <= m_limit_backward) return false;
char ch = m_current[m_cursor - 1];
@@ -167,7 +163,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool out_grouping(char[] s, int min, int max)
+ protected virtual bool OutGrouping(char[] s, int min, int max)
{
if (m_cursor >= m_limit) return false;
char ch = m_current[m_cursor];
@@ -185,7 +181,7 @@ namespace Lucene.Net.Tartarus.Snowball
return false;
}
- protected bool out_grouping_b(char[] s, int min, int max)
+ protected virtual bool OutGroupingB(char[] s, int min, int max)
{
if (m_cursor <= m_limit_backward) return false;
char ch = m_current[m_cursor - 1];
@@ -203,7 +199,7 @@ namespace Lucene.Net.Tartarus.Snowball
return false;
}
- protected bool in_range(int min, int max)
+ protected virtual bool InRange(int min, int max)
{
if (m_cursor >= m_limit) return false;
char ch = m_current[m_cursor];
@@ -212,7 +208,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool in_range_b(int min, int max)
+ protected virtual bool InRangeB(int min, int max)
{
if (m_cursor <= m_limit_backward) return false;
char ch = m_current[m_cursor - 1];
@@ -221,7 +217,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool out_range(int min, int max)
+ protected virtual bool OutRange(int min, int max)
{
if (m_cursor >= m_limit) return false;
char ch = m_current[m_cursor];
@@ -230,7 +226,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool out_range_b(int min, int max)
+ protected virtual bool OutRangeB(int min, int max)
{
if (m_cursor <= m_limit_backward) return false;
char ch = m_current[m_cursor - 1];
@@ -239,7 +235,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool eq_s(int s_size, string s)
+ protected virtual bool Eq_S(int s_size, string s)
{
if (m_limit - m_cursor < s_size) return false;
int i;
@@ -251,7 +247,7 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool eq_s_b(int s_size, string s)
+ protected virtual bool Eq_S_B(int s_size, string s)
{
if (m_cursor - m_limit_backward < s_size) return false;
int i;
@@ -263,17 +259,17 @@ namespace Lucene.Net.Tartarus.Snowball
return true;
}
- protected bool eq_v(string s)
+ protected virtual bool Eq_V(string s)
{
- return eq_s(s.Length, s);
+ return Eq_S(s.Length, s);
}
- protected bool eq_v_b(string s)
+ protected virtual bool Eq_V_B(string s)
{
- return eq_s_b(s.Length, s);
+ return Eq_S_B(s.Length, s);
}
- protected int find_among(Among[] v, int v_size)
+ protected virtual int FindAmong(Among[] v, int v_size)
{
int i = 0;
int j = v_size;
@@ -359,7 +355,7 @@ namespace Lucene.Net.Tartarus.Snowball
}
// find_among_b is for backwards processing. Same comments apply
- protected int find_among_b(Among[] v, int v_size)
+ protected virtual int FindAmongB(Among[] v, int v_size)
{
int i = 0;
int j = v_size;
@@ -440,10 +436,11 @@ namespace Lucene.Net.Tartarus.Snowball
}
}
- /* to replace chars between c_bra and c_ket in current by the
- * chars in s.
- */
- protected int replace_s(int c_bra, int c_ket, string s)
+ /// <summary>
+ /// to replace chars between <paramref name="c_bra"/> and <paramref name="c_ket"/> in current by the
+ /// chars in <paramref name="s"/>.
+ /// </summary>
+ protected virtual int ReplaceS(int c_bra, int c_ket, string s)
{
int adjustment = s.Length - (c_ket - c_bra);
int newLength = m_limit + adjustment;
@@ -473,7 +470,7 @@ namespace Lucene.Net.Tartarus.Snowball
return adjustment;
}
- protected void slice_check()
+ protected virtual void SliceCheck()
{
if (m_bra < 0 ||
m_bra > m_ket ||
@@ -489,35 +486,37 @@ namespace Lucene.Net.Tartarus.Snowball
}
}
- protected void slice_from(string s)
+ protected virtual void SliceFrom(string s)
{
- slice_check();
- replace_s(m_bra, m_ket, s);
+ SliceCheck();
+ ReplaceS(m_bra, m_ket, s);
}
- protected void slice_del()
+ protected virtual void SliceDel()
{
- slice_from("");
+ SliceFrom("");
}
- protected void insert(int c_bra, int c_ket, string s)
+ protected virtual void Insert(int c_bra, int c_ket, string s)
{
- int adjustment = replace_s(c_bra, c_ket, s);
+ int adjustment = ReplaceS(c_bra, c_ket, s);
if (c_bra <= m_bra) m_bra += adjustment;
if (c_bra <= m_ket) m_ket += adjustment;
}
- /* Copy the slice into the supplied StringBuffer */
- protected StringBuilder slice_to(StringBuilder s)
+ /// <summary>
+ /// Copy the slice into the supplied <see cref="StringBuilder"/>
+ /// </summary>
+ protected virtual StringBuilder SliceTo(StringBuilder s)
{
- slice_check();
+ SliceCheck();
int len = m_ket - m_bra;
s.Length = 0;
s.Append(m_current, m_bra, len);
return s;
}
- protected StringBuilder assign_to(StringBuilder s)
+ protected virtual StringBuilder AssignTo(StringBuilder s)
{
s.Length = 0;
s.Append(m_current, 0, m_limit);
[39/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed
NGram in Git
Posted by ni...@apache.org.
Lucene.Net.Analysis.Ngram - renamed NGram in Git
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ab81d913
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ab81d913
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ab81d913
Branch: refs/heads/api-work
Commit: ab81d91313149500e6c88b4ceabd6ff5aa4e0d63
Parents: 3201465
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 03:17:39 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 03:29:11 2017 +0700
----------------------------------------------------------------------
.../Analysis/NGram/EdgeNGramFilterFactory.cs | 60 +++
.../Analysis/NGram/EdgeNGramTokenFilter.cs | 245 ++++++++++++
.../Analysis/NGram/EdgeNGramTokenizer.cs | 72 ++++
.../Analysis/NGram/EdgeNGramTokenizerFactory.cs | 75 ++++
.../NGram/Lucene43EdgeNGramTokenizer.cs | 297 ++++++++++++++
.../Analysis/NGram/Lucene43NGramTokenizer.cs | 173 ++++++++
.../Analysis/NGram/NGramFilterFactory.cs | 56 +++
.../Analysis/NGram/NGramTokenFilter.cs | 252 ++++++++++++
.../Analysis/NGram/NGramTokenizer.cs | 319 +++++++++++++++
.../Analysis/NGram/NGramTokenizerFactory.cs | 70 ++++
.../Analysis/Ngram/EdgeNGramFilterFactory.cs | 60 ---
.../Analysis/Ngram/EdgeNGramTokenFilter.cs | 245 ------------
.../Analysis/Ngram/EdgeNGramTokenizer.cs | 72 ----
.../Analysis/Ngram/EdgeNGramTokenizerFactory.cs | 75 ----
.../Ngram/Lucene43EdgeNGramTokenizer.cs | 297 --------------
.../Analysis/Ngram/Lucene43NGramTokenizer.cs | 173 --------
.../Analysis/Ngram/NGramFilterFactory.cs | 56 ---
.../Analysis/Ngram/NGramTokenFilter.cs | 252 ------------
.../Analysis/Ngram/NGramTokenizer.cs | 319 ---------------
.../Analysis/Ngram/NGramTokenizerFactory.cs | 70 ----
.../Analysis/NGram/EdgeNGramTokenFilterTest.cs | 390 +++++++++++++++++++
.../Analysis/NGram/EdgeNGramTokenizerTest.cs | 278 +++++++++++++
.../Analysis/NGram/NGramTokenFilterTest.cs | 249 ++++++++++++
.../Analysis/NGram/NGramTokenizerTest.cs | 303 ++++++++++++++
.../Analysis/NGram/TestNGramFilters.cs | 196 ++++++++++
.../Analysis/Ngram/EdgeNGramTokenFilterTest.cs | 390 -------------------
.../Analysis/Ngram/EdgeNGramTokenizerTest.cs | 278 -------------
.../Analysis/Ngram/NGramTokenFilterTest.cs | 249 ------------
.../Analysis/Ngram/NGramTokenizerTest.cs | 303 --------------
.../Analysis/Ngram/TestNGramFilters.cs | 196 ----------
30 files changed, 3035 insertions(+), 3035 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramFilterFactory.cs
new file mode 100644
index 0000000..70b44d3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramFilterFactory.cs
@@ -0,0 +1,60 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Creates new instances of <see cref="EdgeNGramTokenFilter"/>.
+ /// <code>
+ /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="1"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ /// </summary>
+ public class EdgeNGramFilterFactory : TokenFilterFactory
+ {
+ private readonly int maxGramSize;
+ private readonly int minGramSize;
+ private readonly string side;
+
+ /// <summary>
+ /// Creates a new <see cref="EdgeNGramFilterFactory"/> </summary>
+ public EdgeNGramFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
+ maxGramSize = GetInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
+ side = Get(args, "side", EdgeNGramTokenFilter.Side.FRONT.ToString());
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ public override TokenStream Create(TokenStream input)
+ {
+#pragma warning disable 612, 618
+ return new EdgeNGramTokenFilter(m_luceneMatchVersion, input, side, minGramSize, maxGramSize);
+#pragma warning restore 612, 618
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
new file mode 100644
index 0000000..8cf8172
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenFilter.cs
@@ -0,0 +1,245 @@
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tokenizes the given token into n-grams of given size(s).
+ /// <para>
+ /// This <see cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
+ /// </para>
+ /// <para>As of Lucene 4.4, this filter does not support
+ /// <see cref="Side.BACK"/> (you can use <see cref="Reverse.ReverseStringFilter"/> up-front and
+ /// afterward to get the same behavior), handles supplementary characters
+ /// correctly and does not update offsets anymore.
+ /// </para>
+ /// </summary>
+ public sealed class EdgeNGramTokenFilter : TokenFilter
+ {
+ public const Side DEFAULT_SIDE = Side.FRONT;
+ public const int DEFAULT_MAX_GRAM_SIZE = 1;
+ public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ /// <summary>
+ /// Specifies which side of the input the n-gram should be generated from </summary>
+ public enum Side
+ {
+ /// <summary>
+ /// Get the n-gram from the front of the input </summary>
+ FRONT,
+
+ /// <summary>
+ /// Get the n-gram from the end of the input </summary>
+ [System.Obsolete]
+ BACK,
+ }
+
+ /// <summary>
+ /// Get the appropriate <see cref="Side"/> from a string
+ /// </summary>
+ public static Side GetSide(string sideName)
+ {
+ Side result;
+ if (!Enum.TryParse(sideName, true, out result))
+ {
+ result = Side.FRONT;
+ }
+ return result;
+ }
+
+ private readonly LuceneVersion version;
+ private readonly CharacterUtils charUtils;
+ private readonly int minGram;
+ private readonly int maxGram;
+ private Side side;
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curCodePointCount;
+ private int curGramSize;
+ private int tokStart;
+ private int tokEnd; // only used if the length changed before this filter
+ private bool updateOffsets; // never if the length changed before this filter
+ private int savePosIncr;
+ private int savePosLen;
+
+ private readonly ICharTermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+ private readonly IPositionIncrementAttribute posIncrAtt;
+ private readonly IPositionLengthAttribute posLenAtt;
+
+ /// <summary>
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
+ /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
+ : base(input)
+ {
+
+ //if (version == null)
+ //{
+ // throw new System.ArgumentException("version must not be null");
+ //}
+
+ if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
+ {
+ throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
+ }
+
+ if (!Enum.IsDefined(typeof(Side), side))
+ {
+ throw new System.ArgumentException("sideLabel must be either front or back");
+ }
+
+ if (minGram < 1)
+ {
+ throw new System.ArgumentException("minGram must be greater than zero");
+ }
+
+ if (minGram > maxGram)
+ {
+ throw new System.ArgumentException("minGram must not be greater than maxGram");
+ }
+
+ this.version = version;
+ this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ this.side = side;
+
+ this.termAtt = AddAttribute<ICharTermAttribute>();
+ this.offsetAtt = AddAttribute<IOffsetAttribute>();
+ this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ this.posLenAtt = AddAttribute<IPositionLengthAttribute>();
+ }
+
+ /// <summary>
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
+ /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, string sideLabel, int minGram, int maxGram)
+ : this(version, input, GetSide(sideLabel), minGram, maxGram)
+ {
+ }
+
+ /// <summary>
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
+#pragma warning disable 612, 618
+ : this(version, input, Side.FRONT, minGram, maxGram)
+#pragma warning restore 612, 618
+ {
+ }
+
+ public override sealed bool IncrementToken()
+ {
+ while (true)
+ {
+ if (curTermBuffer == null)
+ {
+ if (!m_input.IncrementToken())
+ {
+ return false;
+ }
+ else
+ {
+ curTermBuffer = (char[])termAtt.Buffer.Clone();
+ curTermLength = termAtt.Length;
+ curCodePointCount = charUtils.CodePointCount(termAtt.ToString());
+ curGramSize = minGram;
+ tokStart = offsetAtt.StartOffset;
+ tokEnd = offsetAtt.EndOffset;
+#pragma warning disable 612, 618
+ if (version.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ // Never update offsets
+ updateOffsets = false;
+ }
+ else
+ {
+ // if length by start + end offsets doesn't match the term text then assume
+ // this is a synonym and don't adjust the offsets.
+ updateOffsets = (tokStart + curTermLength) == tokEnd;
+ }
+ savePosIncr += posIncrAtt.PositionIncrement;
+ savePosLen = posLenAtt.PositionLength;
+ }
+ }
+ if (curGramSize <= maxGram) // if we have hit the end of our n-gram size range, quit
+ {
+ if (curGramSize <= curCodePointCount) // if the remaining input is too short, we can't generate any n-grams
+ {
+ // grab gramSize chars from front or back
+ int start = side == Side.FRONT ? 0 : charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
+ int end = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+ ClearAttributes();
+ if (updateOffsets)
+ {
+ offsetAtt.SetOffset(tokStart + start, tokStart + end);
+ }
+ else
+ {
+ offsetAtt.SetOffset(tokStart, tokEnd);
+ }
+ // first ngram gets increment, others don't
+ if (curGramSize == minGram)
+ {
+ posIncrAtt.PositionIncrement = savePosIncr;
+ savePosIncr = 0;
+ }
+ else
+ {
+ posIncrAtt.PositionIncrement = 0;
+ }
+ posLenAtt.PositionLength = savePosLen;
+ termAtt.CopyBuffer(curTermBuffer, start, end - start);
+ curGramSize++;
+ return true;
+ }
+ }
+ curTermBuffer = null;
+ }
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ curTermBuffer = null;
+ savePosIncr = 0;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizer.cs
new file mode 100644
index 0000000..ed2cb3d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizer.cs
@@ -0,0 +1,72 @@
+\ufeffusing Lucene.Net.Util;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tokenizes the input from an edge into n-grams of given size(s).
+ /// <para>
+ /// This <see cref="Tokenizer"/> create n-grams from the beginning edge or ending edge of a input token.
+ /// </para>
+ /// <para>As of Lucene 4.4, this tokenizer
+ /// <list type="bullet">
+ /// <item>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage</item>
+ /// <item>doesn't trim the input,</item>
+ /// <item>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones</item>
+ /// <item>doesn't support backward n-grams anymore.</item>
+ /// <item>supports <see cref="Util.CharTokenizer.IsTokenChar(int)"/> pre-tokenization,</item>
+ /// <item>correctly handles supplementary characters.</item>
+ /// </list>
+ /// </para>
+ /// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
+ /// to use the old behavior through <see cref="Lucene43EdgeNGramTokenizer"/>.
+ /// </para>
+ /// </summary>
+ public class EdgeNGramTokenizer : NGramTokenizer
+ {
+ public const int DEFAULT_MAX_GRAM_SIZE = 1;
+ public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ /// <summary>
+ /// Creates <see cref="EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
+ : base(version, input, minGram, maxGram, true)
+ {
+ }
+
+ /// <summary>
+ /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)
+ : base(version, factory, input, minGram, maxGram, true)
+ {
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerFactory.cs
new file mode 100644
index 0000000..00325f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerFactory.cs
@@ -0,0 +1,75 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Creates new instances of <see cref="EdgeNGramTokenizer"/>.
+ /// <code>
+ /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="1" maxGramSize="1"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ /// </summary>
+ public class EdgeNGramTokenizerFactory : TokenizerFactory
+ {
+ private readonly int maxGramSize;
+ private readonly int minGramSize;
+ private readonly string side;
+
+ /// <summary>
+ /// Creates a new <see cref="EdgeNGramTokenizerFactory"/> </summary>
+ public EdgeNGramTokenizerFactory(IDictionary<string, string> args) : base(args)
+ {
+ minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
+ maxGramSize = GetInt(args, "maxGramSize", EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE);
+ side = Get(args, "side", EdgeNGramTokenFilter.Side.FRONT.ToString());
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+ {
+#pragma warning disable 612, 618
+ if (m_luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ EdgeNGramTokenFilter.Side sideEnum;
+ if (!Enum.TryParse(this.side, true, out sideEnum))
+ {
+ throw new System.ArgumentException(typeof(EdgeNGramTokenizer).Name + " does not support backward n-grams as of Lucene 4.4");
+ }
+ return new EdgeNGramTokenizer(m_luceneMatchVersion, input, minGramSize, maxGramSize);
+ }
+ else
+ {
+#pragma warning disable 612, 618
+ return new Lucene43EdgeNGramTokenizer(m_luceneMatchVersion, input, side, minGramSize, maxGramSize);
+#pragma warning restore 612, 618
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs
new file mode 100644
index 0000000..4dadbed
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs
@@ -0,0 +1,297 @@
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Old version of <see cref="EdgeNGramTokenizer"/> which doesn't handle correctly
+ /// supplementary characters.
+ /// </summary>
+ [Obsolete]
+ public sealed class Lucene43EdgeNGramTokenizer : Tokenizer
+ {
+ public const Side DEFAULT_SIDE = Side.FRONT;
+ public const int DEFAULT_MAX_GRAM_SIZE = 1;
+ public const int DEFAULT_MIN_GRAM_SIZE = 1;
+
+ private ICharTermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+
+ /// <summary>
+ /// Specifies which side of the input the n-gram should be generated from </summary>
+ public enum Side
+ {
+ /// <summary>
+ /// Get the n-gram from the front of the input </summary>
+ FRONT,
+
+ /// <summary>
+ /// Get the n-gram from the end of the input </summary>
+ BACK,
+ }
+
+ // Get the appropriate Side from a string
+ public static Side GetSide(string sideName)
+ {
+ Side result;
+ if (!Enum.TryParse(sideName, true, out result))
+ {
+ result = Side.FRONT;
+ }
+ return result;
+ }
+
+ private int minGram;
+ private int maxGram;
+ private int gramSize;
+ private Side side;
+ private bool started;
+ private int inLen; // length of the input AFTER trim()
+ private int charsRead; // length of the input
+ private string inStr;
+
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, Side side, int minGram, int maxGram)
+ : base(input)
+ {
+ Init(version, side, minGram, maxGram);
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, Side side, int minGram, int maxGram)
+ : base(factory, input)
+ {
+ Init(version, side, minGram, maxGram);
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, string sideLabel, int minGram, int maxGram)
+ : this(version, input, GetSide(sideLabel), minGram, maxGram)
+ {
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ [Obsolete]
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram)
+ : this(version, factory, input, GetSide(sideLabel), minGram, maxGram)
+ {
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
+ : this(version, input, Side.FRONT, minGram, maxGram)
+ {
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
+ /// </summary>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
+ : this(version, factory, input, Side.FRONT, minGram, maxGram)
+ {
+ }
+
+ private void Init(LuceneVersion version, Side side, int minGram, int maxGram)
+ {
+ //if (version == null)
+ //{
+ // throw new System.ArgumentException("version must not be null");
+ //}
+
+ if (!Enum.IsDefined(typeof(Side), side))
+ {
+ throw new System.ArgumentException("sideLabel must be either front or back");
+ }
+
+ if (minGram < 1)
+ {
+ throw new System.ArgumentException("minGram must be greater than zero");
+ }
+
+ if (minGram > maxGram)
+ {
+ throw new System.ArgumentException("minGram must not be greater than maxGram");
+ }
+
+ if (version.OnOrAfter(LuceneVersion.LUCENE_44))
+ {
+ if (side == Side.BACK)
+ {
+ throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4");
+ }
+ }
+ else
+ {
+ maxGram = Math.Min(maxGram, 1024);
+ }
+
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ this.side = side;
+ this.termAtt = AddAttribute<ICharTermAttribute>();
+ this.offsetAtt = AddAttribute<IOffsetAttribute>();
+ this.posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ /// <summary>
+ /// Returns the next token in the stream, or null at EOS. </summary>
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ // if we are just starting, read the whole input
+ if (!started)
+ {
+ started = true;
+ gramSize = minGram;
+ int limit = side == Side.FRONT ? maxGram : 1024;
+ char[] chars = new char[Math.Min(1024, limit)];
+ charsRead = 0;
+ // TODO: refactor to a shared readFully somewhere:
+ bool exhausted = false;
+ while (charsRead < limit)
+ {
+ int inc = m_input.Read(chars, charsRead, chars.Length - charsRead);
+ if (inc <= 0)
+ {
+ exhausted = true;
+ break;
+ }
+ charsRead += inc;
+ if (charsRead == chars.Length && charsRead < limit)
+ {
+ chars = ArrayUtil.Grow(chars);
+ }
+ }
+
+ inStr = new string(chars, 0, charsRead);
+ inStr = inStr.Trim();
+
+ if (!exhausted)
+ {
+ // Read extra throwaway chars so that on end() we
+ // report the correct offset:
+ var throwaway = new char[1024];
+ while (true)
+ {
+ int inc = m_input.Read(throwaway, 0, throwaway.Length);
+ if (inc <= 0)
+ {
+ break;
+ }
+ charsRead += inc;
+ }
+ }
+
+ inLen = inStr.Length;
+ if (inLen == 0)
+ {
+ return false;
+ }
+ posIncrAtt.PositionIncrement = 1;
+ }
+ else
+ {
+ posIncrAtt.PositionIncrement = 0;
+ }
+
+ // if the remaining input is too short, we can't generate any n-grams
+ if (gramSize > inLen)
+ {
+ return false;
+ }
+
+ // if we have hit the end of our n-gram size range, quit
+ if (gramSize > maxGram || gramSize > inLen)
+ {
+ return false;
+ }
+
+ // grab gramSize chars from front or back
+ int start = side == Side.FRONT ? 0 : inLen - gramSize;
+ int end = start + gramSize;
+ termAtt.SetEmpty().Append(inStr, start, end);
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+ gramSize++;
+ return true;
+ }
+
+ public override void End()
+ {
+ base.End();
+ // set final offset
+ int finalOffset = CorrectOffset(charsRead);
+ this.offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ started = false;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs
new file mode 100644
index 0000000..b806345
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs
@@ -0,0 +1,173 @@
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Old broken version of <see cref="NGramTokenizer"/>.
+ /// </summary>
+ [Obsolete]
+ public sealed class Lucene43NGramTokenizer : Tokenizer
+ {
+ public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+ public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+ private int minGram, maxGram;
+ private int gramSize;
+ private int pos;
+ private int inLen; // length of the input AFTER trim()
+ private int charsRead; // length of the input
+ private string inStr;
+ private bool started;
+
+ private ICharTermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+
+ /// <summary>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public Lucene43NGramTokenizer(TextReader input, int minGram, int maxGram)
+ : base(input)
+ {
+ Init(minGram, maxGram);
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="factory"> <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public Lucene43NGramTokenizer(AttributeFactory factory, TextReader input, int minGram, int maxGram)
+ : base(factory, input)
+ {
+ Init(minGram, maxGram);
+ }
+
+ /// <summary>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with default min and max n-grams. </summary>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ public Lucene43NGramTokenizer(TextReader input)
+ : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+ {
+ }
+
+ private void Init(int minGram, int maxGram)
+ {
+ if (minGram < 1)
+ {
+ throw new System.ArgumentException("minGram must be greater than zero");
+ }
+ if (minGram > maxGram)
+ {
+ throw new System.ArgumentException("minGram must not be greater than maxGram");
+ }
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ termAtt = AddAttribute<ICharTermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ }
+
+ /// <summary>
+ /// Returns the next token in the stream, or null at EOS. </summary>
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ if (!started)
+ {
+ started = true;
+ gramSize = minGram;
+ char[] chars = new char[1024];
+ charsRead = 0;
+ // TODO: refactor to a shared readFully somewhere:
+ while (charsRead < chars.Length)
+ {
+ int inc = m_input.Read(chars, charsRead, chars.Length - charsRead);
+ if (inc == -1)
+ {
+ break;
+ }
+ charsRead += inc;
+ }
+ inStr = (new string(chars, 0, charsRead)).Trim(); // remove any trailing empty strings
+
+ if (charsRead == chars.Length)
+ {
+ // Read extra throwaway chars so that on end() we
+ // report the correct offset:
+ var throwaway = new char[1024];
+ while (true)
+ {
+ int inc = m_input.Read(throwaway, 0, throwaway.Length);
+ if (inc == -1)
+ {
+ break;
+ }
+ charsRead += inc;
+ }
+ }
+
+ inLen = inStr.Length;
+ if (inLen == 0)
+ {
+ return false;
+ }
+ }
+
+ if (pos + gramSize > inLen) // if we hit the end of the string
+ {
+ pos = 0; // reset to beginning of string
+ gramSize++; // increase n-gram size
+ if (gramSize > maxGram) // we are done
+ {
+ return false;
+ }
+ if (pos + gramSize > inLen)
+ {
+ return false;
+ }
+ }
+
+ int oldPos = pos;
+ pos++;
+ termAtt.SetEmpty().Append(inStr, oldPos, oldPos + gramSize);
+ offsetAtt.SetOffset(CorrectOffset(oldPos), CorrectOffset(oldPos + gramSize));
+ return true;
+ }
+
+ public override void End()
+ {
+ base.End();
+ // set final offset
+ int finalOffset = CorrectOffset(charsRead);
+ this.offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ started = false;
+ pos = 0;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramFilterFactory.cs
new file mode 100644
index 0000000..ca1d0bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramFilterFactory.cs
@@ -0,0 +1,56 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Factory for <see cref="NGramTokenFilter"/>.
+ /// <code>
+ /// <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="2"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ /// </summary>
+ public class NGramFilterFactory : TokenFilterFactory
+ {
+ private readonly int maxGramSize;
+ private readonly int minGramSize;
+
+ /// <summary>
+ /// Creates a new <see cref="NGramFilterFactory"/> </summary>
+ public NGramFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ minGramSize = GetInt(args, "minGramSize", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
+ maxGramSize = GetInt(args, "maxGramSize", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ public override TokenStream Create(TokenStream input)
+ {
+ return new NGramTokenFilter(m_luceneMatchVersion, input, minGramSize, maxGramSize);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
new file mode 100644
index 0000000..f1c82c5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs
@@ -0,0 +1,252 @@
+\ufeffusing Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tokenizes the input into n-grams of the given size(s).
+ /// <para>You must specify the required <see cref="LuceneVersion"/> compatibility when
+ /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filters:
+ /// <list type="bullet">
+ /// <item>handles supplementary characters correctly,</item>
+ /// <item>emits all n-grams for the same token at the same position,</item>
+ /// <item>does not modify offsets,</item>
+ /// <item>sorts n-grams by their offset in the original token first, then
+ /// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
+ /// "c").</item>
+ /// </list>
+ /// </para>
+ /// <para>You can make this filter use the old behavior by providing a version <
+ /// <see cref="LuceneVersion.LUCENE_44"/> in the constructor but this is not recommended as
+ /// it will lead to broken <see cref="TokenStream"/>s that will cause highlighting
+ /// bugs.
+ /// </para>
+ /// <para>If you were using this <see cref="TokenFilter"/> to perform partial highlighting,
+ /// this won't work anymore since this filter doesn't update offsets. You should
+ /// modify your analysis chain to use <see cref="NGramTokenizer"/>, and potentially
+ /// override <see cref="NGramTokenizer.IsTokenChar(int)"/> to perform pre-tokenization.
+ /// </para>
+ /// </summary>
+ public sealed class NGramTokenFilter : TokenFilter
+ {
+ public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+ public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+ private readonly int minGram, maxGram;
+
+ private char[] curTermBuffer;
+ private int curTermLength;
+ private int curCodePointCount;
+ private int curGramSize;
+ private int curPos;
+ private int curPosInc, curPosLen;
+ private int tokStart;
+ private int tokEnd;
+ private bool hasIllegalOffsets; // only if the length changed before this filter
+
+ private readonly LuceneVersion version;
+ private readonly CharacterUtils charUtils;
+ private readonly ICharTermAttribute termAtt;
+ private readonly IPositionIncrementAttribute posIncAtt;
+ private readonly IPositionLengthAttribute posLenAtt;
+ private readonly IOffsetAttribute offsetAtt;
+
+ /// <summary>
+ /// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams. </summary>
+ /// <param name="version"> Lucene version to enable correct position increments.
+ /// See <see cref="NGramTokenFilter"/> for details. </param>
+ /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
+ : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
+ {
+ this.version = version;
+ this.charUtils = version.OnOrAfter(
+#pragma warning disable 612, 618
+ LuceneVersion.LUCENE_44) ?
+#pragma warning restore 612, 618
+ CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+ if (minGram < 1)
+ {
+ throw new System.ArgumentException("minGram must be greater than zero");
+ }
+ if (minGram > maxGram)
+ {
+ throw new System.ArgumentException("minGram must not be greater than maxGram");
+ }
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+#pragma warning disable 612, 618
+ if (version.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+ posLenAtt = AddAttribute<IPositionLengthAttribute>();
+ }
+ else
+ {
+ posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
+ posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
+ }
+ termAtt = AddAttribute<ICharTermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ }
+
+ private class PositionIncrementAttributeAnonymousInnerClassHelper : PositionIncrementAttribute
+ {
+ private readonly NGramTokenFilter outerInstance;
+
+ public PositionIncrementAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ public override int PositionIncrement
+ {
+ set
+ {
+ }
+ get
+ {
+ return 0;
+ }
+ }
+ }
+
+ private class PositionLengthAttributeAnonymousInnerClassHelper : PositionLengthAttribute
+ {
+ private readonly NGramTokenFilter outerInstance;
+
+ public PositionLengthAttributeAnonymousInnerClassHelper(NGramTokenFilter outerInstance)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ public override int PositionLength
+ {
+ set
+ {
+ }
+ get
+ {
+ return 0;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Creates <see cref="NGramTokenFilter"/> with default min and max n-grams. </summary>
+ /// <param name="version"> Lucene version to enable correct position increments.
+ /// See <see cref="NGramTokenFilter"/> for details. </param>
+ /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
+ public NGramTokenFilter(LuceneVersion version, TokenStream input)
+ : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+ {
+ }
+
+ /// <summary>
+ /// Returns the next token in the stream, or null at EOS.
+ /// </summary>
+ public override sealed bool IncrementToken()
+ {
+ while (true)
+ {
+ if (curTermBuffer == null)
+ {
+ if (!m_input.IncrementToken())
+ {
+ return false;
+ }
+ else
+ {
+ curTermBuffer = (char[])termAtt.Buffer.Clone();
+ curTermLength = termAtt.Length;
+ curCodePointCount = charUtils.CodePointCount(termAtt.ToString());
+ curGramSize = minGram;
+ curPos = 0;
+ curPosInc = posIncAtt.PositionIncrement;
+ curPosLen = posLenAtt.PositionLength;
+ tokStart = offsetAtt.StartOffset;
+ tokEnd = offsetAtt.EndOffset;
+ // if length by start + end offsets doesn't match the term text then assume
+ // this is a synonym and don't adjust the offsets.
+ hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
+ }
+ }
+#pragma warning disable 612, 618
+ if (version.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount)
+ {
+ ++curPos;
+ curGramSize = minGram;
+ }
+ if ((curPos + curGramSize) <= curCodePointCount)
+ {
+ ClearAttributes();
+ int start = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
+ int end = charUtils.OffsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+ termAtt.CopyBuffer(curTermBuffer, start, end - start);
+ posIncAtt.PositionIncrement = curPosInc;
+ curPosInc = 0;
+ posLenAtt.PositionLength = curPosLen;
+ offsetAtt.SetOffset(tokStart, tokEnd);
+ curGramSize++;
+ return true;
+ }
+ }
+ else
+ {
+ while (curGramSize <= maxGram)
+ {
+ while (curPos + curGramSize <= curTermLength) // while there is input
+ {
+ ClearAttributes();
+ termAtt.CopyBuffer(curTermBuffer, curPos, curGramSize);
+ if (hasIllegalOffsets)
+ {
+ offsetAtt.SetOffset(tokStart, tokEnd);
+ }
+ else
+ {
+ offsetAtt.SetOffset(tokStart + curPos, tokStart + curPos + curGramSize);
+ }
+ curPos++;
+ return true;
+ }
+ curGramSize++; // increase n-gram size
+ curPos = 0;
+ }
+ }
+ curTermBuffer = null;
+ }
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ curTermBuffer = null;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
new file mode 100644
index 0000000..b1845c8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizer.cs
@@ -0,0 +1,319 @@
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tokenizes the input into n-grams of the given size(s).
+ /// <para>On the contrary to <see cref="NGramTokenFilter"/>, this class sets offsets so
+ /// that characters between startOffset and endOffset in the original stream are
+ /// the same as the term chars.
+ /// </para>
+ /// <para>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
+ /// <list type="table">
+ /// <listheader>
+ /// <term>Term</term>
+ /// <term>Position increment</term>
+ /// <term>Position length</term>
+ /// <term>Offsets</term>
+ /// </listheader>
+ /// <item>
+ /// <term>ab</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[0,2[</term>
+ /// </item>
+ /// <item>
+ /// <term>abc</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[0,3[</term>
+ /// </item>
+ /// <item>
+ /// <term>bc</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[1,3[</term>
+ /// </item>
+ /// <item>
+ /// <term>bcd</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[1,4[</term>
+ /// </item>
+ /// <item>
+ /// <term>cd</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[2,4[</term>
+ /// </item>
+ /// <item>
+ /// <term>cde</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[2,5[</term>
+ /// </item>
+ /// <item>
+ /// <term>de</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[3,5[</term>
+ /// </item>
+ /// </list>
+ /// </para>
+ /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:
+ /// <list type="bullet">
+ /// <item>tokenize in a streaming fashion to support streams which are larger
+ /// than 1024 chars (limit of the previous version),</item>
+ /// <item>count grams based on unicode code points instead of java chars (and
+ /// never split in the middle of surrogate pairs),</item>
+ /// <item>give the ability to pre-tokenize the stream (<see cref="IsTokenChar(int)"/>)
+ /// before computing n-grams.</item>
+ /// </list>
+ /// </para>
+ /// <para>Additionally, this class doesn't trim trailing whitespaces and emits
+ /// tokens in a different order, tokens are now emitted by increasing start
+ /// offsets while they used to be emitted by increasing lengths (which prevented
+ /// from supporting large input streams).
+ /// </para>
+ /// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
+ /// to use the old behavior through <see cref="Lucene43NGramTokenizer"/>.
+ /// </para>
+ /// </summary>
+ // non-sealed to allow for overriding IsTokenChar, but all other methods should be sealed
+ public class NGramTokenizer : Tokenizer
+ {
+ public const int DEFAULT_MIN_NGRAM_SIZE = 1;
+ public const int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+ private CharacterUtils charUtils;
+ private CharacterUtils.CharacterBuffer charBuffer;
+ private int[] buffer; // like charBuffer, but converted to code points
+ private int bufferStart, bufferEnd; // remaining slice in buffer
+ private int offset;
+ private int gramSize;
+ private int minGram, maxGram;
+ private bool exhausted;
+ private int lastCheckedChar; // last offset in the buffer that we checked
+ private int lastNonTokenChar; // last offset that we found to not be a token char
+ private bool edgesOnly; // leading edges n-grams only
+
+ private ICharTermAttribute termAtt;
+ private IPositionIncrementAttribute posIncAtt;
+ private IPositionLengthAttribute posLenAtt;
+ private IOffsetAttribute offsetAtt;
+
+ internal NGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram, bool edgesOnly)
+ : base(input)
+ {
+ Init(version, minGram, maxGram, edgesOnly);
+ }
+
+ /// <summary>
+ /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public NGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
+ : this(version, input, minGram, maxGram, false)
+ {
+ }
+
+ internal NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram, bool edgesOnly)
+ : base(factory, input)
+ {
+ Init(version, minGram, maxGram, edgesOnly);
+ }
+
+ /// <summary>
+ /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ /// <param name="minGram"> the smallest n-gram to generate </param>
+ /// <param name="maxGram"> the largest n-gram to generate </param>
+ public NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
+ : this(version, factory, input, minGram, maxGram, false)
+ {
+ }
+
+ /// <summary>
+ /// Creates <see cref="NGramTokenizer"/> with default min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
+ public NGramTokenizer(LuceneVersion version, TextReader input)
+ : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+ {
+ }
+
+ private void Init(LuceneVersion version, int minGram, int maxGram, bool edgesOnly)
+ {
+#pragma warning disable 612, 618
+ if (!version.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ throw new System.ArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
+ }
+#pragma warning disable 612, 618
+ charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ?
+#pragma warning restore 612, 618
+ CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
+ if (minGram < 1)
+ {
+ throw new System.ArgumentException("minGram must be greater than zero");
+ }
+ if (minGram > maxGram)
+ {
+ throw new System.ArgumentException("minGram must not be greater than maxGram");
+ }
+ termAtt = AddAttribute<ICharTermAttribute>();
+ posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+ posLenAtt = AddAttribute<IPositionLengthAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ this.minGram = minGram;
+ this.maxGram = maxGram;
+ this.edgesOnly = edgesOnly;
+ charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
+ buffer = new int[charBuffer.Buffer.Length];
+
+ // Make the term att large enough
+ termAtt.ResizeBuffer(2 * maxGram);
+ }
+
+ public override sealed bool IncrementToken()
+ {
+ ClearAttributes();
+
+ // termination of this loop is guaranteed by the fact that every iteration
+ // either advances the buffer (calls consumes()) or increases gramSize
+ while (true)
+ {
+ // compact
+ if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted)
+ {
+ Array.Copy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
+ bufferEnd -= bufferStart;
+ lastCheckedChar -= bufferStart;
+ lastNonTokenChar -= bufferStart;
+ bufferStart = 0;
+
+ // fill in remaining space
+ exhausted = !charUtils.Fill(charBuffer, m_input, buffer.Length - bufferEnd);
+ // convert to code points
+ bufferEnd += charUtils.ToCodePoints(charBuffer.Buffer, 0, charBuffer.Length, buffer, bufferEnd);
+ }
+
+ // should we go to the next offset?
+ if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd)
+ {
+ if (bufferStart + 1 + minGram > bufferEnd)
+ {
+ Debug.Assert(exhausted);
+ return false;
+ }
+ Consume();
+ gramSize = minGram;
+ }
+
+ UpdateLastNonTokenChar();
+
+ // retry if the token to be emitted was going to not only contain token chars
+ bool termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
+ bool isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
+ if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar)
+ {
+ Consume();
+ gramSize = minGram;
+ continue;
+ }
+
+ int length = charUtils.ToChars(buffer, bufferStart, gramSize, termAtt.Buffer, 0);
+ termAtt.Length = length;
+ posIncAtt.PositionIncrement = 1;
+ posLenAtt.PositionLength = 1;
+ offsetAtt.SetOffset(CorrectOffset(offset), CorrectOffset(offset + length));
+ ++gramSize;
+ return true;
+ }
+ }
+
+ private void UpdateLastNonTokenChar()
+ {
+ int termEnd = bufferStart + gramSize - 1;
+ if (termEnd > lastCheckedChar)
+ {
+ for (int i = termEnd; i > lastCheckedChar; --i)
+ {
+ if (!IsTokenChar(buffer[i]))
+ {
+ lastNonTokenChar = i;
+ break;
+ }
+ }
+ lastCheckedChar = termEnd;
+ }
+ }
+
+ /// <summary>
+ /// Consume one code point. </summary>
+ private void Consume()
+ {
+ offset += Character.CharCount(buffer[bufferStart++]);
+ }
+
+ /// <summary>
+ /// Only collect characters which satisfy this condition. </summary>
+ protected virtual bool IsTokenChar(int chr)
+ {
+ return true;
+ }
+
+ public override sealed void End()
+ {
+ base.End();
+ Debug.Assert(bufferStart <= bufferEnd);
+ int endOffset = offset;
+ for (int i = bufferStart; i < bufferEnd; ++i)
+ {
+ endOffset += Character.CharCount(buffer[i]);
+ }
+ endOffset = CorrectOffset(endOffset);
+ // set final offset
+ offsetAtt.SetOffset(endOffset, endOffset);
+ }
+
+ public override sealed void Reset()
+ {
+ base.Reset();
+ bufferStart = bufferEnd = buffer.Length;
+ lastNonTokenChar = lastCheckedChar = bufferStart - 1;
+ offset = 0;
+ gramSize = minGram;
+ exhausted = false;
+ charBuffer.Reset();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizerFactory.cs
new file mode 100644
index 0000000..cf25b65
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenizerFactory.cs
@@ -0,0 +1,70 @@
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Factory for <see cref="NGramTokenizer"/>.
+ /// <code>
+ /// <fieldType name="text_ngrm" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.NGramTokenizerFactory" minGramSize="1" maxGramSize="2"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ /// </summary>
+ public class NGramTokenizerFactory : TokenizerFactory
+ {
+ private readonly int maxGramSize;
+ private readonly int minGramSize;
+
+ /// <summary>
+ /// Creates a new <see cref="NGramTokenizerFactory"/> </summary>
+ public NGramTokenizerFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ minGramSize = GetInt(args, "minGramSize", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
+ maxGramSize = GetInt(args, "maxGramSize", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ /// <summary>
+ /// Creates the <see cref="TokenStream"/> of n-grams from the given <see cref="TextReader"/> and <see cref="AttributeSource.AttributeFactory"/>. </summary>
+ public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+ {
+#pragma warning disable 612, 618
+ if (m_luceneMatchVersion.OnOrAfter(LuceneVersion.LUCENE_44))
+#pragma warning restore 612, 618
+ {
+ return new NGramTokenizer(m_luceneMatchVersion, factory, input, minGramSize, maxGramSize);
+ }
+ else
+ {
+#pragma warning disable 612, 618
+ return new Lucene43NGramTokenizer(factory, input, minGramSize, maxGramSize);
+#pragma warning restore 612, 618
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
deleted file mode 100644
index 70b44d3..0000000
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
+++ /dev/null
@@ -1,60 +0,0 @@
-\ufeffusing Lucene.Net.Analysis.Util;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.NGram
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Creates new instances of <see cref="EdgeNGramTokenFilter"/>.
- /// <code>
- /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="1"/>
- /// </analyzer>
- /// </fieldType></code>
- /// </summary>
- public class EdgeNGramFilterFactory : TokenFilterFactory
- {
- private readonly int maxGramSize;
- private readonly int minGramSize;
- private readonly string side;
-
- /// <summary>
- /// Creates a new <see cref="EdgeNGramFilterFactory"/> </summary>
- public EdgeNGramFilterFactory(IDictionary<string, string> args)
- : base(args)
- {
- minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
- maxGramSize = GetInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
- side = Get(args, "side", EdgeNGramTokenFilter.Side.FRONT.ToString());
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override TokenStream Create(TokenStream input)
- {
-#pragma warning disable 612, 618
- return new EdgeNGramTokenFilter(m_luceneMatchVersion, input, side, minGramSize, maxGramSize);
-#pragma warning restore 612, 618
- }
- }
-}
\ No newline at end of file
[33/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Reserve() >
EnsureCapacity (to match StringBuilder)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Reserve() > EnsureCapacity (to match StringBuilder)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b6c09987
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b6c09987
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b6c09987
Branch: refs/heads/api-work
Commit: b6c09987e3660ebb11f0c46fd7712693c5d6449c
Parents: 230c9f9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 02:40:10 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 03:29:08 2017 +0700
----------------------------------------------------------------------
.../Analysis/En/KStemmer.cs | 2 +-
.../Analysis/Util/OpenStringBuilder.cs | 18 +++++++++---------
2 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b6c09987/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
index b861ec3..c446532 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -1926,7 +1926,7 @@ namespace Lucene.Net.Analysis.En
word.Reset();
// allocate enough space so that an expansion is never needed
- word.Reserve(len + 10);
+ word.EnsureCapacity(len + 10);
for (int i = 0; i < len; i++)
{
char ch = term[i];
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b6c09987/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index 3dfec5c..b72f116 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -89,7 +89,7 @@ namespace Lucene.Net.Analysis.Util
public virtual OpenStringBuilder Append(ICharSequence csq, int start, int end)
{
- Reserve(end - start);
+ EnsureCapacity(end - start);
for (int i = start; i < end; i++)
{
UnsafeWrite(csq[i]);
@@ -106,7 +106,7 @@ namespace Lucene.Net.Analysis.Util
// LUCENENET specific - overload for string (more common in .NET than ICharSequence)
public virtual OpenStringBuilder Append(string csq, int start, int end)
{
- Reserve(end - start);
+ EnsureCapacity(end - start);
for (int i = start; i < end; i++)
{
UnsafeWrite(csq[i]);
@@ -123,7 +123,7 @@ namespace Lucene.Net.Analysis.Util
// LUCENENET specific - overload for StringBuilder
public virtual OpenStringBuilder Append(StringBuilder csq, int start, int end)
{
- Reserve(end - start);
+ EnsureCapacity(end - start);
for (int i = start; i < end; i++)
{
UnsafeWrite(csq[i]);
@@ -191,11 +191,11 @@ namespace Lucene.Net.Analysis.Util
m_buf = newbuf;
}
- public virtual void Reserve(int num)
+ public virtual void EnsureCapacity(int capacity) // LUCENENET NOTE: renamed from reserve() in Lucene to match .NET StringBuilder
{
- if (m_len + num > m_buf.Length)
+ if (m_len + capacity > m_buf.Length)
{
- Resize(m_len + num);
+ Resize(m_len + capacity);
}
}
@@ -220,7 +220,7 @@ namespace Lucene.Net.Analysis.Util
public virtual void Write(char[] b, int off, int len)
{
- Reserve(len);
+ EnsureCapacity(len);
UnsafeWrite(b, off, len);
}
@@ -232,13 +232,13 @@ namespace Lucene.Net.Analysis.Util
// LUCENENET specific overload for StringBuilder
public void Write(StringBuilder arr)
{
- Reserve(arr.Length);
+ EnsureCapacity(arr.Length);
UnsafeWrite(arr, 0, arr.Length);
}
public virtual void Write(string s)
{
- Reserve(s.Length);
+ EnsureCapacity(s.Length);
s.CopyTo(0, m_buf, m_len, s.Length - 0);
m_len += s.Length;
}
[10/39] lucenenet git commit: Lucene.Net.Analysis.Tr refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Tr refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7cf215b4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7cf215b4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7cf215b4
Branch: refs/heads/api-work
Commit: 7cf215b4b7b710b5d4e0e2ed001a0cccaaccfdc4
Parents: 69bd851
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 16:12:13 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:15 2017 +0700
----------------------------------------------------------------------
.../Analysis/Tr/ApostropheFilter.cs | 5 ++---
.../Analysis/Tr/ApostropheFilterFactory.cs | 6 ++++--
.../Analysis/Tr/TurkishAnalyzer.cs | 13 +++++++------
.../Analysis/Tr/TurkishLowerCaseFilter.cs | 20 +++++++-------------
.../Tr/TurkishLowerCaseFilterFactory.cs | 6 +++---
5 files changed, 23 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cf215b4/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
index a8343b8..6a93f68 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilter.cs
@@ -31,16 +31,15 @@ namespace Lucene.Net.Analysis.Tr
/// </summary>
public sealed class ApostropheFilter : TokenFilter
{
-
private readonly ICharTermAttribute termAtt;
public ApostropheFilter(TokenStream @in)
- : base(@in)
+ : base(@in)
{
termAtt = AddAttribute<ICharTermAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (!m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cf215b4/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
index 198c382..986f8ea 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/ApostropheFilterFactory.cs
@@ -33,8 +33,10 @@ namespace Lucene.Net.Analysis.Tr
/// </summary>
public class ApostropheFilterFactory : TokenFilterFactory
{
-
- public ApostropheFilterFactory(IDictionary<string, string> args) : base(args)
+ /// <summary>
+ /// Creates a new <see cref="ApostropheFilterFactory"/> </summary>
+ public ApostropheFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cf215b4/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
index 93f08c4..17b7a2a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishAnalyzer.cs
@@ -57,7 +57,7 @@ namespace Lucene.Net.Analysis.Tr
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.;
/// </summary>
private class DefaultSetHolder
@@ -84,10 +84,10 @@ namespace Lucene.Net.Analysis.Tr
}
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public TurkishAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
@@ -97,7 +97,7 @@ namespace Lucene.Net.Analysis.Tr
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
public TurkishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}
@@ -109,7 +109,8 @@ namespace Lucene.Net.Analysis.Tr
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
- public TurkishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+ public TurkishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
@@ -117,7 +118,7 @@ namespace Lucene.Net.Analysis.Tr
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cf215b4/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
index 1a47a89..644a4b9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -39,29 +39,23 @@ namespace Lucene.Net.Analysis.Tr
private const int LATIN_SMALL_LETTER_I = '\u0069';
private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
private const int COMBINING_DOT_ABOVE = '\u0307';
-
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Create a new TurkishLowerCaseFilter, that normalizes Turkish token text
+ /// Create a new <see cref="TurkishLowerCaseFilter"/>, that normalizes Turkish token text
/// to lower case.
/// </summary>
- /// <param name="in"> TokenStream to filter </param>
+ /// <param name="in"> <see cref="TokenStream"/> to filter </param>
public TurkishLowerCaseFilter(TokenStream @in)
- : base(@in)
+ : base(@in)
{
termAtt = AddAttribute<ICharTermAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
bool iOrAfter = false;
- var cultureInfo = new CultureInfo("tr");
-#if NET451
- Thread.CurrentThread.CurrentUICulture = cultureInfo;
-#else
- CultureInfo.CurrentUICulture = cultureInfo;
-#endif
+
if (m_input.IncrementToken())
{
char[] buffer = termAtt.Buffer;
@@ -99,7 +93,7 @@ namespace Lucene.Net.Analysis.Tr
}
}
- using (var culture = new CultureContext("tr-TR"))
+ using (var culture = new CultureContext("tr"))
{
switch (ch)
{
@@ -162,7 +156,7 @@ namespace Lucene.Net.Analysis.Tr
/// <summary>
/// delete a character in-place.
- /// rarely happens, only if COMBINING_DOT_ABOVE is found after an i
+ /// rarely happens, only if <see cref="COMBINING_DOT_ABOVE"/> is found after an i
/// </summary>
private int Delete(char[] s, int pos, int len)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7cf215b4/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
index 486b2c0..25fcd6e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
@@ -32,10 +32,10 @@ namespace Lucene.Net.Analysis.Tr
/// </summary>
public class TurkishLowerCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new TurkishLowerCaseFilterFactory </summary>
- public TurkishLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="TurkishLowerCaseFilterFactory"/> </summary>
+ public TurkishLowerCaseFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
[15/39] lucenenet git commit: Lucene.Net.Analysis.Wikipedia refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Wikipedia refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d3903863
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d3903863
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d3903863
Branch: refs/heads/api-work
Commit: d39038630a228b541f4082f76b3b3e047bd53bca
Parents: f934ceb
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 21:45:14 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:20 2017 +0700
----------------------------------------------------------------------
.../Analysis/Wikipedia/WikipediaTokenizer.cs | 98 ++++---
.../Wikipedia/WikipediaTokenizerFactory.cs | 53 ++--
.../Wikipedia/WikipediaTokenizerImpl.cs | 281 +++++++++----------
3 files changed, 217 insertions(+), 215 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3903863/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
index f815db4..e6f1c71 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
@@ -1,9 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using System.Text;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
+using System.Collections.Generic;
using System.IO;
-using System.Linq;
+using System.Text;
namespace Lucene.Net.Analysis.Wikipedia
{
@@ -25,10 +24,9 @@ namespace Lucene.Net.Analysis.Wikipedia
*/
/// <summary>
- /// Extension of StandardTokenizer that is aware of Wikipedia syntax. It is based off of the
+ /// Extension of <see cref="Standard.StandardTokenizer"/> that is aware of Wikipedia syntax. It is based off of the
/// Wikipedia tutorial available at http://en.wikipedia.org/wiki/Wikipedia:Tutorial, but it may not be complete.
- /// <p/>
- /// <p/>
+ /// <para/>
/// @lucene.experimental
/// </summary>
public sealed class WikipediaTokenizer : Tokenizer
@@ -66,7 +64,26 @@ namespace Lucene.Net.Analysis.Wikipedia
/// <summary>
/// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = new string[] { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", INTERNAL_LINK, EXTERNAL_LINK, CITATION, CATEGORY, BOLD, ITALICS, BOLD_ITALICS, HEADING, SUB_HEADING, EXTERNAL_LINK_URL };
+ public static readonly string[] TOKEN_TYPES = new string[] {
+ "<ALPHANUM>",
+ "<APOSTROPHE>",
+ "<ACRONYM>",
+ "<COMPANY>",
+ "<EMAIL>",
+ "<HOST>",
+ "<NUM>",
+ "<CJ>",
+ INTERNAL_LINK,
+ EXTERNAL_LINK,
+ CITATION,
+ CATEGORY,
+ BOLD,
+ ITALICS,
+ BOLD_ITALICS,
+ HEADING,
+ SUB_HEADING,
+ EXTERNAL_LINK_URL
+ };
/// <summary>
/// Only output tokens
@@ -81,7 +98,7 @@ namespace Lucene.Net.Analysis.Wikipedia
/// </summary>
public const int BOTH = 2;
/// <summary>
- /// This flag is used to indicate that the produced "Token" would, if <see cref="#TOKENS_ONLY"/> was used, produce multiple tokens.
+ /// This flag is used to indicate that the produced "Token" would, if <see cref="TOKENS_ONLY"/> was used, produce multiple tokens.
/// </summary>
public const int UNTOKENIZED_TOKEN_FLAG = 1;
/// <summary>
@@ -103,33 +120,36 @@ namespace Lucene.Net.Analysis.Wikipedia
/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/>. Attaches the
- /// <code>input</code> to a newly created JFlex scanner.
+ /// <paramref name="input"/> to a newly created JFlex scanner.
/// </summary>
- /// <param name="input"> The Input TextReader </param>
+ /// <param name="input"> The Input <see cref="TextReader"/> </param>
public WikipediaTokenizer(TextReader input)
- : this(input, TOKENS_ONLY, Collections.EmptyList<string>())
+ : this(input, TOKENS_ONLY, Collections.EmptyList<string>())
{
}
/// <summary>
- /// Creates a new instance of the <see cref="org.apache.lucene.analysis.wikipedia.WikipediaTokenizer"/>. Attaches the
- /// <code>input</code> to a the newly created JFlex scanner.
+ /// Creates a new instance of the <see cref="WikipediaTokenizer"/>. Attaches the
+ /// <paramref name="input"/> to a the newly created JFlex scanner.
/// </summary>
/// <param name="input"> The input </param>
- /// <param name="tokenOutput"> One of <see cref="#TOKENS_ONLY"/>, <see cref="#UNTOKENIZED_ONLY"/>, <see cref="#BOTH"/> </param>
+ /// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
+ /// <param name="untokenizedTypes"> Untokenized types </param>
public WikipediaTokenizer(TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
- : base(input)
+ : base(input)
{
this.scanner = new WikipediaTokenizerImpl(this.m_input);
Init(tokenOutput, untokenizedTypes);
}
/// <summary>
- /// Creates a new instance of the <see cref="org.apache.lucene.analysis.wikipedia.WikipediaTokenizer"/>. Attaches the
- /// <code>input</code> to a the newly created JFlex scanner. Uses the given <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+ /// Creates a new instance of the <see cref="WikipediaTokenizer"/>. Attaches the
+ /// <paramref name="input"/> to a the newly created JFlex scanner. Uses the given <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
+ /// <param name="factory"> The <see cref="AttributeSource.AttributeFactory"/> </param>
/// <param name="input"> The input </param>
- /// <param name="tokenOutput"> One of <see cref="#TOKENS_ONLY"/>, <see cref="#UNTOKENIZED_ONLY"/>, <see cref="#BOTH"/> </param>
+ /// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
+ /// <param name="untokenizedTypes"> Untokenized types </param>
public WikipediaTokenizer(AttributeFactory factory, TextReader input, int tokenOutput, ICollection<string> untokenizedTypes)
: base(factory, input)
{
@@ -153,12 +173,10 @@ namespace Lucene.Net.Analysis.Wikipedia
flagsAtt = AddAttribute<IFlagsAttribute>();
}
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
- public override bool IncrementToken()
+ /// <summary>
+ /// <see cref="TokenStream.IncrementToken"/>
+ /// </summary>
+ public override sealed bool IncrementToken()
{
if (tokens != null && tokens.MoveNext())
{
@@ -176,18 +194,18 @@ namespace Lucene.Net.Analysis.Wikipedia
string type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
if (tokenOutput == TOKENS_ONLY || untokenizedTypes.Contains(type) == false)
{
- setupToken();
+ SetupToken();
}
else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.Contains(type) == true)
{
- collapseTokens(tokenType);
+ CollapseTokens(tokenType);
}
else if (tokenOutput == BOTH)
{
//collapse into a single token, add it to tokens AND output the individual tokens
//output the untokenized Token first
- collapseAndSaveTokens(tokenType, type);
+ CollapseAndSaveTokens(tokenType, type);
}
int posinc = scanner.PositionIncrement;
if (first && posinc == 0)
@@ -200,7 +218,7 @@ namespace Lucene.Net.Analysis.Wikipedia
return true;
}
- private void collapseAndSaveTokens(int tokenType, string type)
+ private void CollapseAndSaveTokens(int tokenType, string type)
{
//collapse
StringBuilder buffer = new StringBuilder(32);
@@ -211,7 +229,7 @@ namespace Lucene.Net.Analysis.Wikipedia
int tmpTokType;
int numSeen = 0;
IList<AttributeSource.State> tmp = new List<AttributeSource.State>();
- setupSavedToken(0, type);
+ SetupSavedToken(0, type);
tmp.Add(CaptureState());
//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
while ((tmpTokType = scanner.GetNextToken()) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.NumWikiTokensSeen > numSeen)
@@ -223,7 +241,7 @@ namespace Lucene.Net.Analysis.Wikipedia
buffer.Append(' ');
}
numAdded = scanner.SetText(buffer);
- setupSavedToken(scanner.PositionIncrement, type);
+ SetupSavedToken(scanner.PositionIncrement, type);
tmp.Add(CaptureState());
numSeen++;
lastPos = currPos + numAdded;
@@ -242,14 +260,14 @@ namespace Lucene.Net.Analysis.Wikipedia
tokens = tmp.GetEnumerator();
}
- private void setupSavedToken(int positionInc, string type)
+ private void SetupSavedToken(int positionInc, string type)
{
- setupToken();
+ SetupToken();
posIncrAtt.PositionIncrement = positionInc;
typeAtt.Type = type;
}
- private void collapseTokens(int tokenType)
+ private void CollapseTokens(int tokenType)
{
//collapse
StringBuilder buffer = new StringBuilder(32);
@@ -289,7 +307,7 @@ namespace Lucene.Net.Analysis.Wikipedia
}
}
- private void setupToken()
+ private void SetupToken()
{
scanner.GetText(termAtt);
int start = scanner.YyChar;
@@ -302,17 +320,15 @@ namespace Lucene.Net.Analysis.Wikipedia
scanner.YyReset(m_input);
}
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.analysis.TokenStream#reset()
- */
+ /// <summary>
+ /// <see cref="TokenStream.Reset"/>
+ /// </summary>
public override void Reset()
{
base.Reset();
scanner.YyReset(m_input);
tokens = null;
- scanner.reset();
+ scanner.Reset();
first = true;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3903863/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
index d63e61a..f23fe28 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
@@ -1,15 +1,11 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Wikipedia;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System.Collections.Generic;
using System.IO;
-using System;
-using System.Linq;
namespace Lucene.Net.Analysis.Wikipedia
{
-
- /*
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -26,28 +22,27 @@ namespace Lucene.Net.Analysis.Wikipedia
* limitations under the License.
*/
- /// <summary>
- /// Factory for <see cref="WikipediaTokenizer"/>.
- /// <code>
- /// <fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WikipediaTokenizerFactory"/>
- /// </analyzer>
- /// </fieldType></code>
- /// </summary>
- public class WikipediaTokenizerFactory : TokenizerFactory
- {
-
- /// <summary>
- /// Creates a new WikipediaTokenizerFactory </summary>
- public WikipediaTokenizerFactory(IDictionary<string, string> args)
- : base(args)
- {
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
+ /// <summary>
+ /// Factory for <see cref="WikipediaTokenizer"/>.
+ /// <code>
+ /// <fieldType name="text_wiki" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WikipediaTokenizerFactory"/>
+ /// </analyzer>
+ /// </fieldType></code>
+ /// </summary>
+ public class WikipediaTokenizerFactory : TokenizerFactory
+ {
+ /// <summary>
+ /// Creates a new <see cref="WikipediaTokenizerFactory"/> </summary>
+ public WikipediaTokenizerFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
// TODO: add support for WikipediaTokenizer's advanced options.
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d3903863/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs
index a7960e8..7b84648 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerImpl.cs
@@ -1,10 +1,7 @@
\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using System;
-using System.Collections.Generic;
using System.IO;
-using System.Linq;
using System.Text;
-using System.Threading.Tasks;
namespace Lucene.Net.Analysis.Wikipedia
{
@@ -30,13 +27,13 @@ namespace Lucene.Net.Analysis.Wikipedia
/// </summary>
internal class WikipediaTokenizerImpl
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public static readonly int YYINITIAL = 0;
public static readonly int CATEGORY_STATE = 2;
public static readonly int INTERNAL_LINK_STATE = 4;
@@ -48,20 +45,20 @@ namespace Lucene.Net.Analysis.Wikipedia
public static readonly int DOUBLE_BRACE_STATE = 16;
public static readonly int STRING = 18;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = {
0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9
};
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0009\x0000\x0001\x0014\x0001\x0013\x0001\x0000\x0001\x0014\x0001\x0012\x0012\x0000\x0001\x0014\x0001\x0000\x0001\x000A" +
"\x0001\x002B\x0002\x0000\x0001\x0003\x0001\x0001\x0004\x0000\x0001\x000C\x0001\x0005\x0001\x0002\x0001\x0008\x000A\x000E" +
@@ -76,14 +73,14 @@ namespace Lucene.Net.Analysis.Wikipedia
"\x0010\x0011\u0100\x0000\x0080\x0011\x0080\x0000\u19c0\x0011\x0040\x0000\u5200\x0011\u0c00\x0000\u2bb0\x0010\u2150\x0000" +
"\u0200\x0011\u0465\x0000\x003B\x0011\x003D\x000D\x0023\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -122,9 +119,9 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -173,9 +170,9 @@ namespace Lucene.Net.Analysis.Wikipedia
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -357,9 +354,9 @@ namespace Lucene.Net.Analysis.Wikipedia
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -394,54 +391,58 @@ namespace Lucene.Net.Analysis.Wikipedia
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the yytext() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the yytext() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning disable 169, 414
@@ -498,9 +499,9 @@ namespace Lucene.Net.Analysis.Wikipedia
get { return positionInc; }
}
- /**
- * Fills Lucene token with the current token text.
- */
+ /// <summary>
+ /// Fills Lucene token with the current token text.
+ /// </summary>
internal void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
@@ -513,7 +514,7 @@ namespace Lucene.Net.Analysis.Wikipedia
return length;
}
- internal void reset()
+ internal void Reset()
{
currentTokType = 0;
numBalanced = 0;
@@ -522,26 +523,21 @@ namespace Lucene.Net.Analysis.Wikipedia
numWikiTokensSeen = 0;
}
-
-
-
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
internal WikipediaTokenizerImpl(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -557,13 +553,11 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -619,9 +613,9 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -634,18 +628,17 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -661,75 +654,73 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to yytext().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to yylength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * yypushback(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occured while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -746,14 +737,15 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by then next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -763,13 +755,12 @@ namespace Lucene.Net.Analysis.Wikipedia
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
[20/39] lucenenet git commit: Lucene.Net.Analysis.Common/Tartarus
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/IrishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/IrishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/IrishStemmer.cs
index f597209..a689499 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/IrishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/IrishStemmer.cs
@@ -108,7 +108,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -129,7 +129,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab2;
}
@@ -158,7 +158,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab5;
}
@@ -177,7 +177,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab7;
}
@@ -198,7 +198,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab9;
}
@@ -217,7 +217,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab11;
}
@@ -246,7 +246,7 @@
// [, line 44
m_bra = m_cursor;
// substring, line 44
- among_var = find_among(a_0, 24);
+ among_var = FindAmong(a_0, 24);
if (among_var == 0)
{
return false;
@@ -260,107 +260,107 @@
case 1:
// (, line 46
// delete, line 46
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 50
// delete, line 50
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 52
// <-, line 52
- slice_from("f");
+ SliceFrom("f");
break;
case 4:
// (, line 55
// delete, line 55
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 58
// <-, line 58
- slice_from("s");
+ SliceFrom("s");
break;
case 6:
// (, line 61
// <-, line 61
- slice_from("b");
+ SliceFrom("b");
break;
case 7:
// (, line 63
// <-, line 63
- slice_from("c");
+ SliceFrom("c");
break;
case 8:
// (, line 65
// <-, line 65
- slice_from("d");
+ SliceFrom("d");
break;
case 9:
// (, line 67
// <-, line 67
- slice_from("f");
+ SliceFrom("f");
break;
case 10:
// (, line 69
// <-, line 69
- slice_from("g");
+ SliceFrom("g");
break;
case 11:
// (, line 71
// <-, line 71
- slice_from("p");
+ SliceFrom("p");
break;
case 12:
// (, line 73
// <-, line 73
- slice_from("s");
+ SliceFrom("s");
break;
case 13:
// (, line 75
// <-, line 75
- slice_from("t");
+ SliceFrom("t");
break;
case 14:
// (, line 79
// <-, line 79
- slice_from("b");
+ SliceFrom("b");
break;
case 15:
// (, line 81
// <-, line 81
- slice_from("c");
+ SliceFrom("c");
break;
case 16:
// (, line 83
// <-, line 83
- slice_from("d");
+ SliceFrom("d");
break;
case 17:
// (, line 85
// <-, line 85
- slice_from("f");
+ SliceFrom("f");
break;
case 18:
// (, line 87
// <-, line 87
- slice_from("g");
+ SliceFrom("g");
break;
case 19:
// (, line 89
// <-, line 89
- slice_from("m");
+ SliceFrom("m");
break;
case 20:
// (, line 91
// <-, line 91
- slice_from("p");
+ SliceFrom("p");
break;
case 21:
// (, line 93
// <-, line 93
- slice_from("t");
+ SliceFrom("t");
break;
}
return true;
@@ -400,7 +400,7 @@
// [, line 104
m_ket = m_cursor;
// substring, line 104
- among_var = find_among_b(a_1, 16);
+ among_var = FindAmongB(a_1, 16);
if (among_var == 0)
{
return false;
@@ -419,7 +419,7 @@
return false;
}
// delete, line 108
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 110
@@ -429,7 +429,7 @@
return false;
}
// delete, line 110
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -442,7 +442,7 @@
// [, line 114
m_ket = m_cursor;
// substring, line 114
- among_var = find_among_b(a_2, 25);
+ among_var = FindAmongB(a_2, 25);
if (among_var == 0)
{
return false;
@@ -461,32 +461,32 @@
return false;
}
// delete, line 116
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 118
// <-, line 118
- slice_from("arc");
+ SliceFrom("arc");
break;
case 3:
// (, line 120
// <-, line 120
- slice_from("gin");
+ SliceFrom("gin");
break;
case 4:
// (, line 122
// <-, line 122
- slice_from("graf");
+ SliceFrom("graf");
break;
case 5:
// (, line 124
// <-, line 124
- slice_from("paite");
+ SliceFrom("paite");
break;
case 6:
// (, line 126
// <-, line 126
- slice_from("\u00F3id");
+ SliceFrom("\u00F3id");
break;
}
return true;
@@ -499,7 +499,7 @@
// [, line 130
m_ket = m_cursor;
// substring, line 130
- among_var = find_among_b(a_3, 12);
+ among_var = FindAmongB(a_3, 12);
if (among_var == 0)
{
return false;
@@ -518,7 +518,7 @@
return false;
}
// delete, line 133
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 138
@@ -528,7 +528,7 @@
return false;
}
// delete, line 138
- slice_del();
+ SliceDel();
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ItalianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ItalianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ItalianStemmer.cs
index 26072e4..81d45f4 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ItalianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ItalianStemmer.cs
@@ -244,7 +244,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -268,7 +268,7 @@
// [, line 36
m_bra = m_cursor;
// substring, line 36
- among_var = find_among(a_0, 7);
+ among_var = FindAmong(a_0, 7);
if (among_var == 0)
{
goto lab1;
@@ -282,32 +282,32 @@
case 1:
// (, line 37
// <-, line 37
- slice_from("\u00E0");
+ SliceFrom("\u00E0");
break;
case 2:
// (, line 38
// <-, line 38
- slice_from("\u00E8");
+ SliceFrom("\u00E8");
break;
case 3:
// (, line 39
// <-, line 39
- slice_from("\u00EC");
+ SliceFrom("\u00EC");
break;
case 4:
// (, line 40
// <-, line 40
- slice_from("\u00F2");
+ SliceFrom("\u00F2");
break;
case 5:
// (, line 41
// <-, line 41
- slice_from("\u00F9");
+ SliceFrom("\u00F9");
break;
case 6:
// (, line 42
// <-, line 42
- slice_from("qU");
+ SliceFrom("qU");
break;
case 7:
// (, line 43
@@ -349,7 +349,7 @@
do
{
// (, line 46
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab5;
}
@@ -363,36 +363,36 @@
{
// (, line 47
// literal, line 47
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab7;
}
// ], line 47
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab7;
}
// <-, line 47
- slice_from("U");
+ SliceFrom("U");
goto lab6;
} while (false);
lab7:
m_cursor = v_5;
// (, line 48
// literal, line 48
- if (!(eq_s(1, "i")))
+ if (!(Eq_S(1, "i")))
{
goto lab5;
}
// ], line 48
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab5;
}
// <-, line 48
- slice_from("I");
+ SliceFrom("I");
} while (false);
lab6:
m_cursor = v_4;
@@ -449,7 +449,7 @@
do
{
// (, line 59
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab2;
}
@@ -460,7 +460,7 @@
do
{
// (, line 59
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab4;
}
@@ -469,7 +469,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab6;
}
@@ -488,7 +488,7 @@
lab4:
m_cursor = v_3;
// (, line 59
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab2;
}
@@ -497,7 +497,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab8;
}
@@ -518,7 +518,7 @@
lab2:
m_cursor = v_2;
// (, line 61
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab0;
}
@@ -529,7 +529,7 @@
do
{
// (, line 61
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab10;
}
@@ -538,7 +538,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab12;
}
@@ -557,7 +557,7 @@
lab10:
m_cursor = v_6;
// (, line 61
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab0;
}
@@ -586,7 +586,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab15;
}
@@ -605,7 +605,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab17;
}
@@ -626,7 +626,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 249)))
+ if (!(InGrouping(g_v, 97, 249)))
{
goto lab19;
}
@@ -645,7 +645,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 249)))
+ if (!(OutGrouping(g_v, 97, 249)))
{
goto lab21;
}
@@ -681,7 +681,7 @@
// [, line 72
m_bra = m_cursor;
// substring, line 72
- among_var = find_among(a_1, 3);
+ among_var = FindAmong(a_1, 3);
if (among_var == 0)
{
goto lab1;
@@ -695,12 +695,12 @@
case 1:
// (, line 73
// <-, line 73
- slice_from("i");
+ SliceFrom("i");
break;
case 2:
// (, line 74
// <-, line 74
- slice_from("u");
+ SliceFrom("u");
break;
case 3:
// (, line 75
@@ -765,14 +765,14 @@
// [, line 87
m_ket = m_cursor;
// substring, line 87
- if (find_among_b(a_2, 37) == 0)
+ if (FindAmongB(a_2, 37) == 0)
{
return false;
}
// ], line 87
m_bra = m_cursor;
// among, line 97
- among_var = find_among_b(a_3, 5);
+ among_var = FindAmongB(a_3, 5);
if (among_var == 0)
{
return false;
@@ -790,12 +790,12 @@
case 1:
// (, line 98
// delete, line 98
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 99
// <-, line 99
- slice_from("e");
+ SliceFrom("e");
break;
}
return true;
@@ -812,7 +812,7 @@
// [, line 104
m_ket = m_cursor;
// substring, line 104
- among_var = find_among_b(a_6, 51);
+ among_var = FindAmongB(a_6, 51);
if (among_var == 0)
{
return false;
@@ -831,7 +831,7 @@
return false;
}
// delete, line 111
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 113
@@ -841,7 +841,7 @@
return false;
}
// delete, line 113
- slice_del();
+ SliceDel();
// try, line 114
v_1 = m_limit - m_cursor;
do
@@ -850,7 +850,7 @@
// [, line 114
m_ket = m_cursor;
// literal, line 114
- if (!(eq_s_b(2, "ic")))
+ if (!(Eq_S_B(2, "ic")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -864,7 +864,7 @@
goto lab0;
}
// delete, line 114
- slice_del();
+ SliceDel();
} while (false);
lab0:
break;
@@ -876,7 +876,7 @@
return false;
}
// <-, line 117
- slice_from("log");
+ SliceFrom("log");
break;
case 4:
// (, line 119
@@ -886,7 +886,7 @@
return false;
}
// <-, line 119
- slice_from("u");
+ SliceFrom("u");
break;
case 5:
// (, line 121
@@ -896,7 +896,7 @@
return false;
}
// <-, line 121
- slice_from("ente");
+ SliceFrom("ente");
break;
case 6:
// (, line 123
@@ -906,7 +906,7 @@
return false;
}
// delete, line 123
- slice_del();
+ SliceDel();
break;
case 7:
// (, line 124
@@ -916,7 +916,7 @@
return false;
}
// delete, line 125
- slice_del();
+ SliceDel();
// try, line 126
v_2 = m_limit - m_cursor;
do
@@ -925,7 +925,7 @@
// [, line 127
m_ket = m_cursor;
// substring, line 127
- among_var = find_among_b(a_4, 4);
+ among_var = FindAmongB(a_4, 4);
if (among_var == 0)
{
m_cursor = m_limit - v_2;
@@ -940,7 +940,7 @@
goto lab1;
}
// delete, line 127
- slice_del();
+ SliceDel();
switch (among_var)
{
case 0:
@@ -951,7 +951,7 @@
// [, line 128
m_ket = m_cursor;
// literal, line 128
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_2;
goto lab1;
@@ -965,7 +965,7 @@
goto lab1;
}
// delete, line 128
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -979,7 +979,7 @@
return false;
}
// delete, line 134
- slice_del();
+ SliceDel();
// try, line 135
v_3 = m_limit - m_cursor;
do
@@ -988,7 +988,7 @@
// [, line 136
m_ket = m_cursor;
// substring, line 136
- among_var = find_among_b(a_5, 3);
+ among_var = FindAmongB(a_5, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_3;
@@ -1010,7 +1010,7 @@
goto lab2;
}
// delete, line 137
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -1024,7 +1024,7 @@
return false;
}
// delete, line 142
- slice_del();
+ SliceDel();
// try, line 143
v_4 = m_limit - m_cursor;
do
@@ -1033,7 +1033,7 @@
// [, line 143
m_ket = m_cursor;
// literal, line 143
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_4;
goto lab3;
@@ -1047,11 +1047,11 @@
goto lab3;
}
// delete, line 143
- slice_del();
+ SliceDel();
// [, line 143
m_ket = m_cursor;
// literal, line 143
- if (!(eq_s_b(2, "ic")))
+ if (!(Eq_S_B(2, "ic")))
{
m_cursor = m_limit - v_4;
goto lab3;
@@ -1065,7 +1065,7 @@
goto lab3;
}
// delete, line 143
- slice_del();
+ SliceDel();
} while (false);
lab3:
break;
@@ -1093,7 +1093,7 @@
// [, line 149
m_ket = m_cursor;
// substring, line 149
- among_var = find_among_b(a_7, 87);
+ among_var = FindAmongB(a_7, 87);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -1109,7 +1109,7 @@
case 1:
// (, line 163
// delete, line 163
- slice_del();
+ SliceDel();
break;
}
m_limit_backward = v_2;
@@ -1128,7 +1128,7 @@
// (, line 171
// [, line 172
m_ket = m_cursor;
- if (!(in_grouping_b(g_AEIO, 97, 242)))
+ if (!(InGroupingB(g_AEIO, 97, 242)))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -1142,11 +1142,11 @@
goto lab0;
}
// delete, line 172
- slice_del();
+ SliceDel();
// [, line 173
m_ket = m_cursor;
// literal, line 173
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -1160,7 +1160,7 @@
goto lab0;
}
// delete, line 173
- slice_del();
+ SliceDel();
} while (false);
lab0:
// try, line 175
@@ -1171,14 +1171,14 @@
// [, line 176
m_ket = m_cursor;
// literal, line 176
- if (!(eq_s_b(1, "h")))
+ if (!(Eq_S_B(1, "h")))
{
m_cursor = m_limit - v_2;
goto lab1;
}
// ], line 176
m_bra = m_cursor;
- if (!(in_grouping_b(g_CG, 99, 103)))
+ if (!(InGroupingB(g_CG, 99, 103)))
{
m_cursor = m_limit - v_2;
goto lab1;
@@ -1190,7 +1190,7 @@
goto lab1;
}
// delete, line 176
- slice_del();
+ SliceDel();
} while (false);
lab1:
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/KpStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/KpStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/KpStemmer.cs
index c2f17de..616f132 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/KpStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/KpStemmer.cs
@@ -137,7 +137,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
I_p1 = other.I_p1;
I_x = other.I_x;
S_ch = other.S_ch;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_R1()
@@ -177,7 +177,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_2 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab1;
}
@@ -186,7 +186,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab1:
m_cursor = m_limit - v_2;
// literal, line 35
- if (!(eq_s_b(2, "ij")))
+ if (!(Eq_S_B(2, "ij")))
{
return false;
}
@@ -215,7 +215,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_2 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab1;
}
@@ -224,7 +224,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab1:
m_cursor = m_limit - v_2;
// literal, line 36
- if (!(eq_s_b(2, "ij")))
+ if (!(Eq_S_B(2, "ij")))
{
return false;
}
@@ -247,7 +247,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 37
- if (!(eq_s_b(2, "ij")))
+ if (!(Eq_S_B(2, "ij")))
{
goto lab0;
}
@@ -256,7 +256,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab0:
m_cursor = m_limit - v_2;
}
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
return false;
}
@@ -279,7 +279,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// (, line 39
- if (!(out_grouping_b(g_v_WX, 97, 121)))
+ if (!(OutGroupingB(g_v_WX, 97, 121)))
{
goto lab0;
}
@@ -292,7 +292,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// (, line 40
- if (!(in_grouping_b(g_AOU, 97, 117)))
+ if (!(InGroupingB(g_AOU, 97, 117)))
{
goto lab2;
}
@@ -307,7 +307,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_4 = m_limit - m_cursor;
do
{
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
goto lab4;
}
@@ -329,7 +329,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_cursor = m_limit - v_2;
// (, line 41
// literal, line 41
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab0;
}
@@ -344,7 +344,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_6 = m_limit - m_cursor;
do
{
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
goto lab6;
}
@@ -364,7 +364,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_7 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_AIOU, 97, 117)))
+ if (!(InGroupingB(g_AIOU, 97, 117)))
{
goto lab7;
}
@@ -385,11 +385,11 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab8;
}
m_cursor--;
- if (!(in_grouping_b(g_AIOU, 97, 117)))
+ if (!(InGroupingB(g_AIOU, 97, 117)))
{
goto lab8;
}
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
goto lab8;
}
@@ -402,11 +402,11 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
} while (false);
lab1:
// -> ch, line 44
- S_ch = slice_to(S_ch);
+ S_ch = SliceTo(S_ch);
// <+ ch, line 44
{
int c = m_cursor;
- insert(m_cursor, m_cursor, S_ch.ToString());
+ Insert(m_cursor, m_cursor, S_ch.ToString());
m_cursor = c;
}
} while (false);
@@ -426,7 +426,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 49
m_ket = m_cursor;
// among, line 49
- among_var = find_among_b(a_0, 7);
+ among_var = FindAmongB(a_0, 7);
if (among_var == 0)
{
return false;
@@ -441,7 +441,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 51
// delete, line 51
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 52
@@ -457,7 +457,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 52
// literal, line 52
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
goto lab0;
}
@@ -477,7 +477,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 52
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 53
@@ -487,7 +487,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 53
- slice_from("ie");
+ SliceFrom("ie");
break;
case 4:
// (, line 55
@@ -499,7 +499,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 55
// literal, line 55
- if (!(eq_s_b(2, "ar")))
+ if (!(Eq_S_B(2, "ar")))
{
goto lab2;
}
@@ -516,7 +516,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 55
m_bra = m_cursor;
// delete, line 55
- slice_del();
+ SliceDel();
// call lengthen_V, line 55
if (!r_lengthen_V())
{
@@ -530,7 +530,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 56
// literal, line 56
- if (!(eq_s_b(2, "er")))
+ if (!(Eq_S_B(2, "er")))
{
goto lab3;
}
@@ -547,7 +547,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 56
m_bra = m_cursor;
// delete, line 56
- slice_del();
+ SliceDel();
goto lab1;
} while (false);
lab3:
@@ -564,7 +564,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 57
- slice_from("e");
+ SliceFrom("e");
} while (false);
lab1:
break;
@@ -581,7 +581,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 59
- slice_from("au");
+ SliceFrom("au");
break;
case 6:
// (, line 60
@@ -593,7 +593,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 60
// literal, line 60
- if (!(eq_s_b(3, "hed")))
+ if (!(Eq_S_B(3, "hed")))
{
goto lab5;
}
@@ -605,7 +605,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 60
m_bra = m_cursor;
// <-, line 60
- slice_from("heid");
+ SliceFrom("heid");
goto lab4;
} while (false);
lab5:
@@ -614,12 +614,12 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 61
// literal, line 61
- if (!(eq_s_b(2, "nd")))
+ if (!(Eq_S_B(2, "nd")))
{
goto lab6;
}
// delete, line 61
- slice_del();
+ SliceDel();
goto lab4;
} while (false);
lab6:
@@ -628,7 +628,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 62
// literal, line 62
- if (!(eq_s_b(1, "d")))
+ if (!(Eq_S_B(1, "d")))
{
goto lab7;
}
@@ -645,7 +645,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 62
m_bra = m_cursor;
// delete, line 62
- slice_del();
+ SliceDel();
goto lab4;
} while (false);
lab7:
@@ -660,7 +660,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 63
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab10;
}
@@ -669,7 +669,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab10:
m_cursor = m_limit - v_4;
// literal, line 63
- if (!(eq_s_b(1, "j")))
+ if (!(Eq_S_B(1, "j")))
{
goto lab8;
}
@@ -681,7 +681,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab8;
}
// delete, line 63
- slice_del();
+ SliceDel();
goto lab4;
} while (false);
lab8:
@@ -698,7 +698,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 64
- slice_del();
+ SliceDel();
// call lengthen_V, line 64
if (!r_lengthen_V())
{
@@ -710,7 +710,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 7:
// (, line 65
// <-, line 65
- slice_from("nd");
+ SliceFrom("nd");
break;
}
return true;
@@ -724,7 +724,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 71
m_ket = m_cursor;
// among, line 71
- among_var = find_among_b(a_1, 11);
+ among_var = FindAmongB(a_1, 11);
if (among_var == 0)
{
return false;
@@ -746,14 +746,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 72
// literal, line 72
- if (!(eq_s_b(2, "'t")))
+ if (!(Eq_S_B(2, "'t")))
{
goto lab1;
}
// ], line 72
m_bra = m_cursor;
// delete, line 72
- slice_del();
+ SliceDel();
goto lab0;
} while (false);
lab1:
@@ -762,7 +762,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 73
// literal, line 73
- if (!(eq_s_b(2, "et")))
+ if (!(Eq_S_B(2, "et")))
{
goto lab2;
}
@@ -779,7 +779,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab2;
}
// delete, line 73
- slice_del();
+ SliceDel();
goto lab0;
} while (false);
lab2:
@@ -788,14 +788,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 74
// literal, line 74
- if (!(eq_s_b(3, "rnt")))
+ if (!(Eq_S_B(3, "rnt")))
{
goto lab3;
}
// ], line 74
m_bra = m_cursor;
// <-, line 74
- slice_from("rn");
+ SliceFrom("rn");
goto lab0;
} while (false);
lab3:
@@ -804,7 +804,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 75
// literal, line 75
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
goto lab4;
}
@@ -821,7 +821,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab4;
}
// delete, line 75
- slice_del();
+ SliceDel();
goto lab0;
} while (false);
lab4:
@@ -830,14 +830,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 76
// literal, line 76
- if (!(eq_s_b(3, "ink")))
+ if (!(Eq_S_B(3, "ink")))
{
goto lab5;
}
// ], line 76
m_bra = m_cursor;
// <-, line 76
- slice_from("ing");
+ SliceFrom("ing");
goto lab0;
} while (false);
lab5:
@@ -846,14 +846,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 77
// literal, line 77
- if (!(eq_s_b(2, "mp")))
+ if (!(Eq_S_B(2, "mp")))
{
goto lab6;
}
// ], line 77
m_bra = m_cursor;
// <-, line 77
- slice_from("m");
+ SliceFrom("m");
goto lab0;
} while (false);
lab6:
@@ -862,7 +862,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 78
// literal, line 78
- if (!(eq_s_b(1, "'")))
+ if (!(Eq_S_B(1, "'")))
{
goto lab7;
}
@@ -874,7 +874,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab7;
}
// delete, line 78
- slice_del();
+ SliceDel();
goto lab0;
} while (false);
lab7:
@@ -893,7 +893,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 79
- slice_del();
+ SliceDel();
} while (false);
lab0:
break;
@@ -905,7 +905,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 80
- slice_from("g");
+ SliceFrom("g");
break;
case 3:
// (, line 81
@@ -915,7 +915,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 81
- slice_from("lijk");
+ SliceFrom("lijk");
break;
case 4:
// (, line 82
@@ -925,7 +925,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 82
- slice_from("isch");
+ SliceFrom("isch");
break;
case 5:
// (, line 83
@@ -940,7 +940,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 83
- slice_del();
+ SliceDel();
break;
case 6:
// (, line 84
@@ -950,7 +950,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 84
- slice_from("t");
+ SliceFrom("t");
break;
case 7:
// (, line 85
@@ -960,7 +960,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 85
- slice_from("s");
+ SliceFrom("s");
break;
case 8:
// (, line 86
@@ -970,7 +970,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 86
- slice_from("r");
+ SliceFrom("r");
break;
case 9:
// (, line 87
@@ -980,9 +980,9 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 87
- slice_del();
+ SliceDel();
// attach, line 87
- insert(m_cursor, m_cursor, "l");
+ Insert(m_cursor, m_cursor, "l");
// call lengthen_V, line 87
if (!r_lengthen_V())
{
@@ -1002,9 +1002,9 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 88
- slice_del();
+ SliceDel();
// attach, line 88
- insert(m_cursor, m_cursor, "en");
+ Insert(m_cursor, m_cursor, "en");
// call lengthen_V, line 88
if (!r_lengthen_V())
{
@@ -1024,7 +1024,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 89
- slice_from("ief");
+ SliceFrom("ief");
break;
}
return true;
@@ -1037,7 +1037,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 95
m_ket = m_cursor;
// among, line 95
- among_var = find_among_b(a_2, 14);
+ among_var = FindAmongB(a_2, 14);
if (among_var == 0)
{
return false;
@@ -1057,7 +1057,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 96
- slice_from("eer");
+ SliceFrom("eer");
break;
case 2:
// (, line 97
@@ -1067,7 +1067,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 97
- slice_del();
+ SliceDel();
// call lengthen_V, line 97
if (!r_lengthen_V())
{
@@ -1082,12 +1082,12 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 100
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 101
// <-, line 101
- slice_from("r");
+ SliceFrom("r");
break;
case 5:
// (, line 104
@@ -1097,7 +1097,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 104
- slice_del();
+ SliceDel();
// call lengthen_V, line 104
if (!r_lengthen_V())
{
@@ -1117,7 +1117,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 105
- slice_from("aar");
+ SliceFrom("aar");
break;
case 7:
// (, line 106
@@ -1127,9 +1127,9 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 106
- slice_del();
+ SliceDel();
// attach, line 106
- insert(m_cursor, m_cursor, "f");
+ Insert(m_cursor, m_cursor, "f");
// call lengthen_V, line 106
if (!r_lengthen_V())
{
@@ -1144,9 +1144,9 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 107
- slice_del();
+ SliceDel();
// attach, line 107
- insert(m_cursor, m_cursor, "g");
+ Insert(m_cursor, m_cursor, "g");
// call lengthen_V, line 107
if (!r_lengthen_V())
{
@@ -1166,7 +1166,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 108
- slice_from("t");
+ SliceFrom("t");
break;
case 10:
// (, line 109
@@ -1181,7 +1181,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// <-, line 109
- slice_from("d");
+ SliceFrom("d");
break;
}
return true;
@@ -1202,7 +1202,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 115
m_ket = m_cursor;
// among, line 115
- among_var = find_among_b(a_3, 16);
+ among_var = FindAmongB(a_3, 16);
if (among_var == 0)
{
goto lab1;
@@ -1222,7 +1222,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 116
- slice_from("ie");
+ SliceFrom("ie");
break;
case 2:
// (, line 117
@@ -1232,7 +1232,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 117
- slice_from("eer");
+ SliceFrom("eer");
break;
case 3:
// (, line 118
@@ -1242,7 +1242,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// delete, line 118
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 119
@@ -1257,7 +1257,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 119
- slice_from("n");
+ SliceFrom("n");
break;
case 5:
// (, line 120
@@ -1272,7 +1272,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 120
- slice_from("l");
+ SliceFrom("l");
break;
case 6:
// (, line 121
@@ -1287,7 +1287,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 121
- slice_from("r");
+ SliceFrom("r");
break;
case 7:
// (, line 122
@@ -1297,7 +1297,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 122
- slice_from("teer");
+ SliceFrom("teer");
break;
case 8:
// (, line 124
@@ -1307,7 +1307,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// <-, line 124
- slice_from("lijk");
+ SliceFrom("lijk");
break;
case 9:
// (, line 127
@@ -1317,7 +1317,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// delete, line 127
- slice_del();
+ SliceDel();
break;
case 10:
// (, line 131
@@ -1332,7 +1332,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
// delete, line 131
- slice_del();
+ SliceDel();
// call lengthen_V, line 131
if (!r_lengthen_V())
{
@@ -1348,7 +1348,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 135
m_ket = m_cursor;
// among, line 135
- among_var = find_among_b(a_4, 3);
+ among_var = FindAmongB(a_4, 3);
if (among_var == 0)
{
return false;
@@ -1373,7 +1373,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
}
// delete, line 138
- slice_del();
+ SliceDel();
// call lengthen_V, line 138
if (!r_lengthen_V())
{
@@ -1393,7 +1393,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 145
m_ket = m_cursor;
// among, line 145
- among_var = find_among_b(a_5, 3);
+ among_var = FindAmongB(a_5, 3);
if (among_var == 0)
{
return false;
@@ -1408,17 +1408,17 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 146
// <-, line 146
- slice_from("k");
+ SliceFrom("k");
break;
case 2:
// (, line 147
// <-, line 147
- slice_from("f");
+ SliceFrom("f");
break;
case 3:
// (, line 148
// <-, line 148
- slice_from("p");
+ SliceFrom("p");
break;
}
return true;
@@ -1431,7 +1431,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 154
m_ket = m_cursor;
// among, line 154
- among_var = find_among_b(a_6, 22);
+ among_var = FindAmongB(a_6, 22);
if (among_var == 0)
{
return false;
@@ -1446,112 +1446,112 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 155
// <-, line 155
- slice_from("b");
+ SliceFrom("b");
break;
case 2:
// (, line 156
// <-, line 156
- slice_from("c");
+ SliceFrom("c");
break;
case 3:
// (, line 157
// <-, line 157
- slice_from("d");
+ SliceFrom("d");
break;
case 4:
// (, line 158
// <-, line 158
- slice_from("f");
+ SliceFrom("f");
break;
case 5:
// (, line 159
// <-, line 159
- slice_from("g");
+ SliceFrom("g");
break;
case 6:
// (, line 160
// <-, line 160
- slice_from("h");
+ SliceFrom("h");
break;
case 7:
// (, line 161
// <-, line 161
- slice_from("j");
+ SliceFrom("j");
break;
case 8:
// (, line 162
// <-, line 162
- slice_from("k");
+ SliceFrom("k");
break;
case 9:
// (, line 163
// <-, line 163
- slice_from("l");
+ SliceFrom("l");
break;
case 10:
// (, line 164
// <-, line 164
- slice_from("m");
+ SliceFrom("m");
break;
case 11:
// (, line 165
// <-, line 165
- slice_from("n");
+ SliceFrom("n");
break;
case 12:
// (, line 166
// <-, line 166
- slice_from("p");
+ SliceFrom("p");
break;
case 13:
// (, line 167
// <-, line 167
- slice_from("q");
+ SliceFrom("q");
break;
case 14:
// (, line 168
// <-, line 168
- slice_from("r");
+ SliceFrom("r");
break;
case 15:
// (, line 169
// <-, line 169
- slice_from("s");
+ SliceFrom("s");
break;
case 16:
// (, line 170
// <-, line 170
- slice_from("t");
+ SliceFrom("t");
break;
case 17:
// (, line 171
// <-, line 171
- slice_from("v");
+ SliceFrom("v");
break;
case 18:
// (, line 172
// <-, line 172
- slice_from("w");
+ SliceFrom("w");
break;
case 19:
// (, line 173
// <-, line 173
- slice_from("x");
+ SliceFrom("x");
break;
case 20:
// (, line 174
// <-, line 174
- slice_from("z");
+ SliceFrom("z");
break;
case 21:
// (, line 175
// <-, line 175
- slice_from("f");
+ SliceFrom("f");
break;
case 22:
// (, line 176
// <-, line 176
- slice_from("s");
+ SliceFrom("s");
break;
}
return true;
@@ -1566,7 +1566,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 182
m_ket = m_cursor;
// among, line 182
- among_var = find_among_b(a_7, 2);
+ among_var = FindAmongB(a_7, 2);
if (among_var == 0)
{
return false;
@@ -1597,7 +1597,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 183
// literal, line 183
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab0;
}
@@ -1612,7 +1612,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_cursor = m_limit - v_1;
}
// delete, line 183
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 184
@@ -1623,7 +1623,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 184
// literal, line 184
- if (!(eq_s_b(1, "h")))
+ if (!(Eq_S_B(1, "h")))
{
goto lab1;
}
@@ -1638,7 +1638,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_cursor = m_limit - v_2;
}
// delete, line 184
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1653,7 +1653,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 190
m_bra = m_cursor;
// literal, line 190
- if (!(eq_s(2, "ge")))
+ if (!(Eq_S(2, "ge")))
{
return false;
}
@@ -1678,7 +1678,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_2 = m_cursor;
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab1;
}
@@ -1700,7 +1700,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_3 = m_cursor;
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab3;
}
@@ -1719,7 +1719,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// set GE_removed, line 191
B_GE_removed = true;
// delete, line 192
- slice_del();
+ SliceDel();
return true;
}
@@ -1744,7 +1744,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 197
m_bra = m_cursor;
// literal, line 197
- if (!(eq_s(2, "ge")))
+ if (!(Eq_S(2, "ge")))
{
goto lab1;
}
@@ -1779,7 +1779,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_3 = m_cursor;
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab3;
}
@@ -1801,7 +1801,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
v_4 = m_cursor;
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab5;
}
@@ -1820,7 +1820,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// set GE_removed, line 198
B_GE_removed = true;
// delete, line 199
- slice_del();
+ SliceDel();
return true;
}
@@ -1857,7 +1857,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab3;
}
@@ -1893,7 +1893,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 209
- if (!(eq_s(2, "ij")))
+ if (!(Eq_S(2, "ij")))
{
goto lab7;
}
@@ -1901,7 +1901,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
} while (false);
lab7:
m_cursor = v_6;
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab5;
}
@@ -1929,7 +1929,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
}
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab1;
}
@@ -1940,7 +1940,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab9;
}
@@ -1976,7 +1976,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 210
- if (!(eq_s(2, "ij")))
+ if (!(Eq_S(2, "ij")))
{
goto lab13;
}
@@ -1984,7 +1984,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
} while (false);
lab13:
m_cursor = v_10;
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab11;
}
@@ -2012,7 +2012,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
goto lab1;
}
}
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab1;
}
@@ -2059,14 +2059,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 218
m_bra = m_cursor;
// literal, line 218
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab0;
}
// ], line 218
m_ket = m_cursor;
// <-, line 218
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 218
B_Y_found = true;
} while (false);
@@ -2090,14 +2090,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// (, line 219
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab5;
}
// [, line 219
m_bra = m_cursor;
// literal, line 219
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab5;
}
@@ -2116,7 +2116,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
}
golab4:
// <-, line 219
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 219
B_Y_found = true;
// LUCENENET NOTE: continue label is not supported directly in .NET,
@@ -2372,7 +2372,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 243
m_bra = m_cursor;
// literal, line 243
- if (!(eq_s(1, "Y")))
+ if (!(Eq_S(1, "Y")))
{
goto lab22;
}
@@ -2391,7 +2391,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
}
golab21:
// <-, line 243
- slice_from("y");
+ SliceFrom("y");
// LUCENENET NOTE: continue label is not supported directly in .NET,
// so we just need to add another goto to get to the end of the outer loop.
// See: http://stackoverflow.com/a/359449/181087
[32/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: added overload to
Write() method for StringBuilder
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: added overload to Write() method for StringBuilder
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/230c9f90
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/230c9f90
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/230c9f90
Branch: refs/heads/api-work
Commit: 230c9f90f111dfe4010255fb89c90b3c467a898c
Parents: 252e30c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 02:12:16 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 03:29:07 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/OpenStringBuilder.cs | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/230c9f90/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index 8987189..3dfec5c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -177,6 +177,13 @@ namespace Lucene.Net.Analysis.Util
this.m_len += len;
}
+ // LUCENENET specific overload for StringBuilder
+ public virtual void UnsafeWrite(StringBuilder b, int off, int len)
+ {
+ b.CopyTo(off, m_buf, this.m_len, len);
+ this.m_len += len;
+ }
+
protected virtual void Resize(int len)
{
char[] newbuf = new char[Math.Max(m_buf.Length << 1, len)];
@@ -217,9 +224,16 @@ namespace Lucene.Net.Analysis.Util
UnsafeWrite(b, off, len);
}
- public void Write(OpenStringBuilder arr) // LUCENENET TODO: Add overload for StringBuilder
+ public void Write(OpenStringBuilder arr)
+ {
+ Write(arr.m_buf, 0, arr.Length); // LUCENENET specific - changed to use arr's length (original passed this instance's m_len rather than arr's - appears to be a bug)
+ }
+
+ // LUCENENET specific overload for StringBuilder
+ public void Write(StringBuilder arr)
{
- Write(arr.m_buf, 0, m_len);
+ Reserve(arr.Length);
+ UnsafeWrite(arr, 0, arr.Length);
}
public virtual void Write(string s)
[06/39] lucenenet git commit: Lucene.Net.Analysis.Standard refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Standard refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ab69b431
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ab69b431
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ab69b431
Branch: refs/heads/api-work
Commit: ab69b43179733001dbb84836732c0f9671021de8
Parents: 6b01385
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 12:18:16 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 12:18:16 2017 +0700
----------------------------------------------------------------------
.../Analysis/Standard/ClassicAnalyzer.cs | 45 ++-
.../Analysis/Standard/ClassicFilter.cs | 7 +-
.../Analysis/Standard/ClassicFilterFactory.cs | 7 +-
.../Analysis/Standard/ClassicTokenizer.cs | 41 ++-
.../Standard/ClassicTokenizerFactory.cs | 2 +-
.../Analysis/Standard/ClassicTokenizerImpl.cs | 39 ++-
.../Analysis/Standard/StandardAnalyzer.cs | 46 ++-
.../Analysis/Standard/StandardFilterFactory.cs | 2 +-
.../Analysis/Standard/StandardTokenizer.cs | 66 ++--
.../Standard/StandardTokenizerFactory.cs | 2 +-
.../Analysis/Standard/StandardTokenizerImpl.cs | 48 ++-
.../Standard/StandardTokenizerInterface.cs | 22 +-
.../Standard/Std31/StandardTokenizerImpl31.cs | 297 ++++++++---------
.../Std31/UAX29URLEmailTokenizerImpl31.cs | 75 ++---
.../Standard/Std34/StandardTokenizerImpl34.cs | 302 +++++++++---------
.../Std34/UAX29URLEmailTokenizerImpl34.cs | 303 +++++++++---------
.../Std36/UAX29URLEmailTokenizerImpl36.cs | 299 +++++++++--------
.../Standard/Std40/StandardTokenizerImpl40.cs | 293 +++++++++--------
.../Std40/UAX29URLEmailTokenizerImpl40.cs | 295 +++++++++--------
.../Analysis/Standard/UAX29URLEmailAnalyzer.cs | 26 +-
.../Analysis/Standard/UAX29URLEmailTokenizer.cs | 69 ++--
.../Standard/UAX29URLEmailTokenizerFactory.cs | 7 +-
.../Standard/UAX29URLEmailTokenizerImpl.cs | 319 +++++++++----------
23 files changed, 1278 insertions(+), 1334 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
index 70aa887..888431b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -23,23 +23,22 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// Filters <see cref="ClassicTokenizer"/> with <see cref="ClassicFilter"/>, {@link
- /// LowerCaseFilter} and <see cref="StopFilter"/>, using a list of
+ /// Filters <see cref="ClassicTokenizer"/> with <see cref="ClassicFilter"/>,
+ /// <see cref="LowerCaseFilter"/> and <see cref="StopFilter"/>, using a list of
/// English stop words.
///
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating ClassicAnalyzer:
- /// <ul>
- /// <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- /// supplementary characters in stopwords
- /// <li> As of 2.9, StopFilter preserves position
- /// increments
- /// <li> As of 2.4, Tokens incorrectly identified as acronyms
- /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- /// </ul>
+ /// compatibility when creating <see cref="ClassicAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.1, <see cref="StopFilter"/> correctly handles Unicode 4.0
+ /// supplementary characters in stopwords</item>
+ /// <item> As of 2.9, <see cref="StopFilter"/> preserves position
+ /// increments</item>
+ /// <item> As of 2.4, <see cref="Token"/>s incorrectly identified as acronyms
+ /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
+ /// </list>
///
- /// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
+ /// <see cref="ClassicAnalyzer"/> was named <see cref="StandardAnalyzer"/> in Lucene versions prior to 3.1.
/// As of 3.1, <see cref="StandardAnalyzer"/> implements Unicode text segmentation,
/// as specified by UAX#29.
/// </para>
@@ -60,8 +59,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the given stop words. </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="ClassicAnalyzer"/> </param>
/// <param name="stopWords"> stop words </param>
public ClassicAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
: base(matchVersion, stopWords)
@@ -69,10 +67,9 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Builds an analyzer with the default stop words ({@link
- /// #STOP_WORDS_SET}). </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET"/>).
+ /// </summary>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="ClassicAnalyzer"/> </param>
public ClassicAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, STOP_WORDS_SET)
{
@@ -80,17 +77,16 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= WordlistLoader#getWordSet(TextReader, Version) </seealso>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopwords"> TextReader to read stop words from </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="ClassicAnalyzer"/> </param>
+ /// <param name="stopwords"> <see cref="TextReader"/> to read stop words from </param>
public ClassicAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
: this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
{
}
/// <summary>
- /// Set maximum allowed token length. If a token is seen
+ /// Gets or sets maximum allowed token length. If a token is seen
/// that exceeds this length then it is discarded. This
/// setting only takes effect the next time tokenStream or
/// tokenStream is called.
@@ -101,7 +97,6 @@ namespace Lucene.Net.Analysis.Standard
get { return maxTokenLength; }
}
-
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
var src = new ClassicTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
index c9bd8cf..6d629f8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
@@ -18,14 +18,13 @@ namespace Lucene.Net.Analysis.Standard
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// Normalizes tokens extracted with <see cref="ClassicTokenizer"/>. </summary>
-
public class ClassicFilter : TokenFilter
{
-
/// <summary>
- /// Construct filtering <i>in</i>. </summary>
+ /// Construct filtering <paramref name="in"/>. </summary>
public ClassicFilter(TokenStream @in)
: base(@in)
{
@@ -42,7 +41,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Returns the next token in the stream, or null at EOS.
- /// <para>Removes <tt>'s</tt> from the end of words.
+ /// <para>Removes <c>'s</c> from the end of words.
/// </para>
/// <para>Removes dots from acronyms.
/// </para>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
index 80fac18..b15b460 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Standard
{
@@ -32,9 +32,8 @@ namespace Lucene.Net.Analysis.Standard
/// </summary>
public class ClassicFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new ClassicFilterFactory </summary>
+ /// Creates a new <see cref="ClassicFilterFactory"/> </summary>
public ClassicFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
index 415bdb7..6898ca6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -22,29 +22,28 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// A grammar-based tokenizer constructed with JFlex
+ /// A grammar-based tokenizer constructed with JFlex (and then ported to .NET)
///
/// <para> This should be a good tokenizer for most European-language documents:
///
- /// <ul>
- /// <li>Splits words at punctuation characters, removing punctuation. However, a
- /// dot that's not followed by whitespace is considered part of a token.
- /// <li>Splits words at hyphens, unless there's a number in the token, in which case
- /// the whole token is interpreted as a product number and is not split.
- /// <li>Recognizes email addresses and internet hostnames as one token.
- /// </ul>
+ /// <list type="bullet">
+ /// <item>Splits words at punctuation characters, removing punctuation. However, a
+ /// dot that's not followed by whitespace is considered part of a token.</item>
+ /// <item>Splits words at hyphens, unless there's a number in the token, in which case
+ /// the whole token is interpreted as a product number and is not split.</item>
+ /// <item>Recognizes email addresses and internet hostnames as one token.</item>
+ /// </list>
///
/// </para>
/// <para>Many applications have specific tokenizer needs. If this tokenizer does
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.
///
- /// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
+ /// <see cref="ClassicTokenizer"/> was named <see cref="StandardTokenizer"/> in Lucene versions prior to 3.1.
/// As of 3.1, <see cref="StandardTokenizer"/> implements Unicode text segmentation,
/// as specified by UAX#29.
/// </para>
/// </summary>
-
public sealed class ClassicTokenizer : Tokenizer
{
/// <summary>
@@ -64,7 +63,17 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = new string[] { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>" };
+ public static readonly string[] TOKEN_TYPES = new string[] {
+ "<ALPHANUM>",
+ "<APOSTROPHE>",
+ "<ACRONYM>",
+ "<COMPANY>",
+ "<EMAIL>",
+ "<HOST>",
+ "<NUM>",
+ "<CJ>",
+ "<ACRONYM_DEP>"
+ };
private int skippedPositions;
@@ -93,8 +102,9 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Creates a new instance of the <see cref="ClassicTokenizer"/>. Attaches
- /// the <code>input</code> to the newly created JFlex scanner.
+ /// the <paramref name="input"/> to the newly created JFlex scanner.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="input"> The input reader
///
/// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
@@ -105,7 +115,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Creates a new ClassicTokenizer with a given <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+ /// Creates a new <see cref="ClassicTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
public ClassicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, Reader input)
: base(factory, input)
@@ -128,12 +138,13 @@ namespace Lucene.Net.Analysis.Standard
private IOffsetAttribute offsetAtt;
private IPositionIncrementAttribute posIncrAtt;
private ITypeAttribute typeAtt;
+
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
ClearAttributes();
skippedPositions = 0;
@@ -175,7 +186,7 @@ namespace Lucene.Net.Analysis.Standard
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
index 079e824..0778eba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Standard
private readonly int maxTokenLength;
/// <summary>
- /// Creates a new ClassicTokenizerFactory </summary>
+ /// Creates a new <see cref="ClassicTokenizerFactory"/> </summary>
public ClassicTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index e7a35b0..7de5f9e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -29,12 +29,11 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// This class implements the classic lucene StandardTokenizer up until 3.0
+ /// This class implements the classic lucene <see cref="StandardTokenizer"/> up until 3.0
/// </summary>
internal class ClassicTokenizerImpl : IStandardTokenizerInterface
{
-
/// <summary>
/// This character denotes the end of file </summary>
public static readonly int YYEOF = -1;
@@ -280,7 +279,7 @@ namespace Lucene.Net.Analysis.Standard
};
/// <summary>
- /// ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
/// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
@@ -327,7 +326,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// this buffer contains the current text to be matched and is
- /// the source of the YyText() string
+ /// the source of the YyText string
/// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
@@ -340,7 +339,7 @@ namespace Lucene.Net.Analysis.Standard
private int zzCurrentPos;
/// <summary>
- /// startRead marks the beginning of the YyText() string in the buffer </summary>
+ /// startRead marks the beginning of the YyText string in the buffer </summary>
private int zzStartRead;
/// <summary>
@@ -365,12 +364,12 @@ namespace Lucene.Net.Analysis.Standard
private int yycolumn;
/// <summary>
- /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
/// </summary>
private bool zzAtBOL = true;
/// <summary>
- /// zzAtEOF == true <=> the scanner is at the EOF </summary>
+ /// zzAtEOF == true <=> the scanner is at the EOF </summary>
private bool zzAtEOF;
/// <summary>
@@ -401,7 +400,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Fills CharTermAttribute with the current token text.
+ /// Fills ICharTermAttribute with the current token text.
/// </summary>
public void GetText(ICharTermAttribute t)
{
@@ -446,9 +445,9 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Refills the input buffer.
/// </summary>
- /// <returns> <code>false</code>, iff there was new input.
+ /// <returns> <c>false</c>, iff there was new input.
/// </returns>
- /// <exception cref="java.io.IOException"> if any I/O-Error occurs </exception>
+ /// <exception cref="IOException"> if any I/O-Error occurs </exception>
private bool ZzRefill()
{
@@ -502,7 +501,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
- /// Closes the input stream.
+ /// Disposes the input stream.
/// </summary>
public void YyClose()
{
@@ -519,11 +518,11 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Resets the scanner to read from a new input stream.
/// Does not close the old reader.
- ///
+ /// <para/>
/// All internal variables are reset, the old input stream
/// <b>cannot</b> be reused (internal buffer is discarded and lost).
- /// Lexical state is set to <tt>ZZ_INITIAL</tt>.
- ///
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
/// Internal scan buffer is resized down to its initial length, if it has grown.
/// </summary>
/// <param name="reader"> the new input stream </param>
@@ -576,10 +575,10 @@ namespace Lucene.Net.Analysis.Standard
/// Returns the character at position <tt>pos</tt> from the
/// matched text.
///
- /// It is equivalent to YyText().charAt(pos), but faster
+ /// It is equivalent to YyText[pos], but faster
/// </summary>
/// <param name="pos"> the position of the character to fetch.
- /// A value from 0 to YyLength()-1.
+ /// A value from 0 to YyLength-1.
/// </param>
/// <returns> the character at position pos </returns>
public char YyCharAt(int pos)
@@ -599,13 +598,13 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Reports an error that occured while scanning.
- ///
+ /// <para/>
/// In a wellformed scanner (no or only correct usage of
/// YyPushBack(int) and a match-all fallback rule) this method
/// will only be called with things that "Can't Possibly Happen".
/// If this method is called, something is seriously wrong
/// (e.g. a JFlex bug producing a faulty scanner etc.).
- ///
+ /// <para/>
/// Usual syntax/scanner level error handling should be done
/// in error fallback rules.
/// </summary>
@@ -632,7 +631,7 @@ namespace Lucene.Net.Analysis.Standard
/// They will be read again by then next call of the scanning method
/// </summary>
/// <param name="number"> the number of characters to be read again.
- /// This number must not be greater than YyLength()! </param>
+ /// This number must not be greater than YyLength! </param>
public virtual void YyPushBack(int number)
{
if (number > YyLength)
@@ -649,7 +648,7 @@ namespace Lucene.Net.Analysis.Standard
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
/// <returns> the next token </returns>
- /// <exception cref="java.io.IOException"> if any I/O-Error occurs </exception>
+ /// <exception cref="IOException"> if any I/O-Error occurs </exception>
public virtual int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index 5770b55..d7f8515 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -23,31 +23,29 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>, {@link
- /// LowerCaseFilter} and <see cref="StopFilter"/>, using a list of
+ /// Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>,
+ /// <see cref="LowerCaseFilter"/> and <see cref="StopFilter"/>, using a list of
/// English stop words.
///
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating StandardAnalyzer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// compatibility when creating <see cref="StandardAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.4, Hiragana and Han characters are no longer wrongly split
/// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
- /// and StopFilter correctly handles Unicode 4.0 supplementary characters
+ /// you get the exact broken behavior for backwards compatibility.</item>
+ /// <item> As of 3.1, <see cref="StandardTokenizer"/> implements Unicode text segmentation,
+ /// and <see cref="StopFilter"/> correctly handles Unicode 4.0 supplementary characters
/// in stopwords. <see cref="ClassicTokenizer"/> and <see cref="ClassicAnalyzer"/>
- /// are the pre-3.1 implementations of StandardTokenizer and
- /// StandardAnalyzer.
- /// <li> As of 2.9, StopFilter preserves position increments
- /// <li> As of 2.4, Tokens incorrectly identified as acronyms
- /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- /// </ul>
+ /// are the pre-3.1 implementations of <see cref="StandardTokenizer"/> and
+ /// <see cref="StandardAnalyzer"/>.</item>
+ /// <item> As of 2.9, <see cref="StopFilter"/> preserves position increments</item>
+ /// <item> As of 2.4, <see cref="Token"/>s incorrectly identified as acronyms
+ /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class StandardAnalyzer : StopwordAnalyzerBase
{
-
/// <summary>
/// Default maximum allowed token length </summary>
public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
@@ -62,8 +60,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the given stop words. </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="StandardAnalyzer"/> </param>
/// <param name="stopWords"> stop words </param>
public StandardAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
: base(matchVersion, stopWords)
@@ -71,10 +68,8 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Builds an analyzer with the default stop words ({@link
- /// #STOP_WORDS_SET}). </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET"/>). </summary>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="StandardAnalyzer"/> </param>
public StandardAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, STOP_WORDS_SET)
{
@@ -82,10 +77,9 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopwords"> SetReader to read stop words from </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="StandardAnalyzer"/> </param>
+ /// <param name="stopwords"> <see cref="TextReader"/> to read stop words from </param>
public StandardAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
: this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
index 5476920..45417f2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
@@ -34,7 +34,7 @@ namespace Lucene.Net.Analysis.Standard
public class StandardFilterFactory : TokenFilterFactory
{
/// <summary>
- /// Creates a new StandardFilterFactory </summary>
+ /// Creates a new <see cref="StandardFilterFactory"/> </summary>
public StandardFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
index deae880..dfc5ce9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -1,12 +1,10 @@
\ufeffusing Lucene.Net.Analysis.Standard.Std31;
using Lucene.Net.Analysis.Standard.Std34;
-using Lucene.Net.Analysis.Standard.Std36;
using Lucene.Net.Analysis.Standard.Std40;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
using System;
-using Reader = System.IO.TextReader;
-using Version = Lucene.Net.Util.LuceneVersion;
+using System.IO;
namespace Lucene.Net.Analysis.Standard
{
@@ -39,21 +37,19 @@ namespace Lucene.Net.Analysis.Standard
/// not suit your application, please consider copying this source code
/// directory to your project and maintaining your own grammar-based tokenizer.
///
- /// <a name="version"/>
/// </para>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating StandardTokenizer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- /// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
- /// If you use a previous version number, you get the exact behavior of
- /// <see cref="ClassicTokenizer"/> for backwards compatibility.
- /// </ul>
+ /// compatibility when creating <see cref="StandardTokenizer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// from their combining characters. If you use a previous version number,
+ /// you get the exact broken behavior for backwards compatibility.</item>
+ /// <item> As of 3.1, StandardTokenizer implements Unicode text segmentation.
+ /// If you use a previous version number, you get the exact behavior of
+ /// <see cref="ClassicTokenizer"/> for backwards compatibility.</item>
+ /// </list>
/// </para>
/// </summary>
-
public sealed class StandardTokenizer : Tokenizer
{
/// <summary>
@@ -91,7 +87,22 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>", "<SOUTHEAST_ASIAN>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>", "<HANGUL>" };
+ public static readonly string[] TOKEN_TYPES = {
+ "<ALPHANUM>",
+ "<APOSTROPHE>",
+ "<ACRONYM>",
+ "<COMPANY>",
+ "<EMAIL>",
+ "<HOST>",
+ "<NUM>",
+ "<CJ>",
+ "<ACRONYM_DEP>",
+ "<SOUTHEAST_ASIAN>",
+ "<IDEOGRAPHIC>",
+ "<HIRAGANA>",
+ "<KATAKANA>",
+ "<HANGUL>"
+ };
private int skippedPositions;
@@ -120,42 +131,43 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Creates a new instance of the <see cref="StandardTokenizer"/>. Attaches
- /// the <code>input</code> to the newly created JFlex scanner.
+ /// the <paramref name="input"/> to the newly created JFlex-generated (then ported to .NET) scanner.
/// </summary>
+ /// <param name="matchVersion"> Lucene compatibility version - See <see cref="StandardTokenizer"/> </param>
/// <param name="input"> The input reader
///
/// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
- public StandardTokenizer(Version matchVersion, Reader input)
+ public StandardTokenizer(LuceneVersion matchVersion, TextReader input)
: base(input)
{
Init(matchVersion);
}
/// <summary>
- /// Creates a new StandardTokenizer with a given <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+ /// Creates a new <see cref="StandardTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
- public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
+ public StandardTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
: base(factory, input)
{
Init(matchVersion);
}
- private void Init(Version matchVersion)
+ private void Init(LuceneVersion matchVersion)
{
#pragma warning disable 612, 618
- if (matchVersion.OnOrAfter(Version.LUCENE_47))
+ if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_47))
{
this.scanner = new StandardTokenizerImpl(m_input);
}
- else if (matchVersion.OnOrAfter(Version.LUCENE_40))
+ else if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_40))
{
this.scanner = new StandardTokenizerImpl40(m_input);
}
- else if (matchVersion.OnOrAfter(Version.LUCENE_34))
+ else if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_34))
{
this.scanner = new StandardTokenizerImpl34(m_input);
}
- else if (matchVersion.OnOrAfter(Version.LUCENE_31))
+ else if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
{
this.scanner = new StandardTokenizerImpl31(m_input);
}
@@ -183,7 +195,7 @@ namespace Lucene.Net.Analysis.Standard
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
ClearAttributes();
skippedPositions = 0;
@@ -201,8 +213,6 @@ namespace Lucene.Net.Analysis.Standard
{
posIncrAtt.PositionIncrement = skippedPositions + 1;
scanner.GetText(termAtt);
- //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
- //ORIGINAL LINE: final int start = scanner.YyChar();
int start = scanner.YyChar;
offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.Length));
// This 'if' should be removed in the next release. For now, it converts
@@ -230,7 +240,7 @@ namespace Lucene.Net.Analysis.Standard
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
index cbc5915..fd3accc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Standard
private readonly int maxTokenLength;
/// <summary>
- /// Creates a new StandardTokenizerFactory </summary>
+ /// Creates a new <see cref="StandardTokenizerFactory"/> </summary>
public StandardTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
index 2e1bb36..62e0237 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -26,23 +26,21 @@ namespace Lucene.Net.Analysis.Standard
/// This class implements Word Break rules from the Unicode Text Segmentation
/// algorithm, as specified in
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- /// <p/>
+ /// <para/>
/// Tokens produced are of the following types:
- /// <ul>
- /// <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- /// <li><NUM>: A number</li>
- /// <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- /// Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- /// <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- /// <li><HIRAGANA>: A single hiragana character</li>
- /// <li><KATAKANA>: A sequence of katakana characters</li>
- /// <li><HANGUL>: A sequence of Hangul characters</li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item><ALPHANUM>: A sequence of alphabetic and numeric characters</item>
+ /// <item><NUM>: A number</item>
+ /// <item><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ /// Asian languages, including Thai, Lao, Myanmar, and Khmer</item>
+ /// <item><IDEOGRAPHIC>: A single CJKV ideographic character</item>
+ /// <item><HIRAGANA>: A single hiragana character</item>
+ /// <item><KATAKANA>: A sequence of katakana characters</item>
+ /// <item><HANGUL>: A sequence of Hangul characters</item>
+ /// </list>
/// </summary>
-
public sealed class StandardTokenizerImpl : IStandardTokenizerInterface
{
-
/// <summary>
/// This character denotes the end of file </summary>
public static readonly int YYEOF = -1;
@@ -952,12 +950,12 @@ namespace Lucene.Net.Analysis.Standard
private int yycolumn;
/// <summary>
- /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
/// </summary>
private bool zzAtBOL = true;
/// <summary>
- /// zzAtEOF == true <=> the scanner is at the EOF </summary>
+ /// zzAtEOF == true <=> the scanner is at the EOF </summary>
private bool zzAtEOF;
/// <summary>
@@ -1046,7 +1044,7 @@ namespace Lucene.Net.Analysis.Standard
/// </summary>
/// <returns> <code>false</code>, iff there was new input.
/// </returns>
- /// <exception cref="java.io.IOException"> if any I/O-Error occurs </exception>
+ /// <exception cref="IOException"> if any I/O-Error occurs </exception>
private bool ZzRefill()
{
@@ -1100,7 +1098,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
- /// Closes the input stream.
+ /// Disposes the input stream.
/// </summary>
public void YyClose()
{
@@ -1120,7 +1118,7 @@ namespace Lucene.Net.Analysis.Standard
///
/// All internal variables are reset, the old input stream
/// <b>cannot</b> be reused (internal buffer is discarded and lost).
- /// Lexical state is set to <tt>ZZ_INITIAL</tt>.
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
///
/// Internal scan buffer is resized down to its initial length, if it has grown.
/// </summary>
@@ -1171,13 +1169,13 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
- /// Returns the character at position <tt>pos</tt> from the
+ /// Returns the character at position <paramref name="pos"/> from the
/// matched text.
///
- /// It is equivalent to YyText().charAt(pos), but faster
+ /// It is equivalent to YyText[pos], but faster
/// </summary>
/// <param name="pos"> the position of the character to fetch.
- /// A value from 0 to YyLength()-1.
+ /// A value from 0 to YyLength-1.
/// </param>
/// <returns> the character at position pos </returns>
public char YyCharAt(int pos)
@@ -1197,13 +1195,13 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Reports an error that occured while scanning.
- ///
+ /// <para/>
/// In a wellformed scanner (no or only correct usage of
/// YyPushBack(int) and a match-all fallback rule) this method
/// will only be called with things that "Can't Possibly Happen".
/// If this method is called, something is seriously wrong
/// (e.g. a JFlex bug producing a faulty scanner etc.).
- ///
+ /// <para/>
/// Usual syntax/scanner level error handling should be done
/// in error fallback rules.
/// </summary>
@@ -1230,7 +1228,7 @@ namespace Lucene.Net.Analysis.Standard
/// They will be read again by then next call of the scanning method
/// </summary>
/// <param name="number"> the number of characters to be read again.
- /// This number must not be greater than YyLength()! </param>
+ /// This number must not be greater than YyLength! </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -1247,7 +1245,7 @@ namespace Lucene.Net.Analysis.Standard
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
/// <returns> the next token </returns>
- /// <exception cref="java.io.IOException"> if any I/O-Error occurs </exception>
+ /// <exception cref="IOException"> if any I/O-Error occurs </exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
index c250996..bfd4f1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using System.IO;
namespace Lucene.Net.Analysis.Standard
{
@@ -26,12 +26,8 @@ namespace Lucene.Net.Analysis.Standard
/// </summary>
public interface IStandardTokenizerInterface
{
-
- /// <summary>
- /// This character denotes the end of file </summary>
-
/// <summary>
- /// Copies the matched text into the CharTermAttribute
+ /// Copies the matched text into the <see cref="ICharTermAttribute"/>
/// </summary>
void GetText(ICharTermAttribute t);
@@ -43,10 +39,10 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Resets the scanner to read from a new input stream.
/// Does not close the old reader.
- ///
+ /// <para/>
/// All internal variables are reset, the old input stream
/// <b>cannot</b> be reused (internal buffer is discarded and lost).
- /// Lexical state is set to <tt>ZZ_INITIAL</tt>.
+ /// Lexical state is set to <c>YYINITIAL</c>.
/// </summary>
/// <param name="reader"> the new input stream </param>
void YyReset(TextReader reader);
@@ -60,15 +56,15 @@ namespace Lucene.Net.Analysis.Standard
/// Resumes scanning until the next regular expression is matched,
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
- /// <returns> the next token, <see cref="#YYEOF"/> on end of stream </returns>
+ /// <returns> the next token, <see cref="StandardTokenizerInterface_Fields.YYEOF"/> on end of stream </returns>
/// <exception cref="IOException"> if any I/O-Error occurs </exception>
int GetNextToken();
-
}
- public static class StandardTokenizerInterface_Fields
+ public static class StandardTokenizerInterface_Fields // LUCENENET TODO: Rename StandardTokenizerInterface (no longer collides with interface name)
{
+ /// <summary>
+ /// This character denotes the end of file </summary>
public const int YYEOF = -1;
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
index 533a34f..5e5667f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
@@ -31,27 +31,27 @@ namespace Lucene.Net.Analysis.Standard.Std31
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class StandardTokenizerImpl31 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0027\x0000\x0001\x0060\x0004\x0000\x0001\x005F\x0001\x0000\x0001\x0060\x0001\x0000\x000A\x005C\x0001\x005E\x0001\x005F" +
"\x0005\x0000\x001A\x005A\x0004\x0000\x0001\x0061\x0001\x0000\x001A\x005A\x002F\x0000\x0001\x005A\x0002\x0000\x0001\x005B" +
@@ -187,14 +187,14 @@ namespace Lucene.Net.Analysis.Standard.Std31
"\x000B\x0000\x0038\x005D\x0002\x005B\x001F\x0066\x0003\x0000\x0006\x0066\x0002\x0000\x0006\x0066\x0002\x0000\x0006\x0066" +
"\x0002\x0000\x0003\x0066\x001C\x0000\x0003\x005B\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -225,9 +225,9 @@ namespace Lucene.Net.Analysis.Standard.Std31
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -268,9 +268,9 @@ namespace Lucene.Net.Analysis.Standard.Std31
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -615,9 +615,9 @@ namespace Lucene.Net.Analysis.Standard.Std31
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -646,73 +646,76 @@ namespace Lucene.Net.Analysis.Standard.Std31
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText() string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the text position at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yyChar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>zzAtBOL == true <=> the scanner is currently at the beginning of a line</summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning disable 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = StandardTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = StandardTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
@@ -728,32 +731,26 @@ namespace Lucene.Net.Analysis.Standard.Std31
get { return yyChar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>Fills ICharTermAttribute with the current token text.</summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
-
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public StandardTokenizerImpl31(TextReader @in)
{
this.zzReader = @in;
}
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -768,14 +765,11 @@ namespace Lucene.Net.Analysis.Standard.Std31
return map;
}
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -831,9 +825,9 @@ namespace Lucene.Net.Analysis.Standard.Std31
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -843,19 +837,16 @@ namespace Lucene.Net.Analysis.Standard.Std31
zzReader.Dispose();
}
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -871,75 +862,69 @@ namespace Lucene.Net.Analysis.Standard.Std31
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -955,15 +940,15 @@ namespace Lucene.Net.Analysis.Standard.Std31
throw new Exception(message);
}
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -972,14 +957,12 @@ namespace Lucene.Net.Analysis.Standard.Std31
zzMarkedPos -= number;
}
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
index 324b47b..4fd4938 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
@@ -3160,7 +3160,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
"Error: pushback value was too large"
};
- /// <summary>ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code></summary>
+ /// <summary>ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c></summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -3204,70 +3204,73 @@ namespace Lucene.Net.Analysis.Standard.Std31
/// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the text position at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>zzAtBOL == true <=> the scanner is currently at the beginning of a line</summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -3399,7 +3402,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
///
/// All internal variables are reset, the old input stream
/// <b>cannot</b> be reused (internal buffer is discarded and lost).
- /// Lexical state is set to <tt>ZZ_INITIAL</tt>.
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
///
/// Internal scan buffer is resized down to its initial length, if it has grown.
/// </summary>
@@ -3447,10 +3450,10 @@ namespace Lucene.Net.Analysis.Standard.Std31
/// Returns the character at position <tt>pos</tt> from the
/// matched text.
///
- /// It is equivalent to YyText().charAt(pos), but faster
+ /// It is equivalent to YyText[pos], but faster
/// </summary>
/// <param name="pos">the position of the character to fetch.
- /// A value from 0 to YyLength()-1.</param>
+ /// A value from 0 to YyLength-1.</param>
/// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
@@ -3498,7 +3501,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
/// They will be read again by then next call of the scanning method
/// </summary>
/// <param name="number">the number of characters to be read again.
- /// This number must not be greater than YyLength()!</param>
+ /// This number must not be greater than YyLength!</param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -3513,7 +3516,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
/// <returns> the next token </returns>
- /// <exception cref="java.io.IOException"> if any I/O-Error occurs </exception>
+ /// <exception cref="IOException"> if any I/O-Error occurs </exception>
public int GetNextToken()
{
int zzInput;
[35/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed
NGram
Posted by ni...@apache.org.
Lucene.Net.Analysis.Ngram - renamed NGram
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/32014651
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/32014651
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/32014651
Branch: refs/heads/api-work
Commit: 3201465174aa88c6d22ddc03f854978ae6f02a22
Parents: f0791db
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 03:08:35 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 03:29:10 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ngram/EdgeNGramFilterFactory.cs | 2 +-
.../Analysis/Ngram/EdgeNGramTokenFilter.cs | 2 +-
.../Analysis/Ngram/EdgeNGramTokenizer.cs | 2 +-
.../Analysis/Ngram/EdgeNGramTokenizerFactory.cs | 2 +-
.../Ngram/Lucene43EdgeNGramTokenizer.cs | 2 +-
.../Analysis/Ngram/Lucene43NGramTokenizer.cs | 2 +-
.../Analysis/Ngram/NGramFilterFactory.cs | 2 +-
.../Analysis/Ngram/NGramTokenFilter.cs | 2 +-
.../Analysis/Ngram/NGramTokenizer.cs | 2 +-
.../Analysis/Ngram/NGramTokenizerFactory.cs | 2 +-
.../Lucene.Net.Analysis.Common.csproj | 20 ++++++++++----------
.../Analyzing/AnalyzingInfixSuggester.cs | 2 +-
.../Analysis/Core/TestBugInSomething.cs | 2 +-
.../Analysis/Core/TestRandomChains.cs | 2 +-
.../Analysis/Ngram/EdgeNGramTokenFilterTest.cs | 2 +-
.../Analysis/Ngram/EdgeNGramTokenizerTest.cs | 2 +-
.../Analysis/Ngram/NGramTokenFilterTest.cs | 2 +-
.../Analysis/Ngram/NGramTokenizerTest.cs | 2 +-
.../Analysis/Ngram/TestNGramFilters.cs | 2 +-
.../Analysis/Synonym/TestSynonymMap.cs | 2 +-
.../Lucene.Net.Tests.Analysis.Common.csproj | 10 +++++-----
.../SimpleNaiveBayesClassifierTest.cs | 2 +-
22 files changed, 35 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
index 2e3e0ed..70b44d3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
index 4c1fff1..8cf8172 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using System;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
index 9eba29f..ed2cb3d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Util;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
index d3f2bb6..00325f5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
@@ -4,7 +4,7 @@ using System;
using System.Collections.Generic;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
index eb09a94..4dadbed 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
index a79ffba..b806345 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
@@ -2,7 +2,7 @@
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
index 8b9b726..ca1d0bc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 26cc8d5..f1c82c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
index b37d290..b1845c8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -6,7 +6,7 @@ using System;
using System.Diagnostics;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
index 33a81b2..cf25b65 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
using System.Collections.Generic;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 610125f..0c43b07 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -277,16 +277,16 @@
<Compile Include="Analysis\Miscellaneous\WordDelimiterFilter.cs" />
<Compile Include="Analysis\Miscellaneous\WordDelimiterFilterFactory.cs" />
<Compile Include="Analysis\Miscellaneous\WordDelimiterIterator.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramFilterFactory.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramTokenFilter.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramTokenizer.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramTokenizerFactory.cs" />
- <Compile Include="Analysis\Ngram\Lucene43EdgeNGramTokenizer.cs" />
- <Compile Include="Analysis\Ngram\Lucene43NGramTokenizer.cs" />
- <Compile Include="Analysis\Ngram\NGramFilterFactory.cs" />
- <Compile Include="Analysis\Ngram\NGramTokenFilter.cs" />
- <Compile Include="Analysis\Ngram\NGramTokenizer.cs" />
- <Compile Include="Analysis\Ngram\NGramTokenizerFactory.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramFilterFactory.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramTokenFilter.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramTokenizer.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramTokenizerFactory.cs" />
+ <Compile Include="Analysis\NGram\Lucene43EdgeNGramTokenizer.cs" />
+ <Compile Include="Analysis\NGram\Lucene43NGramTokenizer.cs" />
+ <Compile Include="Analysis\NGram\NGramFilterFactory.cs" />
+ <Compile Include="Analysis\NGram\NGramTokenFilter.cs" />
+ <Compile Include="Analysis\NGram\NGramTokenizer.cs" />
+ <Compile Include="Analysis\NGram\NGramTokenizerFactory.cs" />
<Compile Include="Analysis\Nl\DutchAnalyzer.cs" />
<Compile Include="Analysis\Nl\DutchStemFilter.cs" />
<Compile Include="Analysis\Nl\DutchStemmer.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
index 3ba1cb0..fce571d 100644
--- a/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Analyzing/AnalyzingInfixSuggester.cs
@@ -1,5 +1,5 @@
\ufeffusing Lucene.Net.Analysis;
-using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Documents;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
index 1a7f7e9..5a08ec4 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Analysis.CharFilters;
using Lucene.Net.Analysis.CommonGrams;
using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.Shingle;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Analysis.Wikipedia;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
index e1c0131..fbeb2f9 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestRandomChains.cs
@@ -5,7 +5,7 @@ using Lucene.Net.Analysis.Compound;
using Lucene.Net.Analysis.Compound.Hyphenation;
using Lucene.Net.Analysis.Hunspell;
using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.Path;
using Lucene.Net.Analysis.Payloads;
using Lucene.Net.Analysis.Snowball;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
index 82ac83c..ea6fbd7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
@@ -8,7 +8,7 @@ using NUnit.Framework;
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
index 6307146..4ccecfa 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
@@ -5,7 +5,7 @@ using System.IO;
using Reader = System.IO.TextReader;
using Version = Lucene.Net.Util.LuceneVersion;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
index 3554638..e485fc0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
@@ -7,7 +7,7 @@ using NUnit.Framework;
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
index d72f4c5..2fc1356 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
@@ -4,7 +4,7 @@ using Lucene.Net.Util;
using NUnit.Framework;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
index b21ddc2..c0683a6 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
@@ -4,7 +4,7 @@ using NUnit.Framework;
using System.IO;
using Reader = System.IO.TextReader;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMap.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMap.cs
index fd7458f..a657a33 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMap.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSynonymMap.cs
@@ -5,7 +5,7 @@ using System.IO;
using System.Text;
using NUnit.Framework;
using Lucene.Net.Analysis.Util;
-using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.NGram;
namespace Lucene.Net.Analysis.Synonym
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 0bd31c1..be921e7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -221,11 +221,11 @@
<Compile Include="Analysis\Miscellaneous\TestTruncateTokenFilter.cs" />
<Compile Include="Analysis\Miscellaneous\TestTruncateTokenFilterFactory.cs" />
<Compile Include="Analysis\Miscellaneous\TestWordDelimiterFilter.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramTokenFilterTest.cs" />
- <Compile Include="Analysis\Ngram\EdgeNGramTokenizerTest.cs" />
- <Compile Include="Analysis\Ngram\NGramTokenFilterTest.cs" />
- <Compile Include="Analysis\Ngram\NGramTokenizerTest.cs" />
- <Compile Include="Analysis\Ngram\TestNGramFilters.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramTokenFilterTest.cs" />
+ <Compile Include="Analysis\NGram\EdgeNGramTokenizerTest.cs" />
+ <Compile Include="Analysis\NGram\NGramTokenFilterTest.cs" />
+ <Compile Include="Analysis\NGram\NGramTokenizerTest.cs" />
+ <Compile Include="Analysis\NGram\TestNGramFilters.cs" />
<Compile Include="Analysis\Nl\TestDutchStemmer.cs" />
<Compile Include="Analysis\No\TestNorwegianAnalyzer.cs" />
<Compile Include="Analysis\No\TestNorwegianLightStemFilter.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/32014651/src/Lucene.Net.Tests.Classification/SimpleNaiveBayesClassifierTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Classification/SimpleNaiveBayesClassifierTest.cs b/src/Lucene.Net.Tests.Classification/SimpleNaiveBayesClassifierTest.cs
index a6376c2..077554c 100644
--- a/src/Lucene.Net.Tests.Classification/SimpleNaiveBayesClassifierTest.cs
+++ b/src/Lucene.Net.Tests.Classification/SimpleNaiveBayesClassifierTest.cs
@@ -1,6 +1,6 @@
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Ngram;
+using Lucene.Net.Analysis.NGram;
using Lucene.Net.Analysis.Reverse;
using Lucene.Net.Index;
using Lucene.Net.Search;
[03/39] lucenenet git commit: Lucene.Net.Analysis.Snowball refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Snowball refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6b013858
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6b013858
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6b013858
Branch: refs/heads/api-work
Commit: 6b0138586698c6f053565c85935a09b8e10b07b8
Parents: f3a14db
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 09:14:35 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 09:14:35 2017 +0700
----------------------------------------------------------------------
.../Analysis/Snowball/SnowballAnalyzer.cs | 18 ++++++------
.../Analysis/Snowball/SnowballFilter.cs | 31 ++++++++------------
.../Snowball/SnowballPorterFilterFactory.cs | 4 +--
3 files changed, 23 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6b013858/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
index 352616a..bcaa1d6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
@@ -27,22 +27,22 @@ namespace Lucene.Net.Analysis.Snowball
*/
/// <summary>
- /// Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>, {@link
- /// LowerCaseFilter}, <see cref="StopFilter"/> and <see cref="SnowballFilter"/>.
+ /// Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>,
+ /// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/> and <see cref="SnowballFilter"/>.
///
/// Available stemmers are listed in org.tartarus.snowball.ext. The name of a
/// stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
- /// <see cref="org.tartarus.snowball.ext.EnglishStemmer"/> is named "English".
+ /// <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
///
/// <para><b>NOTE</b>: This class uses the same <see cref="LuceneVersion"/>
/// dependent settings as <see cref="StandardAnalyzer"/>, with the following addition:
- /// <ul>
- /// <li> As of 3.1, uses <see cref="TurkishLowerCaseFilter"/> for Turkish language.
- /// </ul>
+ /// <list type="bullet">
+ /// <item> As of 3.1, uses <see cref="TurkishLowerCaseFilter"/> for Turkish language.</item>
+ /// </list>
/// </para> </summary>
/// @deprecated (3.1) Use the language-specific analyzer in modules/analysis instead.
/// This analyzer will be removed in Lucene 5.0
- [Obsolete("(3.1) Use the language-specific analyzer in modules/analysis instead.")]
+ [Obsolete("(3.1) Use the language-specific analyzer in modules/analysis instead. This analyzer will be removed in Lucene 5.0.")]
public sealed class SnowballAnalyzer : Analyzer
{
private string name;
@@ -65,8 +65,8 @@ namespace Lucene.Net.Analysis.Snowball
}
/// <summary>
- /// Constructs a <see cref="StandardTokenizer"/> filtered by a {@link
- /// StandardFilter}, a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/>,
+ /// Constructs a <see cref="StandardTokenizer"/> filtered by a
+ /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/>,
/// and a <see cref="SnowballFilter"/>
/// </summary>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6b013858/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
index 103bd66..d9f8672 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
@@ -25,29 +25,26 @@ namespace Lucene.Net.Analysis.Snowball
/// <summary>
/// A filter that stems words using a Snowball-generated stemmer.
///
- /// Available stemmers are listed in <see cref="org.tartarus.snowball.ext"/>.
- /// <para><b>NOTE</b>: SnowballFilter expects lowercased text.
- /// <ul>
- /// <li>For the Turkish language, see <see cref="TurkishLowerCaseFilter"/>.
- /// <li>For other languages, see <see cref="LowerCaseFilter"/>.
- /// </ul>
+ /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
+ /// <para><b>NOTE</b>: <see cref="SnowballFilter"/> expects lowercased text.
+ /// <list type="bullet">
+ /// <item>For the Turkish language, see <see cref="Tr.TurkishLowerCaseFilter"/>.</item>
+ /// <item>For other languages, see <see cref="Core.LowerCaseFilter"/>.</item>
+ /// </list>
/// </para>
///
/// <para>
/// Note: This filter is aware of the <see cref="KeywordAttribute"/>. To prevent
/// certain terms from being passed to the stemmer
- /// <see cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+ /// <see cref="KeywordAttribute.IsKeyword"/> should be set to <c>true</c>
/// in a previous <see cref="TokenStream"/>.
///
/// Note: For including the original term as well as the stemmed version, see
- /// <see cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+ /// <see cref="Miscellaneous.KeywordRepeatFilterFactory"/>
/// </para>
- ///
- ///
/// </summary>
public sealed class SnowballFilter : TokenFilter
{
-
private readonly SnowballProgram stemmer;
private readonly ICharTermAttribute termAtt;
@@ -64,19 +61,17 @@ namespace Lucene.Net.Analysis.Snowball
/// <summary>
/// Construct the named stemming filter.
///
- /// Available stemmers are listed in <see cref="org.tartarus.snowball.ext"/>.
+ /// Available stemmers are listed in Lucene.Net.Tartarus.Snowball.Ext.
/// The name of a stemmer is the part of the class name before "Stemmer",
- /// e.g., the stemmer in <see cref="org.tartarus.snowball.ext.EnglishStemmer"/> is named "English".
+ /// e.g., the stemmer in <see cref="Tartarus.Snowball.Ext.EnglishStemmer"/> is named "English".
/// </summary>
/// <param name="in"> the input tokens to stem </param>
/// <param name="name"> the name of a stemmer </param>
public SnowballFilter(TokenStream @in, string name)
- : base(@in)
+ : base(@in)
{
try
{
- // LUCENENET TODO: There should probably be a way to make this an extesibility point so
- // custom extensions can be loaded.
string className = typeof(SnowballProgram).Namespace + ".Ext." +
name + "Stemmer, " + this.GetType().GetTypeInfo().Assembly.GetName().Name;
Type stemClass = Type.GetType(className);
@@ -93,8 +88,8 @@ namespace Lucene.Net.Analysis.Snowball
}
/// <summary>
- /// Returns the next input Token, after being stemmed </summary>
- public override bool IncrementToken()
+ /// Returns the next input <see cref="Token"/>, after being stemmed </summary>
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6b013858/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
index 707467f..312a21e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
@@ -48,7 +48,7 @@ namespace Lucene.Net.Analysis.Snowball
private CharArraySet protectedWords = null;
/// <summary>
- /// Creates a new SnowballPorterFilterFactory </summary>
+ /// Creates a new <see cref="SnowballPorterFilterFactory"/> </summary>
public SnowballPorterFilterFactory(IDictionary<string, string> args) : base(args)
{
language = Get(args, "language", "English");
@@ -61,8 +61,6 @@ namespace Lucene.Net.Analysis.Snowball
public virtual void Inform(IResourceLoader loader)
{
- // LUCENENET TODO: There should probably be a way to make this an extesibility point so
- // custom extensions can be loaded.
string className = typeof(SnowballProgram).Namespace + ".Ext." +
language + "Stemmer, " + this.GetType().GetTypeInfo().Assembly.GetName().Name;
stemClass = Type.GetType(className);
[34/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed Flush() method
because this seems nonsensical for a .NET StringBuilder
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed Flush() method because this seems nonsensical for a .NET StringBuilder
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f0791db7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f0791db7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f0791db7
Branch: refs/heads/api-work
Commit: f0791db788cff98cda1f089d99a631014b8e0cf6
Parents: b6c0998
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 02:48:58 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 03:29:09 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/OpenStringBuilder.cs | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f0791db7/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index b72f116..d2dbab0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -243,9 +243,9 @@ namespace Lucene.Net.Analysis.Util
m_len += s.Length;
}
- public virtual void Flush()
- {
- }
+ //public virtual void Flush() // LUCENENET specific - removed because this doesn't make much sense on a StringBuilder in .NET, and it is not used
+ //{
+ //}
public void Reset()
{
[28/39] lucenenet git commit:
Lucene.Net.Analysis.Util.AbstractAnalysisFactory refactor: ClassArg
> GetClassArg() (makes conversion)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.AbstractAnalysisFactory refactor: ClassArg > GetClassArg() (makes conversion)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e3efbd0d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e3efbd0d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e3efbd0d
Branch: refs/heads/api-work
Commit: e3efbd0d463bd8b3dfb2a08de7bbd009b4db0c16
Parents: 01f01ab
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:24:41 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:24:41 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/AbstractAnalysisFactory.cs | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e3efbd0d/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index 1d6d304..cb010bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -390,20 +390,17 @@ namespace Lucene.Net.Analysis.Util
/// <returns> the string used to specify the concrete class name in a serialized representation: the class arg.
/// If the concrete class name was not specified via a class arg, returns <c>GetType().Name</c>. </returns>
- public virtual string ClassArg // LUCENENET TODO: Change to GetClassArg()
+ public virtual string GetClassArg()
{
- get
+ if (null != originalArgs)
{
- if (null != originalArgs)
+ string className = originalArgs[CLASS_NAME];
+ if (null != className)
{
- string className = originalArgs[CLASS_NAME];
- if (null != className)
- {
- return className;
- }
+ return className;
}
- return this.GetType().Name;
}
+ return this.GetType().Name;
}
public virtual bool IsExplicitLuceneMatchVersion { get; set; }
[05/39] lucenenet git commit: Lucene.Net.Analysis.Standard refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
index fd446a3..855470d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
@@ -29,26 +29,26 @@ namespace Lucene.Net.Analysis.Standard.Std34
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class StandardTokenizerImpl34 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0027\x0000\x0001\x0060\x0004\x0000\x0001\x005F\x0001\x0000\x0001\x0060\x0001\x0000\x000A\x005C\x0001\x005E\x0001\x005F" +
"\x0005\x0000\x001A\x005A\x0004\x0000\x0001\x0061\x0001\x0000\x001A\x005A\x002F\x0000\x0001\x005A\x0002\x0000\x0001\x005B" +
@@ -184,14 +184,14 @@ namespace Lucene.Net.Analysis.Standard.Std34
"\x000B\x0000\x0038\x005D\x0002\x005B\x001F\x0066\x0003\x0000\x0006\x0066\x0002\x0000\x0006\x0066\x0002\x0000\x0006\x0066" +
"\x0002\x0000\x0003\x0066\x001C\x0000\x0003\x005B\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -222,9 +222,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -266,9 +266,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -631,9 +631,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -662,73 +662,78 @@ namespace Lucene.Net.Analysis.Standard.Std34
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yyChar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = StandardTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = StandardTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
@@ -744,32 +749,28 @@ namespace Lucene.Net.Analysis.Standard.Std34
get { return yyChar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
-
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public StandardTokenizerImpl34(TextReader @in)
{
this.zzReader = @in;
}
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -784,14 +785,11 @@ namespace Lucene.Net.Analysis.Standard.Std34
return map;
}
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -847,9 +845,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -859,19 +857,17 @@ namespace Lucene.Net.Analysis.Standard.Std34
zzReader.Dispose();
}
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -887,75 +883,67 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
-
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occured while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -971,15 +959,15 @@ namespace Lucene.Net.Analysis.Standard.Std34
throw new Exception(message);
}
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by then next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -988,14 +976,12 @@ namespace Lucene.Net.Analysis.Standard.Std34
zzMarkedPos -= number;
}
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
index c2066de..d774d6d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
@@ -32,26 +32,26 @@ namespace Lucene.Net.Analysis.Standard.Std34
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class UAX29URLEmailTokenizerImpl34 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer </summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly string ZZ_CMAP_PACKED =
"\x0001\x009F\x0008\x009D\x0002\x009F\x0002\x009D\x0001\x009F\x0013\x009D\x0001\x00A0\x0001\x009C\x0001\x0097\x0001\x00A0" +
"\x0001\x0090\x0001\x008E\x0001\x0093\x0002\x0091\x0002\x00A0\x0001\x0092\x0001\x0082\x0001\x0067\x0001\x0096\x0001\x0083" +
@@ -195,14 +195,14 @@ namespace Lucene.Net.Analysis.Standard.Std34
"\x001A\x005A\x000B\x0000\x0038\x005D\x0002\x005B\x001F\x0066\x0003\x0000\x0006\x0066\x0002\x0000\x0006\x0066\x0002\x0000" +
"\x0006\x0066\x0002\x0000\x0003\x0066\x001C\x0000\x0003\x005B\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -247,9 +247,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -448,9 +448,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -3261,9 +3261,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -3304,73 +3304,77 @@ namespace Lucene.Net.Analysis.Standard.Std34
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -3390,32 +3394,28 @@ namespace Lucene.Net.Analysis.Standard.Std34
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
-
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl34(TextReader @in)
{
this.zzReader = @in;
}
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -3430,14 +3430,11 @@ namespace Lucene.Net.Analysis.Standard.Std34
return map;
}
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -3493,9 +3490,9 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -3507,19 +3504,17 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
}
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -3535,75 +3530,69 @@ namespace Lucene.Net.Analysis.Standard.Std34
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
-
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occured while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -3619,15 +3608,15 @@ namespace Lucene.Net.Analysis.Standard.Std34
throw new Exception(message);
}
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -3636,14 +3625,12 @@ namespace Lucene.Net.Analysis.Standard.Std34
zzMarkedPos -= number;
}
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
index 9d8f68d..afa04ff 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
@@ -28,26 +28,26 @@ namespace Lucene.Net.Analysis.Standard.Std36
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class UAX29URLEmailTokenizerImpl36 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0001\x009F\x0008\x009D\x0002\x009F\x0002\x009D\x0001\x009F\x0013\x009D\x0001\x00A0\x0001\x009C\x0001\x0097\x0001\x00A0" +
"\x0001\x0090\x0001\x008E\x0001\x0093\x0002\x0091\x0002\x00A0\x0001\x0092\x0001\x0082\x0001\x0067\x0001\x0096\x0001\x0083" +
@@ -191,14 +191,14 @@ namespace Lucene.Net.Analysis.Standard.Std36
"\x001A\x005A\x000B\x0000\x0038\x005D\x0002\x005B\x001F\x0066\x0003\x0000\x0006\x0066\x0002\x0000\x0006\x0066\x0002\x0000" +
"\x0006\x0066\x0002\x0000\x0003\x0066\x001C\x0000\x0003\x005B\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -245,9 +245,9 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -487,9 +487,9 @@ namespace Lucene.Net.Analysis.Standard.Std36
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -3694,9 +3694,9 @@ namespace Lucene.Net.Analysis.Standard.Std36
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -3739,73 +3739,77 @@ namespace Lucene.Net.Analysis.Standard.Std36
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the text position at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -3825,32 +3829,28 @@ namespace Lucene.Net.Analysis.Standard.Std36
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
-
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl36(TextReader @in)
{
this.zzReader = @in;
}
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -3865,14 +3865,11 @@ namespace Lucene.Net.Analysis.Standard.Std36
return map;
}
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -3928,9 +3925,9 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -3942,19 +3939,17 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
}
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -3970,75 +3965,71 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
-
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a well-formed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -4055,14 +4046,15 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -4072,13 +4064,12 @@ namespace Lucene.Net.Analysis.Standard.Std36
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
index c0be18d..59bd49d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
@@ -29,26 +29,26 @@ namespace Lucene.Net.Analysis.Standard.Std40
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class StandardTokenizerImpl40 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0027\x0000\x0001\x0082\x0004\x0000\x0001\x0081\x0001\x0000\x0001\x0082\x0001\x0000\x000A\x007E\x0001\x0080\x0001\x0081" +
"\x0005\x0000\x001A\x007C\x0004\x0000\x0001\x0083\x0001\x0000\x001A\x007C\x002F\x0000\x0001\x007C\x0002\x0000\x0001\x007D" +
@@ -194,14 +194,14 @@ namespace Lucene.Net.Analysis.Standard.Std40
"\x0038\x007F\x0002\x007D\x001F\x0088\x0003\x0000\x0006\x0088\x0002\x0000\x0006\x0088\x0002\x0000\x0006\x0088\x0002\x0000" +
"\x0003\x0088\x001C\x0000\x0003\x007D\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -232,9 +232,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -280,9 +280,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -738,9 +738,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -769,73 +769,77 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the text position at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yyChar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = StandardTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = StandardTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
@@ -851,32 +855,30 @@ namespace Lucene.Net.Analysis.Standard.Std40
get { return yyChar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public StandardTokenizerImpl40(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -892,13 +894,11 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -954,9 +954,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -967,18 +967,17 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -994,75 +993,73 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a well-formed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -1079,14 +1076,15 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -1096,13 +1094,12 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
[26/39] lucenenet git commit: SWEEP: in all LinkedLists that are
being used as a queue,
ensure we remove the same instance from the queue that we use further on in
the process.
Posted by ni...@apache.org.
SWEEP: in all LinkedLists that are being used as a queue, ensure we remove the same instance from the queue that we use further on in the process.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e382d8a9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e382d8a9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e382d8a9
Branch: refs/heads/api-work
Commit: e382d8a930610b8e08fc1ad8798b9663bd4294f9
Parents: 3031be6
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:14:17 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:14:17 2017 +0700
----------------------------------------------------------------------
.../Analysis/Compound/CompoundWordTokenFilterBase.cs | 3 ++-
.../Analysis/Shingle/ShingleFilter.cs | 2 +-
.../Analysis/Synonym/SlowSynonymFilter.cs | 7 +++----
src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs | 2 +-
src/Lucene.Net.Core/Index/IndexWriter.cs | 2 +-
src/Lucene.Net.Core/Search/NumericRangeQuery.cs | 4 ++--
.../Support/LimitedConcurrencyLevelTaskScheduler.cs | 2 +-
src/Lucene.Net.Core/Util/Automaton/Automaton.cs | 6 +++---
src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs | 4 ++--
src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs | 2 +-
.../Util/automaton/AutomatonTestUtil.cs | 4 ++--
11 files changed, 19 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index 0d42753..f479951 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -110,7 +110,8 @@ namespace Lucene.Net.Analysis.Compound
if (m_tokens.Count > 0)
{
Debug.Assert(current != null);
- CompoundToken token = m_tokens.First.Value; m_tokens.RemoveFirst();
+ CompoundToken token = m_tokens.First.Value;
+ m_tokens.Remove(token);
RestoreState(current); // keep all other attributes untouched
m_termAtt.SetEmpty().Append(token.Text);
m_offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
index 1153d9e..609a50b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
@@ -509,7 +509,7 @@ namespace Lucene.Net.Analysis.Shingle
if (inputWindow.Count > 0)
{
firstToken = inputWindow.First.Value;
- inputWindow.RemoveFirst(); // LUCENENET TODO: Safer if we remove the .First.Value from the previous line (do this across the solution) - extension method?
+ inputWindow.Remove(firstToken);
}
while (inputWindow.Count < maxShingleSize)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
index ebdf488..c488c83 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
@@ -167,7 +167,7 @@ namespace Lucene.Net.Analysis.Synonym
else
{
origTok = matched.First.Value;
- matched.RemoveFirst();
+ matched.Remove(origTok);
}
if (origTok != null)
{
@@ -195,7 +195,7 @@ namespace Lucene.Net.Analysis.Synonym
else
{
origTok = matched.First.Value;
- matched.RemoveFirst();
+ matched.Remove(origTok);
}
if (origTok != null)
{
@@ -230,9 +230,8 @@ namespace Lucene.Net.Analysis.Synonym
if (buffer != null && buffer.Count > 0)
{
var first = buffer.First.Value;
- buffer.RemoveFirst();
+ buffer.Remove(first);
return first;
-
}
else
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs b/src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs
index c414c6f..f488afb 100644
--- a/src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs
+++ b/src/Lucene.Net.Core/Index/DocumentsWriterFlushQueue.cs
@@ -156,7 +156,7 @@ namespace Lucene.Net.Index
{
// finally remove the published ticket from the queue
FlushTicket poll = queue.First.Value;
- queue.RemoveFirst();
+ queue.Remove(poll);
ticketCount.DecrementAndGet();
Debug.Assert(poll == head);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Index/IndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/IndexWriter.cs b/src/Lucene.Net.Core/Index/IndexWriter.cs
index 39983c1..d929cf1 100644
--- a/src/Lucene.Net.Core/Index/IndexWriter.cs
+++ b/src/Lucene.Net.Core/Index/IndexWriter.cs
@@ -2463,7 +2463,7 @@ namespace Lucene.Net.Index
{
// Advance the merge from pending to running
MergePolicy.OneMerge merge = pendingMerges.First.Value;
- pendingMerges.RemoveFirst();
+ pendingMerges.Remove(merge);
runningMerges.Add(merge);
return merge;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Search/NumericRangeQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/NumericRangeQuery.cs b/src/Lucene.Net.Core/Search/NumericRangeQuery.cs
index 63928c9..7c17a5f 100644
--- a/src/Lucene.Net.Core/Search/NumericRangeQuery.cs
+++ b/src/Lucene.Net.Core/Search/NumericRangeQuery.cs
@@ -457,11 +457,11 @@ namespace Lucene.Net.Search
Debug.Assert(rangeBounds.Count % 2 == 0);
currentLowerBound = rangeBounds.First.Value;
- rangeBounds.RemoveFirst();
+ rangeBounds.Remove(currentLowerBound);
Debug.Assert(currentUpperBound == null || termComp.Compare(currentUpperBound, currentLowerBound) <= 0, "The current upper bound must be <= the new lower bound");
currentUpperBound = rangeBounds.First.Value;
- rangeBounds.RemoveFirst();
+ rangeBounds.Remove(currentUpperBound);
}
protected override sealed BytesRef NextSeekTerm(BytesRef term)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Support/LimitedConcurrencyLevelTaskScheduler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/LimitedConcurrencyLevelTaskScheduler.cs b/src/Lucene.Net.Core/Support/LimitedConcurrencyLevelTaskScheduler.cs
index 1531584..9190722 100644
--- a/src/Lucene.Net.Core/Support/LimitedConcurrencyLevelTaskScheduler.cs
+++ b/src/Lucene.Net.Core/Support/LimitedConcurrencyLevelTaskScheduler.cs
@@ -80,7 +80,7 @@ namespace Lucene.Net.Support
// Get the next item from the queue
item = _tasks.First.Value;
- _tasks.RemoveFirst();
+ _tasks.Remove(item);
}
// Execute the task we pulled out of the queue
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Util/Automaton/Automaton.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Automaton/Automaton.cs b/src/Lucene.Net.Core/Util/Automaton/Automaton.cs
index 488a641..5238672 100644
--- a/src/Lucene.Net.Core/Util/Automaton/Automaton.cs
+++ b/src/Lucene.Net.Core/Util/Automaton/Automaton.cs
@@ -291,7 +291,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
State s = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(s);
for (int i = 0; i < s.numTransitions; i++)
{
Transition t = s.TransitionsArray[i];
@@ -364,7 +364,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
State s = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(s);
if (s.accept)
{
accepts.Add(s);
@@ -502,7 +502,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
State s = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(s);
foreach (State p in map[s.number])
{
if (!live.Contains(p))
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs b/src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs
index 962e1b1..5b560b3 100644
--- a/src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs
+++ b/src/Lucene.Net.Core/Util/Automaton/BasicOperations.cs
@@ -407,7 +407,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
p = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(p);
p.s.accept = p.S1.accept && p.S2.accept;
Transition[] t1 = transitions1[p.S1.number];
Transition[] t2 = transitions2[p.S2.number];
@@ -958,7 +958,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
StatePair p = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(p);
workset.Remove(p);
HashSet<State> to;
HashSet<State> from;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs b/src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs
index e583215..3a1d413 100644
--- a/src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs
+++ b/src/Lucene.Net.Core/Util/Automaton/MinimizationOperations.cs
@@ -142,7 +142,7 @@ namespace Lucene.Net.Util.Automaton
while (pending.Count > 0)
{
IntPair ip = pending.First.Value;
- pending.RemoveFirst();
+ pending.Remove(ip);
int p = ip.N1;
int x = ip.N2;
pending2.SafeSet(x * statesLen + p, false);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e382d8a9/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs b/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
index 954a213..c060fd1 100644
--- a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
+++ b/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
@@ -264,7 +264,7 @@ namespace Lucene.Net.Util.Automaton
while (q.Count > 0)
{
State s = q.First.Value;
- q.RemoveFirst();
+ q.Remove(s);
IList<ArrivingTransition> arriving;
allArriving.TryGetValue(s, out arriving);
if (arriving != null)
@@ -478,7 +478,7 @@ namespace Lucene.Net.Util.Automaton
while (worklist.Count > 0)
{
ISet<State> s = worklist.First.Value;
- worklist.RemoveFirst();
+ worklist.Remove(s);
State r = newstate[s];
foreach (State q in s)
{
[21/39] lucenenet git commit: Lucene.Net.Analysis.Common/Tartarus
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FrenchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FrenchStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FrenchStemmer.cs
index af66028..9d45b92 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FrenchStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FrenchStemmer.cs
@@ -192,7 +192,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -227,7 +227,7 @@
do
{
// (, line 40
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab5;
}
@@ -243,18 +243,18 @@
{
// (, line 40
// literal, line 40
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab7;
}
// ], line 40
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab7;
}
// <-, line 40
- slice_from("U");
+ SliceFrom("U");
goto lab6;
} while (false);
lab7:
@@ -264,32 +264,32 @@
{
// (, line 41
// literal, line 41
- if (!(eq_s(1, "i")))
+ if (!(Eq_S(1, "i")))
{
goto lab8;
}
// ], line 41
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab8;
}
// <-, line 41
- slice_from("I");
+ SliceFrom("I");
goto lab6;
} while (false);
lab8:
m_cursor = v_4;
// (, line 42
// literal, line 42
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab5;
}
// ], line 42
m_ket = m_cursor;
// <-, line 42
- slice_from("Y");
+ SliceFrom("Y");
} while (false);
lab6:
goto lab4;
@@ -303,39 +303,39 @@
// [, line 45
m_bra = m_cursor;
// literal, line 45
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab9;
}
// ], line 45
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab9;
}
// <-, line 45
- slice_from("Y");
+ SliceFrom("Y");
goto lab4;
} while (false);
lab9:
m_cursor = v_3;
// (, line 47
// literal, line 47
- if (!(eq_s(1, "q")))
+ if (!(Eq_S(1, "q")))
{
goto lab3;
}
// [, line 47
m_bra = m_cursor;
// literal, line 47
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab3;
}
// ], line 47
m_ket = m_cursor;
// <-, line 47
- slice_from("U");
+ SliceFrom("U");
} while (false);
lab4:
m_cursor = v_2;
@@ -393,11 +393,11 @@
do
{
// (, line 57
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab2;
}
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab2;
}
@@ -415,7 +415,7 @@
do
{
// among, line 59
- if (find_among(a_0, 3) == 0)
+ if (FindAmong(a_0, 3) == 0)
{
goto lab3;
}
@@ -437,7 +437,7 @@
do
{
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab5;
}
@@ -471,7 +471,7 @@
do
{
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab8;
}
@@ -492,7 +492,7 @@
do
{
- if (!(out_grouping(g_v, 97, 251)))
+ if (!(OutGrouping(g_v, 97, 251)))
{
goto lab10;
}
@@ -515,7 +515,7 @@
do
{
- if (!(in_grouping(g_v, 97, 251)))
+ if (!(InGrouping(g_v, 97, 251)))
{
goto lab12;
}
@@ -536,7 +536,7 @@
do
{
- if (!(out_grouping(g_v, 97, 251)))
+ if (!(OutGrouping(g_v, 97, 251)))
{
goto lab14;
}
@@ -574,7 +574,7 @@
// [, line 77
m_bra = m_cursor;
// substring, line 77
- among_var = find_among(a_1, 4);
+ among_var = FindAmong(a_1, 4);
if (among_var == 0)
{
goto lab1;
@@ -588,17 +588,17 @@
case 1:
// (, line 78
// <-, line 78
- slice_from("i");
+ SliceFrom("i");
break;
case 2:
// (, line 79
// <-, line 79
- slice_from("u");
+ SliceFrom("u");
break;
case 3:
// (, line 80
// <-, line 80
- slice_from("y");
+ SliceFrom("y");
break;
case 4:
// (, line 81
@@ -674,7 +674,7 @@
// [, line 92
m_ket = m_cursor;
// substring, line 92
- among_var = find_among_b(a_4, 43);
+ among_var = FindAmongB(a_4, 43);
if (among_var == 0)
{
return false;
@@ -693,7 +693,7 @@
return false;
}
// delete, line 96
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 99
@@ -703,7 +703,7 @@
return false;
}
// delete, line 99
- slice_del();
+ SliceDel();
// try, line 100
v_1 = m_limit - m_cursor;
@@ -713,7 +713,7 @@
// [, line 100
m_ket = m_cursor;
// literal, line 100
- if (!(eq_s_b(2, "ic")))
+ if (!(Eq_S_B(2, "ic")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -735,13 +735,13 @@
goto lab2;
}
// delete, line 100
- slice_del();
+ SliceDel();
goto lab1;
} while (false);
lab2:
m_cursor = m_limit - v_2;
// <-, line 100
- slice_from("iqU");
+ SliceFrom("iqU");
} while (false);
lab1:;
} while (false);
@@ -755,7 +755,7 @@
return false;
}
// <-, line 104
- slice_from("log");
+ SliceFrom("log");
break;
case 4:
// (, line 107
@@ -765,7 +765,7 @@
return false;
}
// <-, line 107
- slice_from("u");
+ SliceFrom("u");
break;
case 5:
// (, line 110
@@ -775,7 +775,7 @@
return false;
}
// <-, line 110
- slice_from("ent");
+ SliceFrom("ent");
break;
case 6:
// (, line 113
@@ -785,7 +785,7 @@
return false;
}
// delete, line 114
- slice_del();
+ SliceDel();
// try, line 115
v_3 = m_limit - m_cursor;
@@ -795,7 +795,7 @@
// [, line 116
m_ket = m_cursor;
// substring, line 116
- among_var = find_among_b(a_2, 6);
+ among_var = FindAmongB(a_2, 6);
if (among_var == 0)
{
m_cursor = m_limit - v_3;
@@ -817,11 +817,11 @@
goto lab3;
}
// delete, line 117
- slice_del();
+ SliceDel();
// [, line 117
m_ket = m_cursor;
// literal, line 117
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_3;
goto lab3;
@@ -835,7 +835,7 @@
goto lab3;
}
// delete, line 117
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 118
@@ -854,7 +854,7 @@
goto lab5;
}
// delete, line 118
- slice_del();
+ SliceDel();
goto lab4;
} while (false);
lab5:
@@ -867,7 +867,7 @@
goto lab3;
}
// <-, line 118
- slice_from("eux");
+ SliceFrom("eux");
} while (false);
lab4:
break;
@@ -880,7 +880,7 @@
goto lab3;
}
// delete, line 120
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 122
@@ -891,7 +891,7 @@
goto lab3;
}
// <-, line 122
- slice_from("i");
+ SliceFrom("i");
break;
}
} while (false);
@@ -905,7 +905,7 @@
return false;
}
// delete, line 129
- slice_del();
+ SliceDel();
// try, line 130
v_5 = m_limit - m_cursor;
@@ -915,7 +915,7 @@
// [, line 131
m_ket = m_cursor;
// substring, line 131
- among_var = find_among_b(a_3, 3);
+ among_var = FindAmongB(a_3, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_5;
@@ -945,13 +945,13 @@
goto lab8;
}
// delete, line 132
- slice_del();
+ SliceDel();
goto lab7;
} while (false);
lab8:
m_cursor = m_limit - v_6;
// <-, line 132
- slice_from("abl");
+ SliceFrom("abl");
} while (false);
lab7:
break;
@@ -972,13 +972,13 @@
goto lab10;
}
// delete, line 133
- slice_del();
+ SliceDel();
goto lab9;
} while (false);
lab10:
m_cursor = m_limit - v_7;
// <-, line 133
- slice_from("iqU");
+ SliceFrom("iqU");
} while (false);
lab9:
break;
@@ -991,7 +991,7 @@
goto lab6;
}
// delete, line 134
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -1005,7 +1005,7 @@
return false;
}
// delete, line 141
- slice_del();
+ SliceDel();
// try, line 142
v_8 = m_limit - m_cursor;
@@ -1015,7 +1015,7 @@
// [, line 142
m_ket = m_cursor;
// literal, line 142
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_8;
goto lab11;
@@ -1029,11 +1029,11 @@
goto lab11;
}
// delete, line 142
- slice_del();
+ SliceDel();
// [, line 142
m_ket = m_cursor;
// literal, line 142
- if (!(eq_s_b(2, "ic")))
+ if (!(Eq_S_B(2, "ic")))
{
m_cursor = m_limit - v_8;
goto lab11;
@@ -1055,13 +1055,13 @@
goto lab13;
}
// delete, line 142
- slice_del();
+ SliceDel();
goto lab12;
} while (false);
lab13:
m_cursor = m_limit - v_9;
// <-, line 142
- slice_from("iqU");
+ SliceFrom("iqU");
} while (false);
lab12:;
} while (false);
@@ -1070,7 +1070,7 @@
case 9:
// (, line 144
// <-, line 144
- slice_from("eau");
+ SliceFrom("eau");
break;
case 10:
// (, line 145
@@ -1080,7 +1080,7 @@
return false;
}
// <-, line 145
- slice_from("al");
+ SliceFrom("al");
break;
case 11:
// (, line 147
@@ -1099,7 +1099,7 @@
goto lab15;
}
// delete, line 147
- slice_del();
+ SliceDel();
goto lab14;
} while (false);
lab15:
@@ -1111,7 +1111,7 @@
return false;
}
// <-, line 147
- slice_from("eux");
+ SliceFrom("eux");
} while (false);
lab14:
break;
@@ -1122,12 +1122,12 @@
{
return false;
}
- if (!(out_grouping_b(g_v, 97, 251)))
+ if (!(OutGroupingB(g_v, 97, 251)))
{
return false;
}
// delete, line 150
- slice_del();
+ SliceDel();
break;
case 13:
// (, line 155
@@ -1139,7 +1139,7 @@
// fail, line 155
// (, line 155
// <-, line 155
- slice_from("ant");
+ SliceFrom("ant");
return false;
case 14:
// (, line 156
@@ -1151,14 +1151,14 @@
// fail, line 156
// (, line 156
// <-, line 156
- slice_from("ent");
+ SliceFrom("ent");
return false;
case 15:
// (, line 158
// test, line 158
v_11 = m_limit - m_cursor;
// (, line 158
- if (!(in_grouping_b(g_v, 97, 251)))
+ if (!(InGroupingB(g_v, 97, 251)))
{
return false;
}
@@ -1171,7 +1171,7 @@
// fail, line 158
// (, line 158
// delete, line 158
- slice_del();
+ SliceDel();
return false;
}
return true;
@@ -1197,7 +1197,7 @@
// [, line 164
m_ket = m_cursor;
// substring, line 164
- among_var = find_among_b(a_5, 35);
+ among_var = FindAmongB(a_5, 35);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -1212,13 +1212,13 @@
return false;
case 1:
// (, line 170
- if (!(out_grouping_b(g_v, 97, 251)))
+ if (!(OutGroupingB(g_v, 97, 251)))
{
m_limit_backward = v_2;
return false;
}
// delete, line 170
- slice_del();
+ SliceDel();
break;
}
m_limit_backward = v_2;
@@ -1246,7 +1246,7 @@
// [, line 175
m_ket = m_cursor;
// substring, line 175
- among_var = find_among_b(a_6, 38);
+ among_var = FindAmongB(a_6, 38);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -1268,17 +1268,17 @@
return false;
}
// delete, line 177
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 185
// delete, line 185
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 190
// delete, line 190
- slice_del();
+ SliceDel();
// try, line 191
v_3 = m_limit - m_cursor;
@@ -1288,7 +1288,7 @@
// [, line 191
m_ket = m_cursor;
// literal, line 191
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
m_cursor = m_limit - v_3;
goto lab0;
@@ -1296,7 +1296,7 @@
// ], line 191
m_bra = m_cursor;
// delete, line 191
- slice_del();
+ SliceDel();
} while (false);
lab0:
break;
@@ -1323,7 +1323,7 @@
// [, line 199
m_ket = m_cursor;
// literal, line 199
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -1332,14 +1332,14 @@
m_bra = m_cursor;
// test, line 199
v_2 = m_limit - m_cursor;
- if (!(out_grouping_b(g_keep_with_s, 97, 232)))
+ if (!(OutGroupingB(g_keep_with_s, 97, 232)))
{
m_cursor = m_limit - v_1;
goto lab0;
}
m_cursor = m_limit - v_2;
// delete, line 199
- slice_del();
+ SliceDel();
} while (false);
lab0:
// setlimit, line 200
@@ -1357,7 +1357,7 @@
// [, line 201
m_ket = m_cursor;
// substring, line 201
- among_var = find_among_b(a_7, 7);
+ among_var = FindAmongB(a_7, 7);
if (among_var == 0)
{
m_limit_backward = v_4;
@@ -1387,7 +1387,7 @@
do
{
// literal, line 202
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab2;
}
@@ -1396,7 +1396,7 @@
lab2:
m_cursor = m_limit - v_5;
// literal, line 202
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
m_limit_backward = v_4;
return false;
@@ -1404,28 +1404,28 @@
} while (false);
lab1:
// delete, line 202
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 204
// <-, line 204
- slice_from("i");
+ SliceFrom("i");
break;
case 3:
// (, line 205
// delete, line 205
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 206
// literal, line 206
- if (!(eq_s_b(2, "gu")))
+ if (!(Eq_S_B(2, "gu")))
{
m_limit_backward = v_4;
return false;
}
// delete, line 206
- slice_del();
+ SliceDel();
break;
}
m_limit_backward = v_4;
@@ -1439,7 +1439,7 @@
// test, line 212
v_1 = m_limit - m_cursor;
// among, line 212
- if (find_among_b(a_8, 5) == 0)
+ if (FindAmongB(a_8, 5) == 0)
{
return false;
}
@@ -1455,7 +1455,7 @@
// ], line 212
m_bra = m_cursor;
// delete, line 212
- slice_del();
+ SliceDel();
return true;
}
@@ -1473,7 +1473,7 @@
do
{
- if (!(out_grouping_b(g_v, 97, 251)))
+ if (!(OutGroupingB(g_v, 97, 251)))
{
goto lab1;
}
@@ -1509,7 +1509,7 @@
do
{
// literal, line 217
- if (!(eq_s_b(1, "\u00E9")))
+ if (!(Eq_S_B(1, "\u00E9")))
{
goto lab3;
}
@@ -1518,7 +1518,7 @@
lab3:
m_cursor = m_limit - v_3;
// literal, line 217
- if (!(eq_s_b(1, "\u00E8")))
+ if (!(Eq_S_B(1, "\u00E8")))
{
return false;
}
@@ -1527,7 +1527,7 @@
// ], line 217
m_bra = m_cursor;
// <-, line 217
- slice_from("e");
+ SliceFrom("e");
return true;
}
@@ -1648,21 +1648,21 @@
{
// (, line 234
// literal, line 234
- if (!(eq_s_b(1, "Y")))
+ if (!(Eq_S_B(1, "Y")))
{
goto lab10;
}
// ], line 234
m_bra = m_cursor;
// <-, line 234
- slice_from("i");
+ SliceFrom("i");
goto lab9;
} while (false);
lab10:
m_cursor = m_limit - v_8;
// (, line 235
// literal, line 235
- if (!(eq_s_b(1, "\u00E7")))
+ if (!(Eq_S_B(1, "\u00E7")))
{
m_cursor = m_limit - v_7;
goto lab8;
@@ -1670,7 +1670,7 @@
// ], line 235
m_bra = m_cursor;
// <-, line 235
- slice_from("c");
+ SliceFrom("c");
} while (false);
lab9:;
} while (false);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/German2Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/German2Stemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/German2Stemmer.cs
index 31fc7aa..deef2dc 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/German2Stemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/German2Stemmer.cs
@@ -74,7 +74,7 @@
I_x = other.I_x;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -105,7 +105,7 @@
do
{
// (, line 30
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab3;
}
@@ -121,36 +121,36 @@
{
// (, line 31
// literal, line 31
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab5;
}
// ], line 31
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab5;
}
// <-, line 31
- slice_from("U");
+ SliceFrom("U");
goto lab4;
} while (false);
lab5:
m_cursor = v_4;
// (, line 32
// literal, line 32
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab3;
}
// ], line 32
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab3;
}
// <-, line 32
- slice_from("Y");
+ SliceFrom("Y");
} while (false);
lab4:
m_cursor = v_3;
@@ -194,7 +194,7 @@
// [, line 36
m_bra = m_cursor;
// substring, line 36
- among_var = find_among(a_0, 6);
+ among_var = FindAmong(a_0, 6);
if (among_var == 0)
{
goto lab7;
@@ -208,22 +208,22 @@
case 1:
// (, line 37
// <-, line 37
- slice_from("ss");
+ SliceFrom("ss");
break;
case 2:
// (, line 38
// <-, line 38
- slice_from("\u00E4");
+ SliceFrom("\u00E4");
break;
case 3:
// (, line 39
// <-, line 39
- slice_from("\u00F6");
+ SliceFrom("\u00F6");
break;
case 4:
// (, line 40
// <-, line 40
- slice_from("\u00FC");
+ SliceFrom("\u00FC");
break;
case 5:
// (, line 41
@@ -294,7 +294,7 @@
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab1;
}
@@ -315,7 +315,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab3;
}
@@ -350,7 +350,7 @@
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab6;
}
@@ -371,7 +371,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab8;
}
@@ -406,7 +406,7 @@
// [, line 63
m_bra = m_cursor;
// substring, line 63
- among_var = find_among(a_1, 6);
+ among_var = FindAmong(a_1, 6);
if (among_var == 0)
{
goto lab1;
@@ -420,27 +420,27 @@
case 1:
// (, line 64
// <-, line 64
- slice_from("y");
+ SliceFrom("y");
break;
case 2:
// (, line 65
// <-, line 65
- slice_from("u");
+ SliceFrom("u");
break;
case 3:
// (, line 66
// <-, line 66
- slice_from("a");
+ SliceFrom("a");
break;
case 4:
// (, line 67
// <-, line 67
- slice_from("o");
+ SliceFrom("o");
break;
case 5:
// (, line 68
// <-, line 68
- slice_from("u");
+ SliceFrom("u");
break;
case 6:
// (, line 69
@@ -511,7 +511,7 @@
// [, line 81
m_ket = m_cursor;
// substring, line 81
- among_var = find_among_b(a_2, 7);
+ among_var = FindAmongB(a_2, 7);
if (among_var == 0)
{
goto lab0;
@@ -530,16 +530,16 @@
case 1:
// (, line 83
// delete, line 83
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 86
- if (!(in_grouping_b(g_s_ending, 98, 116)))
+ if (!(InGroupingB(g_s_ending, 98, 116)))
{
goto lab0;
}
// delete, line 86
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -554,7 +554,7 @@
// [, line 91
m_ket = m_cursor;
// substring, line 91
- among_var = find_among_b(a_3, 4);
+ among_var = FindAmongB(a_3, 4);
if (among_var == 0)
{
goto lab1;
@@ -573,11 +573,11 @@
case 1:
// (, line 93
// delete, line 93
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 96
- if (!(in_grouping_b(g_st_ending, 98, 116)))
+ if (!(InGroupingB(g_st_ending, 98, 116)))
{
goto lab1;
}
@@ -591,7 +591,7 @@
m_cursor = c;
}
// delete, line 96
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -606,7 +606,7 @@
// [, line 101
m_ket = m_cursor;
// substring, line 101
- among_var = find_among_b(a_5, 8);
+ among_var = FindAmongB(a_5, 8);
if (among_var == 0)
{
goto lab2;
@@ -625,7 +625,7 @@
case 1:
// (, line 103
// delete, line 103
- slice_del();
+ SliceDel();
// try, line 104
v_4 = m_limit - m_cursor;
@@ -635,7 +635,7 @@
// [, line 104
m_ket = m_cursor;
// literal, line 104
- if (!(eq_s_b(2, "ig")))
+ if (!(Eq_S_B(2, "ig")))
{
m_cursor = m_limit - v_4;
goto lab3;
@@ -649,7 +649,7 @@
do
{
// literal, line 104
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab4;
}
@@ -666,7 +666,7 @@
goto lab3;
}
// delete, line 104
- slice_del();
+ SliceDel();
} while (false);
lab3:
break;
@@ -679,7 +679,7 @@
do
{
// literal, line 107
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab5;
}
@@ -689,12 +689,12 @@
m_cursor = m_limit - v_6;
}
// delete, line 107
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 110
// delete, line 110
- slice_del();
+ SliceDel();
// try, line 111
v_7 = m_limit - m_cursor;
@@ -712,7 +712,7 @@
do
{
// literal, line 112
- if (!(eq_s_b(2, "er")))
+ if (!(Eq_S_B(2, "er")))
{
goto lab8;
}
@@ -721,7 +721,7 @@
lab8:
m_cursor = m_limit - v_8;
// literal, line 112
- if (!(eq_s_b(2, "en")))
+ if (!(Eq_S_B(2, "en")))
{
m_cursor = m_limit - v_7;
goto lab6;
@@ -737,14 +737,14 @@
goto lab6;
}
// delete, line 112
- slice_del();
+ SliceDel();
} while (false);
lab6:
break;
case 4:
// (, line 116
// delete, line 116
- slice_del();
+ SliceDel();
// try, line 117
v_9 = m_limit - m_cursor;
@@ -754,7 +754,7 @@
// [, line 118
m_ket = m_cursor;
// substring, line 118
- among_var = find_among_b(a_4, 2);
+ among_var = FindAmongB(a_4, 2);
if (among_var == 0)
{
m_cursor = m_limit - v_9;
@@ -776,7 +776,7 @@
case 1:
// (, line 120
// delete, line 120
- slice_del();
+ SliceDel();
break;
}
} while (false);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/GermanStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/GermanStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/GermanStemmer.cs
index 24d650b..37ff2e7 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/GermanStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/GermanStemmer.cs
@@ -65,7 +65,7 @@
I_x = other.I_x;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -100,14 +100,14 @@
// [, line 32
m_bra = m_cursor;
// literal, line 32
- if (!(eq_s(1, "\u00DF")))
+ if (!(Eq_S(1, "\u00DF")))
{
goto lab3;
}
// ], line 32
m_ket = m_cursor;
// <-, line 32
- slice_from("ss");
+ SliceFrom("ss");
goto lab2;
} while (false);
lab3:
@@ -154,7 +154,7 @@
do
{
// (, line 36
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab7;
}
@@ -170,36 +170,36 @@
{
// (, line 37
// literal, line 37
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab9;
}
// ], line 37
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab9;
}
// <-, line 37
- slice_from("U");
+ SliceFrom("U");
goto lab8;
} while (false);
lab9:
m_cursor = v_6;
// (, line 38
// literal, line 38
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab7;
}
// ], line 38
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab7;
}
// <-, line 38
- slice_from("Y");
+ SliceFrom("Y");
} while (false);
lab8:
m_cursor = v_5;
@@ -261,7 +261,7 @@
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab1;
}
@@ -282,7 +282,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab3;
}
@@ -316,7 +316,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab6;
}
@@ -337,7 +337,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab8;
}
@@ -372,7 +372,7 @@
// [, line 57
m_bra = m_cursor;
// substring, line 57
- among_var = find_among(a_0, 6);
+ among_var = FindAmong(a_0, 6);
if (among_var == 0)
{
goto lab1;
@@ -386,27 +386,27 @@
case 1:
// (, line 58
// <-, line 58
- slice_from("y");
+ SliceFrom("y");
break;
case 2:
// (, line 59
// <-, line 59
- slice_from("u");
+ SliceFrom("u");
break;
case 3:
// (, line 60
// <-, line 60
- slice_from("a");
+ SliceFrom("a");
break;
case 4:
// (, line 61
// <-, line 61
- slice_from("o");
+ SliceFrom("o");
break;
case 5:
// (, line 62
// <-, line 62
- slice_from("u");
+ SliceFrom("u");
break;
case 6:
// (, line 63
@@ -476,7 +476,7 @@
// [, line 75
m_ket = m_cursor;
// substring, line 75
- among_var = find_among_b(a_1, 7);
+ among_var = FindAmongB(a_1, 7);
if (among_var == 0)
{
goto lab0;
@@ -495,16 +495,16 @@
case 1:
// (, line 77
// delete, line 77
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 80
- if (!(in_grouping_b(g_s_ending, 98, 116)))
+ if (!(InGroupingB(g_s_ending, 98, 116)))
{
goto lab0;
}
// delete, line 80
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -518,7 +518,7 @@
// [, line 85
m_ket = m_cursor;
// substring, line 85
- among_var = find_among_b(a_2, 4);
+ among_var = FindAmongB(a_2, 4);
if (among_var == 0)
{
goto lab1;
@@ -537,11 +537,11 @@
case 1:
// (, line 87
// delete, line 87
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 90
- if (!(in_grouping_b(g_st_ending, 98, 116)))
+ if (!(InGroupingB(g_st_ending, 98, 116)))
{
goto lab1;
}
@@ -555,7 +555,7 @@
m_cursor = c;
}
// delete, line 90
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -569,7 +569,7 @@
// [, line 95
m_ket = m_cursor;
// substring, line 95
- among_var = find_among_b(a_4, 8);
+ among_var = FindAmongB(a_4, 8);
if (among_var == 0)
{
goto lab2;
@@ -588,7 +588,7 @@
case 1:
// (, line 97
// delete, line 97
- slice_del();
+ SliceDel();
// try, line 98
v_4 = m_limit - m_cursor;
do
@@ -597,7 +597,7 @@
// [, line 98
m_ket = m_cursor;
// literal, line 98
- if (!(eq_s_b(2, "ig")))
+ if (!(Eq_S_B(2, "ig")))
{
m_cursor = m_limit - v_4;
goto lab3;
@@ -610,7 +610,7 @@
do
{
// literal, line 98
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab4;
}
@@ -627,7 +627,7 @@
goto lab3;
}
// delete, line 98
- slice_del();
+ SliceDel();
} while (false);
lab3:
break;
@@ -639,7 +639,7 @@
do
{
// literal, line 101
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab5;
}
@@ -649,12 +649,12 @@
m_cursor = m_limit - v_6;
}
// delete, line 101
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 104
// delete, line 104
- slice_del();
+ SliceDel();
// try, line 105
v_7 = m_limit - m_cursor;
do
@@ -669,7 +669,7 @@
do
{
// literal, line 106
- if (!(eq_s_b(2, "er")))
+ if (!(Eq_S_B(2, "er")))
{
goto lab8;
}
@@ -678,7 +678,7 @@
lab8:
m_cursor = m_limit - v_8;
// literal, line 106
- if (!(eq_s_b(2, "en")))
+ if (!(Eq_S_B(2, "en")))
{
m_cursor = m_limit - v_7;
goto lab6;
@@ -694,14 +694,14 @@
goto lab6;
}
// delete, line 106
- slice_del();
+ SliceDel();
} while (false);
lab6:
break;
case 4:
// (, line 110
// delete, line 110
- slice_del();
+ SliceDel();
// try, line 111
v_9 = m_limit - m_cursor;
do
@@ -710,7 +710,7 @@
// [, line 112
m_ket = m_cursor;
// substring, line 112
- among_var = find_among_b(a_3, 2);
+ among_var = FindAmongB(a_3, 2);
if (among_var == 0)
{
m_cursor = m_limit - v_9;
@@ -732,7 +732,7 @@
case 1:
// (, line 114
// delete, line 114
- slice_del();
+ SliceDel();
break;
}
} while (false);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/HungarianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/HungarianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/HungarianStemmer.cs
index 10b5f76..8a029cf 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/HungarianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/HungarianStemmer.cs
@@ -233,7 +233,7 @@
private void copy_from(HungarianStemmer other)
{
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -250,7 +250,7 @@
do
{
// (, line 48
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab1;
}
@@ -260,7 +260,7 @@
v_2 = m_cursor;
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab3;
}
@@ -283,7 +283,7 @@
do
{
// among, line 49
- if (find_among(a_0, 8) == 0)
+ if (FindAmong(a_0, 8) == 0)
{
goto lab5;
}
@@ -306,7 +306,7 @@
lab1:
m_cursor = v_1;
// (, line 53
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
return false;
}
@@ -315,7 +315,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab7;
}
@@ -352,7 +352,7 @@
// [, line 61
m_ket = m_cursor;
// substring, line 61
- among_var = find_among_b(a_1, 2);
+ among_var = FindAmongB(a_1, 2);
if (among_var == 0)
{
return false;
@@ -371,12 +371,12 @@
case 1:
// (, line 62
// <-, line 62
- slice_from("a");
+ SliceFrom("a");
break;
case 2:
// (, line 63
// <-, line 63
- slice_from("e");
+ SliceFrom("e");
break;
}
return true;
@@ -389,7 +389,7 @@
// test, line 68
v_1 = m_limit - m_cursor;
// among, line 68
- if (find_among_b(a_2, 23) == 0)
+ if (FindAmongB(a_2, 23) == 0)
{
return false;
}
@@ -420,7 +420,7 @@
// ], line 73
m_bra = m_cursor;
// delete, line 73
- slice_del();
+ SliceDel();
return true;
}
@@ -431,7 +431,7 @@
// [, line 77
m_ket = m_cursor;
// substring, line 77
- among_var = find_among_b(a_3, 2);
+ among_var = FindAmongB(a_3, 2);
if (among_var == 0)
{
return false;
@@ -465,7 +465,7 @@
break;
}
// delete, line 81
- slice_del();
+ SliceDel();
// call undouble, line 82
if (!r_undouble())
{
@@ -480,7 +480,7 @@
// [, line 87
m_ket = m_cursor;
// substring, line 87
- if (find_among_b(a_4, 44) == 0)
+ if (FindAmongB(a_4, 44) == 0)
{
return false;
}
@@ -492,7 +492,7 @@
return false;
}
// delete, line 111
- slice_del();
+ SliceDel();
// call v_ending, line 112
if (!r_v_ending())
{
@@ -508,7 +508,7 @@
// [, line 116
m_ket = m_cursor;
// substring, line 116
- among_var = find_among_b(a_5, 3);
+ among_var = FindAmongB(a_5, 3);
if (among_var == 0)
{
return false;
@@ -527,17 +527,17 @@
case 1:
// (, line 117
// <-, line 117
- slice_from("e");
+ SliceFrom("e");
break;
case 2:
// (, line 118
// <-, line 118
- slice_from("a");
+ SliceFrom("a");
break;
case 3:
// (, line 119
// <-, line 119
- slice_from("a");
+ SliceFrom("a");
break;
}
return true;
@@ -550,7 +550,7 @@
// [, line 124
m_ket = m_cursor;
// substring, line 124
- among_var = find_among_b(a_6, 6);
+ among_var = FindAmongB(a_6, 6);
if (among_var == 0)
{
return false;
@@ -569,22 +569,22 @@
case 1:
// (, line 125
// delete, line 125
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 126
// delete, line 126
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 127
// <-, line 127
- slice_from("a");
+ SliceFrom("a");
break;
case 4:
// (, line 128
// <-, line 128
- slice_from("e");
+ SliceFrom("e");
break;
}
return true;
@@ -597,7 +597,7 @@
// [, line 133
m_ket = m_cursor;
// substring, line 133
- among_var = find_among_b(a_7, 2);
+ among_var = FindAmongB(a_7, 2);
if (among_var == 0)
{
return false;
@@ -631,7 +631,7 @@
break;
}
// delete, line 137
- slice_del();
+ SliceDel();
// call undouble, line 138
if (!r_undouble())
{
@@ -647,7 +647,7 @@
// [, line 142
m_ket = m_cursor;
// substring, line 142
- among_var = find_among_b(a_8, 7);
+ among_var = FindAmongB(a_8, 7);
if (among_var == 0)
{
return false;
@@ -666,37 +666,37 @@
case 1:
// (, line 143
// <-, line 143
- slice_from("a");
+ SliceFrom("a");
break;
case 2:
// (, line 144
// <-, line 144
- slice_from("e");
+ SliceFrom("e");
break;
case 3:
// (, line 145
// delete, line 145
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 146
// delete, line 146
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 147
// delete, line 147
- slice_del();
+ SliceDel();
break;
case 6:
// (, line 148
// delete, line 148
- slice_del();
+ SliceDel();
break;
case 7:
// (, line 149
// delete, line 149
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -709,7 +709,7 @@
// [, line 154
m_ket = m_cursor;
// substring, line 154
- among_var = find_among_b(a_9, 12);
+ among_var = FindAmongB(a_9, 12);
if (among_var == 0)
{
return false;
@@ -728,47 +728,47 @@
case 1:
// (, line 155
// delete, line 155
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 156
// <-, line 156
- slice_from("e");
+ SliceFrom("e");
break;
case 3:
// (, line 157
// <-, line 157
- slice_from("a");
+ SliceFrom("a");
break;
case 4:
// (, line 158
// delete, line 158
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 159
// <-, line 159
- slice_from("e");
+ SliceFrom("e");
break;
case 6:
// (, line 160
// <-, line 160
- slice_from("a");
+ SliceFrom("a");
break;
case 7:
// (, line 161
// delete, line 161
- slice_del();
+ SliceDel();
break;
case 8:
// (, line 162
// <-, line 162
- slice_from("e");
+ SliceFrom("e");
break;
case 9:
// (, line 163
// delete, line 163
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -781,7 +781,7 @@
// [, line 168
m_ket = m_cursor;
// substring, line 168
- among_var = find_among_b(a_10, 31);
+ among_var = FindAmongB(a_10, 31);
if (among_var == 0)
{
return false;
@@ -800,102 +800,102 @@
case 1:
// (, line 169
// delete, line 169
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 170
// <-, line 170
- slice_from("a");
+ SliceFrom("a");
break;
case 3:
// (, line 171
// <-, line 171
- slice_from("e");
+ SliceFrom("e");
break;
case 4:
// (, line 172
// delete, line 172
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 173
// <-, line 173
- slice_from("a");
+ SliceFrom("a");
break;
case 6:
// (, line 174
// <-, line 174
- slice_from("e");
+ SliceFrom("e");
break;
case 7:
// (, line 175
// delete, line 175
- slice_del();
+ SliceDel();
break;
case 8:
// (, line 176
// delete, line 176
- slice_del();
+ SliceDel();
break;
case 9:
// (, line 177
// delete, line 177
- slice_del();
+ SliceDel();
break;
case 10:
// (, line 178
// <-, line 178
- slice_from("a");
+ SliceFrom("a");
break;
case 11:
// (, line 179
// <-, line 179
- slice_from("e");
+ SliceFrom("e");
break;
case 12:
// (, line 180
// delete, line 180
- slice_del();
+ SliceDel();
break;
case 13:
// (, line 181
// delete, line 181
- slice_del();
+ SliceDel();
break;
case 14:
// (, line 182
// <-, line 182
- slice_from("a");
+ SliceFrom("a");
break;
case 15:
// (, line 183
// <-, line 183
- slice_from("e");
+ SliceFrom("e");
break;
case 16:
// (, line 184
// delete, line 184
- slice_del();
+ SliceDel();
break;
case 17:
// (, line 185
// delete, line 185
- slice_del();
+ SliceDel();
break;
case 18:
// (, line 186
// delete, line 186
- slice_del();
+ SliceDel();
break;
case 19:
// (, line 187
// <-, line 187
- slice_from("a");
+ SliceFrom("a");
break;
case 20:
// (, line 188
// <-, line 188
- slice_from("e");
+ SliceFrom("e");
break;
}
return true;
@@ -908,7 +908,7 @@
// [, line 193
m_ket = m_cursor;
// substring, line 193
- among_var = find_among_b(a_11, 42);
+ among_var = FindAmongB(a_11, 42);
if (among_var == 0)
{
return false;
@@ -927,147 +927,147 @@
case 1:
// (, line 194
// delete, line 194
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 195
// <-, line 195
- slice_from("a");
+ SliceFrom("a");
break;
case 3:
// (, line 196
// <-, line 196
- slice_from("e");
+ SliceFrom("e");
break;
case 4:
// (, line 197
// delete, line 197
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 198
// delete, line 198
- slice_del();
+ SliceDel();
break;
case 6:
// (, line 199
// delete, line 199
- slice_del();
+ SliceDel();
break;
case 7:
// (, line 200
// <-, line 200
- slice_from("a");
+ SliceFrom("a");
break;
case 8:
// (, line 201
// <-, line 201
- slice_from("e");
+ SliceFrom("e");
break;
case 9:
// (, line 202
// delete, line 202
- slice_del();
+ SliceDel();
break;
case 10:
// (, line 203
// delete, line 203
- slice_del();
+ SliceDel();
break;
case 11:
// (, line 204
// delete, line 204
- slice_del();
+ SliceDel();
break;
case 12:
// (, line 205
// <-, line 205
- slice_from("a");
+ SliceFrom("a");
break;
case 13:
// (, line 206
// <-, line 206
- slice_from("e");
+ SliceFrom("e");
break;
case 14:
// (, line 207
// delete, line 207
- slice_del();
+ SliceDel();
break;
case 15:
// (, line 208
// delete, line 208
- slice_del();
+ SliceDel();
break;
case 16:
// (, line 209
// delete, line 209
- slice_del();
+ SliceDel();
break;
case 17:
// (, line 210
// delete, line 210
- slice_del();
+ SliceDel();
break;
case 18:
// (, line 211
// <-, line 211
- slice_from("a");
+ SliceFrom("a");
break;
case 19:
// (, line 212
// <-, line 212
- slice_from("e");
+ SliceFrom("e");
break;
case 20:
// (, line 214
// delete, line 214
- slice_del();
+ SliceDel();
break;
case 21:
// (, line 215
// delete, line 215
- slice_del();
+ SliceDel();
break;
case 22:
// (, line 216
// <-, line 216
- slice_from("a");
+ SliceFrom("a");
break;
case 23:
// (, line 217
// <-, line 217
- slice_from("e");
+ SliceFrom("e");
break;
case 24:
// (, line 218
// delete, line 218
- slice_del();
+ SliceDel();
break;
case 25:
// (, line 219
// delete, line 219
- slice_del();
+ SliceDel();
break;
case 26:
// (, line 220
// delete, line 220
- slice_del();
+ SliceDel();
break;
case 27:
// (, line 221
// <-, line 221
- slice_from("a");
+ SliceFrom("a");
break;
case 28:
// (, line 222
// <-, line 222
- slice_from("e");
+ SliceFrom("e");
break;
case 29:
// (, line 223
// delete, line 223
- slice_del();
+ SliceDel();
break;
}
return true;
[31/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Added the original
Append() method overloads for ICharSequence (primarily so OpenStringBuilder
can work with itself), and for StringBuilder for better compatibility with .NET
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Added the original Append() method overloads for ICharSequence (primarily so OpenStringBuilder can work with itself), and for StringBuilder for better compatibility with .NET
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/252e30cc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/252e30cc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/252e30cc
Branch: refs/heads/api-work
Commit: 252e30cca29b71adfd0663a8e03f4828fae9af52
Parents: eedf7d8
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:55:46 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:55:46 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/OpenStringBuilder.cs | 39 +++++++++++++++++++-
1 file changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/252e30cc/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index 74035ea..8987189 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -1,6 +1,7 @@
\ufeffusing Lucene.Net.Support;
using System;
using System.Diagnostics.CodeAnalysis;
+using System.Text;
namespace Lucene.Net.Analysis.Util
{
@@ -81,12 +82,46 @@ namespace Lucene.Net.Analysis.Util
get { return m_buf.Length; }
}
- public virtual OpenStringBuilder Append(string csq) // LUCENENET TODO: Add overloads for ICharSequence and StringBuilder
+ public virtual OpenStringBuilder Append(ICharSequence csq)
{
return Append(csq, 0, csq.Length);
}
- public virtual OpenStringBuilder Append(string csq, int start, int end) // LUCENENET TODO: Add overloads for ICharSequence and StringBuilder
+ public virtual OpenStringBuilder Append(ICharSequence csq, int start, int end)
+ {
+ Reserve(end - start);
+ for (int i = start; i < end; i++)
+ {
+ UnsafeWrite(csq[i]);
+ }
+ return this;
+ }
+
+ // LUCENENET specific - overload for string (more common in .NET than ICharSequence)
+ public virtual OpenStringBuilder Append(string csq)
+ {
+ return Append(csq, 0, csq.Length);
+ }
+
+ // LUCENENET specific - overload for string (more common in .NET than ICharSequence)
+ public virtual OpenStringBuilder Append(string csq, int start, int end)
+ {
+ Reserve(end - start);
+ for (int i = start; i < end; i++)
+ {
+ UnsafeWrite(csq[i]);
+ }
+ return this;
+ }
+
+ // LUCENENET specific - overload for StringBuilder
+ public virtual OpenStringBuilder Append(StringBuilder csq)
+ {
+ return Append(csq, 0, csq.Length);
+ }
+
+ // LUCENENET specific - overload for StringBuilder
+ public virtual OpenStringBuilder Append(StringBuilder csq, int start, int end)
{
Reserve(end - start);
for (int i = start; i < end; i++)
[07/39] lucenenet git commit: Lucene.Net.Analysis.Sv refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Sv refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b158f914
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b158f914
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b158f914
Branch: refs/heads/api-work
Commit: b158f9146c606334c6b40403258ec9541c50a391
Parents: ab69b43
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 12:53:47 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 12:53:47 2017 +0700
----------------------------------------------------------------------
.../Analysis/Sv/SwedishAnalyzer.cs | 17 +++++++++--------
.../Analysis/Sv/SwedishLightStemFilter.cs | 5 +++--
.../Analysis/Sv/SwedishLightStemFilterFactory.cs | 6 +++---
.../Analysis/Sv/SwedishLightStemmer.cs | 3 +--
4 files changed, 16 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b158f914/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
index 7a334a7..d2572ba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishAnalyzer.cs
@@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.Sv
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.;
/// </summary>
private class DefaultSetHolder
@@ -79,10 +79,11 @@ namespace Lucene.Net.Analysis.Sv
}
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public SwedishAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
@@ -92,7 +93,7 @@ namespace Lucene.Net.Analysis.Sv
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
public SwedishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}
@@ -105,7 +106,7 @@ namespace Lucene.Net.Analysis.Sv
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
public SwedishAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
- : base(matchVersion, stopwords)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
@@ -113,13 +114,13 @@ namespace Lucene.Net.Analysis.Sv
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
- /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>
- /// , <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="SnowballFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b158f914/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
index 356469e..df8b2f3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilter.cs
@@ -25,7 +25,7 @@ namespace Lucene.Net.Analysis.Sv
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
@@ -35,7 +35,8 @@ namespace Lucene.Net.Analysis.Sv
private readonly ICharTermAttribute termAtt;
private readonly IKeywordAttribute keywordAttr;
- public SwedishLightStemFilter(TokenStream input) : base(input)
+ public SwedishLightStemFilter(TokenStream input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b158f914/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
index 11ca0ab..5fcdedf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemFilterFactory.cs
@@ -34,10 +34,10 @@ namespace Lucene.Net.Analysis.Sv
/// </summary>
public class SwedishLightStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new SwedishLightStemFilterFactory </summary>
- public SwedishLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="SwedishLightStemFilterFactory"/> </summary>
+ public SwedishLightStemFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b158f914/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
index f9ff21c..1c5d082 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sv/SwedishLightStemmer.cs
@@ -59,13 +59,12 @@ namespace Lucene.Net.Analysis.Sv
/// Light Stemmer for Swedish.
/// <para>
/// This stemmer implements the algorithm described in:
- /// <i>Report on CLEF-2003 Monolingual Tracks</i>
+ /// <c>Report on CLEF-2003 Monolingual Tracks</c>
/// Jacques Savoy
/// </para>
/// </summary>
public class SwedishLightStemmer
{
-
public virtual int Stem(char[] s, int len)
{
if (len > 4 && s[len - 1] == 's')
[27/39] lucenenet git commit:
Lucene.Net.Analysis.Standard.StandardTokenizerInterface_Fields refactor:
renamed StandardTokenizerInterface (after fixing .NET interface name
convention, the original name no longer conflicts with the interface)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Standard.StandardTokenizerInterface_Fields refactor: renamed StandardTokenizerInterface (after fixing .NET interface name convention, the original name no longer conflicts with the interface)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/01f01abd
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/01f01abd
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/01f01abd
Branch: refs/heads/api-work
Commit: 01f01abdae2373f0c419dfa1a982e062ef813540
Parents: e382d8a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:16:31 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:16:31 2017 +0700
----------------------------------------------------------------------
.../Analysis/Standard/ClassicTokenizer.cs | 2 +-
.../Analysis/Standard/ClassicTokenizerImpl.cs | 8 ++++----
.../Analysis/Standard/StandardTokenizer.cs | 2 +-
.../Analysis/Standard/StandardTokenizerImpl.cs | 8 ++++----
.../Analysis/Standard/StandardTokenizerInterface.cs | 4 ++--
.../Analysis/Standard/Std31/StandardTokenizerImpl31.cs | 2 +-
.../Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs | 2 +-
.../Analysis/Standard/Std34/StandardTokenizerImpl34.cs | 2 +-
.../Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs | 2 +-
.../Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs | 2 +-
.../Analysis/Standard/Std40/StandardTokenizerImpl40.cs | 2 +-
.../Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs | 2 +-
.../Analysis/Standard/UAX29URLEmailTokenizer.cs | 2 +-
.../Analysis/Standard/UAX29URLEmailTokenizerImpl.cs | 4 ++--
14 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
index 6898ca6..888615a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -153,7 +153,7 @@ namespace Lucene.Net.Analysis.Standard
{
int tokenType = scanner.GetNextToken();
- if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+ if (tokenType == StandardTokenizerInterface.YYEOF)
{
return false;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index 7de5f9e..790fa76 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -695,7 +695,7 @@ namespace Lucene.Net.Analysis.Standard
}
else if (zzAtEOF)
{
- zzInput = StandardTokenizerInterface_Fields.YYEOF;
+ zzInput = StandardTokenizerInterface.YYEOF;
goto zzForActionBreak;
}
else
@@ -711,7 +711,7 @@ namespace Lucene.Net.Analysis.Standard
zzEndReadL = zzEndRead;
if (eof)
{
- zzInput = StandardTokenizerInterface_Fields.YYEOF;
+ zzInput = StandardTokenizerInterface.YYEOF;
goto zzForActionBreak;
}
else
@@ -808,10 +808,10 @@ namespace Lucene.Net.Analysis.Standard
case 20:
break;
default:
- if (zzInput == StandardTokenizerInterface_Fields.YYEOF && zzStartRead == zzCurrentPos)
+ if (zzInput == StandardTokenizerInterface.YYEOF && zzStartRead == zzCurrentPos)
{
zzAtEOF = true;
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
else
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
index dfc5ce9..c83e1e5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -204,7 +204,7 @@ namespace Lucene.Net.Analysis.Standard
{
int tokenType = scanner.GetNextToken();
- if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+ if (tokenType == StandardTokenizerInterface.YYEOF)
{
return false;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
index 62e0237..aa66336 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -1292,7 +1292,7 @@ namespace Lucene.Net.Analysis.Standard
}
else if (zzAtEOF)
{
- zzInput = StandardTokenizerInterface_Fields.YYEOF;
+ zzInput = StandardTokenizerInterface.YYEOF;
goto zzForActionBreak;
}
else
@@ -1308,7 +1308,7 @@ namespace Lucene.Net.Analysis.Standard
zzEndReadL = zzEndRead;
if (eof)
{
- zzInput = StandardTokenizerInterface_Fields.YYEOF;
+ zzInput = StandardTokenizerInterface.YYEOF;
goto zzForActionBreak;
}
else
@@ -1393,11 +1393,11 @@ namespace Lucene.Net.Analysis.Standard
case 16:
break;
default:
- if (zzInput == StandardTokenizerInterface_Fields.YYEOF && zzStartRead == zzCurrentPos)
+ if (zzInput == StandardTokenizerInterface.YYEOF && zzStartRead == zzCurrentPos)
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
index bfd4f1f..c750d7a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerInterface.cs
@@ -56,12 +56,12 @@ namespace Lucene.Net.Analysis.Standard
/// Resumes scanning until the next regular expression is matched,
/// the end of input is encountered or an I/O-Error occurs.
/// </summary>
- /// <returns> the next token, <see cref="StandardTokenizerInterface_Fields.YYEOF"/> on end of stream </returns>
+ /// <returns> the next token, <see cref="StandardTokenizerInterface.YYEOF"/> on end of stream </returns>
/// <exception cref="IOException"> if any I/O-Error occurs </exception>
int GetNextToken();
}
- public static class StandardTokenizerInterface_Fields // LUCENENET TODO: Rename StandardTokenizerInterface (no longer collides with interface name)
+ public static class StandardTokenizerInterface
{
/// <summary>
/// This character denotes the end of file </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
index 5e5667f..ececc6e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/StandardTokenizerImpl31.cs
@@ -1097,7 +1097,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
index 4fd4938..b5d759f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std31/UAX29URLEmailTokenizerImpl31.cs
@@ -3658,7 +3658,7 @@ namespace Lucene.Net.Analysis.Standard.Std31
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
index 855470d..4e52f06 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/StandardTokenizerImpl34.cs
@@ -1115,7 +1115,7 @@ namespace Lucene.Net.Analysis.Standard.Std34
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
index d774d6d..9bea471 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std34/UAX29URLEmailTokenizerImpl34.cs
@@ -3772,7 +3772,7 @@ namespace Lucene.Net.Analysis.Standard.Std34
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
index afa04ff..2ab6bae 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std36/UAX29URLEmailTokenizerImpl36.cs
@@ -4220,7 +4220,7 @@ namespace Lucene.Net.Analysis.Standard.Std36
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
index 59bd49d..5499abb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/StandardTokenizerImpl40.cs
@@ -1233,7 +1233,7 @@ namespace Lucene.Net.Analysis.Standard.Std40
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
index 3593baa..eabcd40 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
@@ -4438,7 +4438,7 @@ namespace Lucene.Net.Analysis.Standard.Std40
{
zzAtEOF = true;
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
}
else
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
index 83659e2..32a042c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -183,7 +183,7 @@ namespace Lucene.Net.Analysis.Standard
{
int tokenType = scanner.GetNextToken();
- if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+ if (tokenType == StandardTokenizerInterface.YYEOF)
{
return false;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/01f01abd/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
index dbf05a7..547a62c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
@@ -9572,12 +9572,12 @@ namespace Lucene.Net.Analysis.Standard
{
case YYINITIAL:
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
case 3117: break;
case AVOID_BAD_URL:
{
- return StandardTokenizerInterface_Fields.YYEOF;
+ return StandardTokenizerInterface.YYEOF;
}
case 3118: break;
default:
[19/39] lucenenet git commit: Lucene.Net.Analysis.Common/Tartarus
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/LovinsStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/LovinsStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/LovinsStemmer.cs
index 7d00c0a..151aa80 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/LovinsStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/LovinsStemmer.cs
@@ -370,7 +370,7 @@
private void copy_from(LovinsStemmer other)
{
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_A()
@@ -456,7 +456,7 @@
do
{
// literal, line 25
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab0;
}
@@ -491,7 +491,7 @@
do
{
// literal, line 26
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab0;
}
@@ -520,7 +520,7 @@
}
m_cursor = m_limit - v_1;
// literal, line 27
- if (!(eq_s_b(1, "f")))
+ if (!(Eq_S_B(1, "f")))
{
return false;
}
@@ -551,7 +551,7 @@
do
{
// literal, line 28
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
goto lab1;
}
@@ -560,7 +560,7 @@
lab1:
m_cursor = m_limit - v_2;
// literal, line 28
- if (!(eq_s_b(2, "ll")))
+ if (!(Eq_S_B(2, "ll")))
{
return false;
}
@@ -593,7 +593,7 @@
do
{
// literal, line 29
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab0;
}
@@ -608,7 +608,7 @@
do
{
// literal, line 29
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab1;
}
@@ -644,7 +644,7 @@
do
{
// literal, line 30
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
goto lab0;
}
@@ -659,7 +659,7 @@
do
{
// literal, line 30
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab1;
}
@@ -695,7 +695,7 @@
do
{
// literal, line 31
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
goto lab1;
}
@@ -706,7 +706,7 @@
do
{
// literal, line 31
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab2;
}
@@ -716,7 +716,7 @@
m_cursor = m_limit - v_2;
// (, line 31
// literal, line 31
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
@@ -727,7 +727,7 @@
}
m_cursor--;
// literal, line 31
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
return false;
}
@@ -762,7 +762,7 @@
do
{
// literal, line 32
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab0;
}
@@ -777,7 +777,7 @@
do
{
// literal, line 32
- if (!(eq_s_b(1, "x")))
+ if (!(Eq_S_B(1, "x")))
{
goto lab1;
}
@@ -793,7 +793,7 @@
{
// (, line 32
// literal, line 32
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab2;
}
@@ -803,7 +803,7 @@
do
{
// literal, line 32
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab3;
}
@@ -846,7 +846,7 @@
do
{
// literal, line 33
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
goto lab0;
}
@@ -861,7 +861,7 @@
do
{
// literal, line 33
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
goto lab1;
}
@@ -876,7 +876,7 @@
do
{
// literal, line 33
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab2;
}
@@ -891,7 +891,7 @@
do
{
// literal, line 33
- if (!(eq_s_b(1, "m")))
+ if (!(Eq_S_B(1, "m")))
{
goto lab3;
}
@@ -943,7 +943,7 @@
do
{
// literal, line 34
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab2;
}
@@ -994,7 +994,7 @@
do
{
// literal, line 35
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
goto lab1;
}
@@ -1003,7 +1003,7 @@
lab1:
m_cursor = m_limit - v_2;
// literal, line 35
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
return false;
}
@@ -1035,7 +1035,7 @@
do
{
// literal, line 36
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
goto lab0;
}
@@ -1084,7 +1084,7 @@
do
{
// literal, line 37
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
goto lab0;
}
@@ -1099,7 +1099,7 @@
do
{
// literal, line 37
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab1;
}
@@ -1135,7 +1135,7 @@
do
{
// literal, line 38
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab1;
}
@@ -1144,7 +1144,7 @@
lab1:
m_cursor = m_limit - v_2;
// literal, line 38
- if (!(eq_s_b(1, "r")))
+ if (!(Eq_S_B(1, "r")))
{
return false;
}
@@ -1178,7 +1178,7 @@
do
{
// literal, line 39
- if (!(eq_s_b(2, "dr")))
+ if (!(Eq_S_B(2, "dr")))
{
goto lab1;
}
@@ -1188,7 +1188,7 @@
m_cursor = m_limit - v_2;
// (, line 39
// literal, line 39
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
return false;
}
@@ -1198,7 +1198,7 @@
do
{
// literal, line 39
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
goto lab2;
}
@@ -1237,7 +1237,7 @@
do
{
// literal, line 40
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab1;
}
@@ -1247,7 +1247,7 @@
m_cursor = m_limit - v_2;
// (, line 40
// literal, line 40
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
return false;
}
@@ -1257,7 +1257,7 @@
do
{
// literal, line 40
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab2;
}
@@ -1295,7 +1295,7 @@
do
{
// literal, line 41
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
goto lab1;
}
@@ -1306,7 +1306,7 @@
do
{
// literal, line 41
- if (!(eq_s_b(1, "m")))
+ if (!(Eq_S_B(1, "m")))
{
goto lab2;
}
@@ -1317,7 +1317,7 @@
do
{
// literal, line 41
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab3;
}
@@ -1326,7 +1326,7 @@
lab3:
m_cursor = m_limit - v_2;
// literal, line 41
- if (!(eq_s_b(1, "r")))
+ if (!(Eq_S_B(1, "r")))
{
return false;
}
@@ -1352,7 +1352,7 @@
}
m_cursor = m_limit - v_1;
// literal, line 42
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
return false;
}
@@ -1383,7 +1383,7 @@
do
{
// literal, line 43
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab0;
}
@@ -1398,7 +1398,7 @@
do
{
// literal, line 43
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab1;
}
@@ -1434,7 +1434,7 @@
do
{
// literal, line 44
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
goto lab1;
}
@@ -1445,7 +1445,7 @@
do
{
// literal, line 44
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab2;
}
@@ -1455,7 +1455,7 @@
m_cursor = m_limit - v_2;
// (, line 44
// literal, line 44
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
@@ -1466,7 +1466,7 @@
}
m_cursor--;
// literal, line 44
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
return false;
}
@@ -1492,7 +1492,7 @@
}
m_cursor = m_limit - v_1;
// literal, line 45
- if (!(eq_s_b(2, "in")))
+ if (!(Eq_S_B(2, "in")))
{
return false;
}
@@ -1522,7 +1522,7 @@
do
{
// literal, line 46
- if (!(eq_s_b(1, "f")))
+ if (!(Eq_S_B(1, "f")))
{
goto lab0;
}
@@ -1551,7 +1551,7 @@
}
m_cursor = m_limit - v_1;
// among, line 47
- if (find_among_b(a_0, 9) == 0)
+ if (FindAmongB(a_0, 9) == 0)
{
return false;
}
@@ -1582,7 +1582,7 @@
do
{
// literal, line 49
- if (!(eq_s_b(3, "met")))
+ if (!(Eq_S_B(3, "met")))
{
goto lab0;
}
@@ -1597,7 +1597,7 @@
do
{
// literal, line 49
- if (!(eq_s_b(4, "ryst")))
+ if (!(Eq_S_B(4, "ryst")))
{
goto lab1;
}
@@ -1626,7 +1626,7 @@
}
m_cursor = m_limit - v_1;
// literal, line 50
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
return false;
}
@@ -1640,7 +1640,7 @@
// [, line 56
m_ket = m_cursor;
// substring, line 56
- among_var = find_among_b(a_1, 294);
+ among_var = FindAmongB(a_1, 294);
if (among_var == 0)
{
return false;
@@ -1654,7 +1654,7 @@
case 1:
// (, line 145
// delete, line 145
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1667,7 +1667,7 @@
// test, line 152
v_1 = m_limit - m_cursor;
// substring, line 152
- if (find_among_b(a_2, 10) == 0)
+ if (FindAmongB(a_2, 10) == 0)
{
return false;
}
@@ -1683,7 +1683,7 @@
// ], line 154
m_bra = m_cursor;
// delete, line 154
- slice_del();
+ SliceDel();
return true;
}
@@ -1702,7 +1702,7 @@
// [, line 160
m_ket = m_cursor;
// substring, line 160
- among_var = find_among_b(a_3, 34);
+ among_var = FindAmongB(a_3, 34);
if (among_var == 0)
{
return false;
@@ -1716,42 +1716,42 @@
case 1:
// (, line 161
// <-, line 161
- slice_from("ief");
+ SliceFrom("ief");
break;
case 2:
// (, line 162
// <-, line 162
- slice_from("uc");
+ SliceFrom("uc");
break;
case 3:
// (, line 163
// <-, line 163
- slice_from("um");
+ SliceFrom("um");
break;
case 4:
// (, line 164
// <-, line 164
- slice_from("rb");
+ SliceFrom("rb");
break;
case 5:
// (, line 165
// <-, line 165
- slice_from("ur");
+ SliceFrom("ur");
break;
case 6:
// (, line 166
// <-, line 166
- slice_from("ister");
+ SliceFrom("ister");
break;
case 7:
// (, line 167
// <-, line 167
- slice_from("meter");
+ SliceFrom("meter");
break;
case 8:
// (, line 168
// <-, line 168
- slice_from("olut");
+ SliceFrom("olut");
break;
case 9:
// (, line 169
@@ -1761,7 +1761,7 @@
do
{
// literal, line 169
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
goto lab0;
}
@@ -1776,7 +1776,7 @@
do
{
// literal, line 169
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab1;
}
@@ -1791,7 +1791,7 @@
do
{
// literal, line 169
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab2;
}
@@ -1801,77 +1801,77 @@
m_cursor = m_limit - v_3;
}
// <-, line 169
- slice_from("l");
+ SliceFrom("l");
break;
case 10:
// (, line 170
// <-, line 170
- slice_from("bic");
+ SliceFrom("bic");
break;
case 11:
// (, line 171
// <-, line 171
- slice_from("dic");
+ SliceFrom("dic");
break;
case 12:
// (, line 172
// <-, line 172
- slice_from("pic");
+ SliceFrom("pic");
break;
case 13:
// (, line 173
// <-, line 173
- slice_from("tic");
+ SliceFrom("tic");
break;
case 14:
// (, line 174
// <-, line 174
- slice_from("ac");
+ SliceFrom("ac");
break;
case 15:
// (, line 175
// <-, line 175
- slice_from("ec");
+ SliceFrom("ec");
break;
case 16:
// (, line 176
// <-, line 176
- slice_from("ic");
+ SliceFrom("ic");
break;
case 17:
// (, line 177
// <-, line 177
- slice_from("luc");
+ SliceFrom("luc");
break;
case 18:
// (, line 178
// <-, line 178
- slice_from("uas");
+ SliceFrom("uas");
break;
case 19:
// (, line 179
// <-, line 179
- slice_from("vas");
+ SliceFrom("vas");
break;
case 20:
// (, line 180
// <-, line 180
- slice_from("cis");
+ SliceFrom("cis");
break;
case 21:
// (, line 181
// <-, line 181
- slice_from("lis");
+ SliceFrom("lis");
break;
case 22:
// (, line 182
// <-, line 182
- slice_from("eris");
+ SliceFrom("eris");
break;
case 23:
// (, line 183
// <-, line 183
- slice_from("pans");
+ SliceFrom("pans");
break;
case 24:
// (, line 184
@@ -1881,7 +1881,7 @@
do
{
// literal, line 184
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab3;
}
@@ -1891,22 +1891,22 @@
m_cursor = m_limit - v_4;
}
// <-, line 184
- slice_from("ens");
+ SliceFrom("ens");
break;
case 25:
// (, line 185
// <-, line 185
- slice_from("ons");
+ SliceFrom("ons");
break;
case 26:
// (, line 186
// <-, line 186
- slice_from("lus");
+ SliceFrom("lus");
break;
case 27:
// (, line 187
// <-, line 187
- slice_from("rus");
+ SliceFrom("rus");
break;
case 28:
// (, line 188
@@ -1916,7 +1916,7 @@
do
{
// literal, line 188
- if (!(eq_s_b(1, "p")))
+ if (!(Eq_S_B(1, "p")))
{
goto lab4;
}
@@ -1931,7 +1931,7 @@
do
{
// literal, line 188
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
goto lab5;
}
@@ -1941,12 +1941,12 @@
m_cursor = m_limit - v_6;
}
// <-, line 188
- slice_from("hes");
+ SliceFrom("hes");
break;
case 29:
// (, line 189
// <-, line 189
- slice_from("mis");
+ SliceFrom("mis");
break;
case 30:
// (, line 190
@@ -1956,7 +1956,7 @@
do
{
// literal, line 190
- if (!(eq_s_b(1, "m")))
+ if (!(Eq_S_B(1, "m")))
{
goto lab6;
}
@@ -1966,12 +1966,12 @@
m_cursor = m_limit - v_7;
}
// <-, line 190
- slice_from("ens");
+ SliceFrom("ens");
break;
case 31:
// (, line 192
// <-, line 192
- slice_from("ers");
+ SliceFrom("ers");
break;
case 32:
// (, line 193
@@ -1981,7 +1981,7 @@
do
{
// literal, line 193
- if (!(eq_s_b(1, "n")))
+ if (!(Eq_S_B(1, "n")))
{
goto lab7;
}
@@ -1991,17 +1991,17 @@
m_cursor = m_limit - v_8;
}
// <-, line 193
- slice_from("es");
+ SliceFrom("es");
break;
case 33:
// (, line 194
// <-, line 194
- slice_from("ys");
+ SliceFrom("ys");
break;
case 34:
// (, line 195
// <-, line 195
- slice_from("ys");
+ SliceFrom("ys");
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/NorwegianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/NorwegianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/NorwegianStemmer.cs
index 94885bc..41a9137 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/NorwegianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/NorwegianStemmer.cs
@@ -70,7 +70,7 @@
{
I_x = other.I_x;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -100,7 +100,7 @@
v_2 = m_cursor;
do
{
- if (!(in_grouping(g_v, 97, 248)))
+ if (!(InGrouping(g_v, 97, 248)))
{
goto lab1;
}
@@ -121,7 +121,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 248)))
+ if (!(OutGrouping(g_v, 97, 248)))
{
goto lab3;
}
@@ -173,7 +173,7 @@
// [, line 38
m_ket = m_cursor;
// substring, line 38
- among_var = find_among_b(a_0, 29);
+ among_var = FindAmongB(a_0, 29);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -189,7 +189,7 @@
case 1:
// (, line 44
// delete, line 44
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 46
@@ -199,7 +199,7 @@
v_3 = m_limit - m_cursor;
do
{
- if (!(in_grouping_b(g_s_ending, 98, 122)))
+ if (!(InGroupingB(g_s_ending, 98, 122)))
{
goto lab1;
}
@@ -209,23 +209,23 @@
m_cursor = m_limit - v_3;
// (, line 46
// literal, line 46
- if (!(eq_s_b(1, "k")))
+ if (!(Eq_S_B(1, "k")))
{
return false;
}
- if (!(out_grouping_b(g_v, 97, 248)))
+ if (!(OutGroupingB(g_v, 97, 248)))
{
return false;
}
} while (false);
lab0:
// delete, line 46
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 48
// <-, line 48
- slice_from("er");
+ SliceFrom("er");
break;
}
return true;
@@ -255,7 +255,7 @@
// [, line 54
m_ket = m_cursor;
// substring, line 54
- if (find_among_b(a_1, 2) == 0)
+ if (FindAmongB(a_1, 2) == 0)
{
m_limit_backward = v_3;
return false;
@@ -273,7 +273,7 @@
// ], line 59
m_bra = m_cursor;
// delete, line 59
- slice_del();
+ SliceDel();
return true;
}
@@ -298,7 +298,7 @@
// [, line 63
m_ket = m_cursor;
// substring, line 63
- among_var = find_among_b(a_2, 11);
+ among_var = FindAmongB(a_2, 11);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -314,7 +314,7 @@
case 1:
// (, line 67
// delete, line 67
- slice_del();
+ SliceDel();
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PorterStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PorterStemmer.cs
index 4e93896..5b64ec9 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PorterStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PorterStemmer.cs
@@ -105,21 +105,21 @@
B_Y_found = other.B_Y_found;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_shortv()
{
// (, line 19
- if (!(out_grouping_b(g_v_WXY, 89, 121)))
+ if (!(OutGroupingB(g_v_WXY, 89, 121)))
{
return false;
}
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
return false;
}
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
return false;
}
@@ -151,7 +151,7 @@
// [, line 25
m_ket = m_cursor;
// substring, line 25
- among_var = find_among_b(a_0, 4);
+ among_var = FindAmongB(a_0, 4);
if (among_var == 0)
{
return false;
@@ -165,17 +165,17 @@
case 1:
// (, line 26
// <-, line 26
- slice_from("ss");
+ SliceFrom("ss");
break;
case 2:
// (, line 27
// <-, line 27
- slice_from("i");
+ SliceFrom("i");
break;
case 3:
// (, line 29
// delete, line 29
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -191,7 +191,7 @@
// [, line 34
m_ket = m_cursor;
// substring, line 34
- among_var = find_among_b(a_2, 3);
+ among_var = FindAmongB(a_2, 3);
if (among_var == 0)
{
return false;
@@ -210,7 +210,7 @@
return false;
}
// <-, line 35
- slice_from("ee");
+ SliceFrom("ee");
break;
case 2:
// (, line 37
@@ -221,7 +221,7 @@
{
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab1;
}
@@ -237,11 +237,11 @@
golab0:
m_cursor = m_limit - v_1;
// delete, line 38
- slice_del();
+ SliceDel();
// test, line 39
v_3 = m_limit - m_cursor;
// substring, line 39
- among_var = find_among_b(a_1, 13);
+ among_var = FindAmongB(a_1, 13);
if (among_var == 0)
{
return false;
@@ -256,7 +256,7 @@
// <+, line 41
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "e");
+ Insert(m_cursor, m_cursor, "e");
m_cursor = c;
}
break;
@@ -273,7 +273,7 @@
// ], line 44
m_bra = m_cursor;
// delete, line 44
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 45
@@ -293,7 +293,7 @@
// <+, line 45
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "e");
+ Insert(m_cursor, m_cursor, "e");
m_cursor = c;
}
break;
@@ -316,7 +316,7 @@
do
{
// literal, line 52
- if (!(eq_s_b(1, "y")))
+ if (!(Eq_S_B(1, "y")))
{
goto lab1;
}
@@ -325,7 +325,7 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 52
- if (!(eq_s_b(1, "Y")))
+ if (!(Eq_S_B(1, "Y")))
{
return false;
}
@@ -338,7 +338,7 @@
{
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab3;
}
@@ -353,7 +353,7 @@
}
golab2:
// <-, line 54
- slice_from("i");
+ SliceFrom("i");
return true;
}
@@ -364,7 +364,7 @@
// [, line 58
m_ket = m_cursor;
// substring, line 58
- among_var = find_among_b(a_3, 20);
+ among_var = FindAmongB(a_3, 20);
if (among_var == 0)
{
return false;
@@ -383,72 +383,72 @@
case 1:
// (, line 59
// <-, line 59
- slice_from("tion");
+ SliceFrom("tion");
break;
case 2:
// (, line 60
// <-, line 60
- slice_from("ence");
+ SliceFrom("ence");
break;
case 3:
// (, line 61
// <-, line 61
- slice_from("ance");
+ SliceFrom("ance");
break;
case 4:
// (, line 62
// <-, line 62
- slice_from("able");
+ SliceFrom("able");
break;
case 5:
// (, line 63
// <-, line 63
- slice_from("ent");
+ SliceFrom("ent");
break;
case 6:
// (, line 64
// <-, line 64
- slice_from("e");
+ SliceFrom("e");
break;
case 7:
// (, line 66
// <-, line 66
- slice_from("ize");
+ SliceFrom("ize");
break;
case 8:
// (, line 68
// <-, line 68
- slice_from("ate");
+ SliceFrom("ate");
break;
case 9:
// (, line 69
// <-, line 69
- slice_from("al");
+ SliceFrom("al");
break;
case 10:
// (, line 71
// <-, line 71
- slice_from("al");
+ SliceFrom("al");
break;
case 11:
// (, line 72
// <-, line 72
- slice_from("ful");
+ SliceFrom("ful");
break;
case 12:
// (, line 74
// <-, line 74
- slice_from("ous");
+ SliceFrom("ous");
break;
case 13:
// (, line 76
// <-, line 76
- slice_from("ive");
+ SliceFrom("ive");
break;
case 14:
// (, line 77
// <-, line 77
- slice_from("ble");
+ SliceFrom("ble");
break;
}
return true;
@@ -461,7 +461,7 @@
// [, line 82
m_ket = m_cursor;
// substring, line 82
- among_var = find_among_b(a_4, 7);
+ among_var = FindAmongB(a_4, 7);
if (among_var == 0)
{
return false;
@@ -480,17 +480,17 @@
case 1:
// (, line 83
// <-, line 83
- slice_from("al");
+ SliceFrom("al");
break;
case 2:
// (, line 85
// <-, line 85
- slice_from("ic");
+ SliceFrom("ic");
break;
case 3:
// (, line 87
// delete, line 87
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -504,7 +504,7 @@
// [, line 92
m_ket = m_cursor;
// substring, line 92
- among_var = find_among_b(a_5, 19);
+ among_var = FindAmongB(a_5, 19);
if (among_var == 0)
{
return false;
@@ -523,7 +523,7 @@
case 1:
// (, line 95
// delete, line 95
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 96
@@ -534,7 +534,7 @@
do
{
// literal, line 96
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab1;
}
@@ -543,14 +543,14 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 96
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
return false;
}
} while (false);
lab0:
// delete, line 96
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -564,7 +564,7 @@
// [, line 101
m_ket = m_cursor;
// literal, line 101
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
@@ -609,7 +609,7 @@
} while (false);
lab0:
// delete, line 103
- slice_del();
+ SliceDel();
return true;
}
@@ -619,7 +619,7 @@
// [, line 107
m_ket = m_cursor;
// literal, line 107
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
return false;
}
@@ -631,12 +631,12 @@
return false;
}
// literal, line 108
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
return false;
}
// delete, line 109
- slice_del();
+ SliceDel();
return true;
}
@@ -670,14 +670,14 @@
// [, line 116
m_bra = m_cursor;
// literal, line 116
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab0;
}
// ], line 116
m_ket = m_cursor;
// <-, line 116
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 116
B_Y_found = true;
} while (false);
@@ -701,14 +701,14 @@
do
{
// (, line 117
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab5;
}
// [, line 117
m_bra = m_cursor;
// literal, line 117
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab5;
}
@@ -727,7 +727,7 @@
}
golab4:
// <-, line 117
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 117
B_Y_found = true;
// LUCENENET NOTE: continue label is not supported directly in .NET,
@@ -761,7 +761,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab8;
}
@@ -780,7 +780,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab10;
}
@@ -801,7 +801,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab12;
}
@@ -820,7 +820,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab14;
}
@@ -965,7 +965,7 @@
// [, line 137
m_bra = m_cursor;
// literal, line 137
- if (!(eq_s(1, "Y")))
+ if (!(Eq_S(1, "Y")))
{
goto lab27;
}
@@ -984,7 +984,7 @@
}
golab26:
// <-, line 137
- slice_from("y");
+ SliceFrom("y");
// LUCENENET NOTE: continue label is not supported directly in .NET,
// so we just need to add another goto to get to the end of the outer loop.
// See: http://stackoverflow.com/a/359449/181087
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PortugueseStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PortugueseStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PortugueseStemmer.cs
index 248b2b2..6d17ad0 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PortugueseStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/PortugueseStemmer.cs
@@ -238,7 +238,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -255,7 +255,7 @@
// [, line 37
m_bra = m_cursor;
// substring, line 37
- among_var = find_among(a_0, 3);
+ among_var = FindAmong(a_0, 3);
if (among_var == 0)
{
goto lab1;
@@ -269,12 +269,12 @@
case 1:
// (, line 38
// <-, line 38
- slice_from("a~");
+ SliceFrom("a~");
break;
case 2:
// (, line 39
// <-, line 39
- slice_from("o~");
+ SliceFrom("o~");
break;
case 3:
// (, line 40
@@ -328,7 +328,7 @@
do
{
// (, line 51
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab2;
}
@@ -339,7 +339,7 @@
do
{
// (, line 51
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab4;
}
@@ -348,7 +348,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab6;
}
@@ -367,7 +367,7 @@
lab4:
m_cursor = v_3;
// (, line 51
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab2;
}
@@ -376,7 +376,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab8;
}
@@ -397,7 +397,7 @@
lab2:
m_cursor = v_2;
// (, line 53
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab0;
}
@@ -408,7 +408,7 @@
do
{
// (, line 53
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab10;
}
@@ -417,7 +417,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab12;
}
@@ -436,7 +436,7 @@
lab10:
m_cursor = v_6;
// (, line 53
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab0;
}
@@ -465,7 +465,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab15;
}
@@ -484,7 +484,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab17;
}
@@ -505,7 +505,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 250)))
+ if (!(InGrouping(g_v, 97, 250)))
{
goto lab19;
}
@@ -524,7 +524,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 250)))
+ if (!(OutGrouping(g_v, 97, 250)))
{
goto lab21;
}
@@ -560,7 +560,7 @@
// [, line 63
m_bra = m_cursor;
// substring, line 63
- among_var = find_among(a_1, 3);
+ among_var = FindAmong(a_1, 3);
if (among_var == 0)
{
goto lab1;
@@ -574,12 +574,12 @@
case 1:
// (, line 64
// <-, line 64
- slice_from("\u00E3");
+ SliceFrom("\u00E3");
break;
case 2:
// (, line 65
// <-, line 65
- slice_from("\u00F5");
+ SliceFrom("\u00F5");
break;
case 3:
// (, line 66
@@ -648,7 +648,7 @@
// [, line 77
m_ket = m_cursor;
// substring, line 77
- among_var = find_among_b(a_5, 45);
+ among_var = FindAmongB(a_5, 45);
if (among_var == 0)
{
return false;
@@ -667,7 +667,7 @@
return false;
}
// delete, line 93
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 97
@@ -677,7 +677,7 @@
return false;
}
// <-, line 98
- slice_from("log");
+ SliceFrom("log");
break;
case 3:
// (, line 101
@@ -687,7 +687,7 @@
return false;
}
// <-, line 102
- slice_from("u");
+ SliceFrom("u");
break;
case 4:
// (, line 105
@@ -697,7 +697,7 @@
return false;
}
// <-, line 106
- slice_from("ente");
+ SliceFrom("ente");
break;
case 5:
// (, line 109
@@ -707,7 +707,7 @@
return false;
}
// delete, line 110
- slice_del();
+ SliceDel();
// try, line 111
v_1 = m_limit - m_cursor;
do
@@ -716,7 +716,7 @@
// [, line 112
m_ket = m_cursor;
// substring, line 112
- among_var = find_among_b(a_2, 4);
+ among_var = FindAmongB(a_2, 4);
if (among_var == 0)
{
m_cursor = m_limit - v_1;
@@ -731,7 +731,7 @@
goto lab0;
}
// delete, line 112
- slice_del();
+ SliceDel();
switch (among_var)
{
case 0:
@@ -742,7 +742,7 @@
// [, line 113
m_ket = m_cursor;
// literal, line 113
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_1;
goto lab0;
@@ -756,7 +756,7 @@
goto lab0;
}
// delete, line 113
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -770,7 +770,7 @@
return false;
}
// delete, line 122
- slice_del();
+ SliceDel();
// try, line 123
v_2 = m_limit - m_cursor;
do
@@ -779,7 +779,7 @@
// [, line 124
m_ket = m_cursor;
// substring, line 124
- among_var = find_among_b(a_3, 3);
+ among_var = FindAmongB(a_3, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_2;
@@ -801,7 +801,7 @@
goto lab1;
}
// delete, line 127
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -815,7 +815,7 @@
return false;
}
// delete, line 134
- slice_del();
+ SliceDel();
// try, line 135
v_3 = m_limit - m_cursor;
do
@@ -824,7 +824,7 @@
// [, line 136
m_ket = m_cursor;
// substring, line 136
- among_var = find_among_b(a_4, 3);
+ among_var = FindAmongB(a_4, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_3;
@@ -846,7 +846,7 @@
goto lab2;
}
// delete, line 139
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -860,7 +860,7 @@
return false;
}
// delete, line 146
- slice_del();
+ SliceDel();
// try, line 147
v_4 = m_limit - m_cursor;
do
@@ -869,7 +869,7 @@
// [, line 148
m_ket = m_cursor;
// literal, line 148
- if (!(eq_s_b(2, "at")))
+ if (!(Eq_S_B(2, "at")))
{
m_cursor = m_limit - v_4;
goto lab3;
@@ -883,7 +883,7 @@
goto lab3;
}
// delete, line 148
- slice_del();
+ SliceDel();
} while (false);
lab3:
break;
@@ -895,12 +895,12 @@
return false;
}
// literal, line 153
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
// <-, line 154
- slice_from("ir");
+ SliceFrom("ir");
break;
}
return true;
@@ -926,7 +926,7 @@
// [, line 160
m_ket = m_cursor;
// substring, line 160
- among_var = find_among_b(a_6, 120);
+ among_var = FindAmongB(a_6, 120);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -942,7 +942,7 @@
case 1:
// (, line 179
// delete, line 179
- slice_del();
+ SliceDel();
break;
}
m_limit_backward = v_2;
@@ -956,7 +956,7 @@
// [, line 184
m_ket = m_cursor;
// substring, line 184
- among_var = find_among_b(a_7, 7);
+ among_var = FindAmongB(a_7, 7);
if (among_var == 0)
{
return false;
@@ -975,7 +975,7 @@
return false;
}
// delete, line 187
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -991,7 +991,7 @@
// [, line 192
m_ket = m_cursor;
// substring, line 192
- among_var = find_among_b(a_8, 4);
+ among_var = FindAmongB(a_8, 4);
if (among_var == 0)
{
return false;
@@ -1010,7 +1010,7 @@
return false;
}
// delete, line 194
- slice_del();
+ SliceDel();
// [, line 194
m_ket = m_cursor;
// or, line 194
@@ -1021,7 +1021,7 @@
{
// (, line 194
// literal, line 194
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab1;
}
@@ -1030,7 +1030,7 @@
// test, line 194
v_2 = m_limit - m_cursor;
// literal, line 194
- if (!(eq_s_b(1, "g")))
+ if (!(Eq_S_B(1, "g")))
{
goto lab1;
}
@@ -1041,7 +1041,7 @@
m_cursor = m_limit - v_1;
// (, line 195
// literal, line 195
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
return false;
}
@@ -1050,7 +1050,7 @@
// test, line 195
v_3 = m_limit - m_cursor;
// literal, line 195
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
return false;
}
@@ -1063,12 +1063,12 @@
return false;
}
// delete, line 195
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 196
// <-, line 196
- slice_from("c");
+ SliceFrom("c");
break;
}
return true;
@@ -1161,7 +1161,7 @@
// [, line 207
m_ket = m_cursor;
// literal, line 207
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
goto lab7;
}
@@ -1170,7 +1170,7 @@
// test, line 207
v_8 = m_limit - m_cursor;
// literal, line 207
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
goto lab7;
}
@@ -1181,7 +1181,7 @@
goto lab7;
}
// delete, line 207
- slice_del();
+ SliceDel();
} while (false);
lab7:
m_cursor = m_limit - v_7;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RomanianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RomanianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RomanianStemmer.cs
index b04441b..de03a61 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RomanianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/RomanianStemmer.cs
@@ -265,7 +265,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -287,7 +287,7 @@
do
{
// (, line 32
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab3;
}
@@ -301,36 +301,36 @@
{
// (, line 33
// literal, line 33
- if (!(eq_s(1, "u")))
+ if (!(Eq_S(1, "u")))
{
goto lab5;
}
// ], line 33
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab5;
}
// <-, line 33
- slice_from("U");
+ SliceFrom("U");
goto lab4;
} while (false);
lab5:
m_cursor = v_3;
// (, line 34
// literal, line 34
- if (!(eq_s(1, "i")))
+ if (!(Eq_S(1, "i")))
{
goto lab3;
}
// ], line 34
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab3;
}
// <-, line 34
- slice_from("I");
+ SliceFrom("I");
} while (false);
lab4:
m_cursor = v_2;
@@ -387,7 +387,7 @@
do
{
// (, line 45
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab2;
}
@@ -398,7 +398,7 @@
do
{
// (, line 45
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab4;
}
@@ -407,7 +407,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab6;
}
@@ -426,7 +426,7 @@
lab4:
m_cursor = v_3;
// (, line 45
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab2;
}
@@ -435,7 +435,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab8;
}
@@ -456,7 +456,7 @@
lab2:
m_cursor = v_2;
// (, line 47
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab0;
}
@@ -467,7 +467,7 @@
do
{
// (, line 47
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab10;
}
@@ -476,7 +476,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab12;
}
@@ -495,7 +495,7 @@
lab10:
m_cursor = v_6;
// (, line 47
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab0;
}
@@ -524,7 +524,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab15;
}
@@ -543,7 +543,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab17;
}
@@ -564,7 +564,7 @@
{
do
{
- if (!(in_grouping(g_v, 97, 259)))
+ if (!(InGrouping(g_v, 97, 259)))
{
goto lab19;
}
@@ -583,7 +583,7 @@
{
do
{
- if (!(out_grouping(g_v, 97, 259)))
+ if (!(OutGrouping(g_v, 97, 259)))
{
goto lab21;
}
@@ -619,7 +619,7 @@
// [, line 58
m_bra = m_cursor;
// substring, line 58
- among_var = find_among(a_0, 3);
+ among_var = FindAmong(a_0, 3);
if (among_var == 0)
{
goto lab1;
@@ -633,12 +633,12 @@
case 1:
// (, line 59
// <-, line 59
- slice_from("i");
+ SliceFrom("i");
break;
case 2:
// (, line 60
// <-, line 60
- slice_from("u");
+ SliceFrom("u");
break;
case 3:
// (, line 61
@@ -704,7 +704,7 @@
// [, line 73
m_ket = m_cursor;
// substring, line 73
- among_var = find_among_b(a_1, 16);
+ among_var = FindAmongB(a_1, 16);
if (among_var == 0)
{
return false;
@@ -723,22 +723,22 @@
case 1:
// (, line 75
// delete, line 75
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 77
// <-, line 77
- slice_from("a");
+ SliceFrom("a");
break;
case 3:
// (, line 79
// <-, line 79
- slice_from("e");
+ SliceFrom("e");
break;
case 4:
// (, line 81
// <-, line 81
- slice_from("i");
+ SliceFrom("i");
break;
case 5:
// (, line 83
@@ -748,7 +748,7 @@
do
{
// literal, line 83
- if (!(eq_s_b(2, "ab")))
+ if (!(Eq_S_B(2, "ab")))
{
goto lab0;
}
@@ -758,17 +758,17 @@
m_cursor = m_limit - v_1;
}
// <-, line 83
- slice_from("i");
+ SliceFrom("i");
break;
case 6:
// (, line 85
// <-, line 85
- slice_from("at");
+ SliceFrom("at");
break;
case 7:
// (, line 87
// <-, line 87
- slice_from("a\u0163i");
+ SliceFrom("a\u0163i");
break;
}
return true;
@@ -784,7 +784,7 @@
// [, line 92
m_ket = m_cursor;
// substring, line 92
- among_var = find_among_b(a_2, 46);
+ among_var = FindAmongB(a_2, 46);
if (among_var == 0)
{
return false;
@@ -804,32 +804,32 @@
case 1:
// (, line 100
// <-, line 101
- slice_from("abil");
+ SliceFrom("abil");
break;
case 2:
// (, line 103
// <-, line 104
- slice_from("ibil");
+ SliceFrom("ibil");
break;
case 3:
// (, line 106
// <-, line 107
- slice_from("iv");
+ SliceFrom("iv");
break;
case 4:
// (, line 112
// <-, line 113
- slice_from("ic");
+ SliceFrom("ic");
break;
case 5:
// (, line 117
// <-, line 118
- slice_from("at");
+ SliceFrom("at");
break;
case 6:
// (, line 121
// <-, line 122
- slice_from("it");
+ SliceFrom("it");
break;
}
// set standard_suffix_removed, line 125
@@ -875,7 +875,7 @@
// [, line 132
m_ket = m_cursor;
// substring, line 132
- among_var = find_among_b(a_3, 62);
+ among_var = FindAmongB(a_3, 62);
if (among_var == 0)
{
return false;
@@ -895,24 +895,24 @@
case 1:
// (, line 148
// delete, line 149
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 151
// literal, line 152
- if (!(eq_s_b(1, "\u0163")))
+ if (!(Eq_S_B(1, "\u0163")))
{
return false;
}
// ], line 152
m_bra = m_cursor;
// <-, line 152
- slice_from("t");
+ SliceFrom("t");
break;
case 3:
// (, line 155
// <-, line 156
- slice_from("ist");
+ SliceFrom("ist");
break;
}
// set standard_suffix_removed, line 160
@@ -941,7 +941,7 @@
// [, line 165
m_ket = m_cursor;
// substring, line 165
- among_var = find_among_b(a_4, 94);
+ among_var = FindAmongB(a_4, 94);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -962,7 +962,7 @@
v_3 = m_limit - m_cursor;
do
{
- if (!(out_grouping_b(g_v, 97, 259)))
+ if (!(OutGroupingB(g_v, 97, 259)))
{
goto lab1;
}
@@ -971,7 +971,7 @@
lab1:
m_cursor = m_limit - v_3;
// literal, line 200
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
m_limit_backward = v_2;
return false;
@@ -979,12 +979,12 @@
} while (false);
lab0:
// delete, line 200
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 214
// delete, line 214
- slice_del();
+ SliceDel();
break;
}
m_limit_backward = v_2;
@@ -998,7 +998,7 @@
// [, line 219
m_ket = m_cursor;
// substring, line 219
- among_var = find_among_b(a_5, 5);
+ among_var = FindAmongB(a_5, 5);
if (among_var == 0)
{
return false;
@@ -1017,7 +1017,7 @@
case 1:
// (, line 220
// delete, line 220
- slice_del();
+ SliceDel();
break;
}
return true;
[25/39] lucenenet git commit:
Lucene.Net.Analysis.Miscellaneous.WordDelimiterIterator refactor: SingleWord
> IsSingleWord() (performs conversion)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Miscellaneous.WordDelimiterIterator refactor: SingleWord > IsSingleWord() (performs conversion)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3031be6c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3031be6c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3031be6c
Branch: refs/heads/api-work
Commit: 3031be6c1abfa6f1321eeb23e436ecd11e98dd11
Parents: de106d9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:01:01 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:01:01 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/Lucene47WordDelimiterFilter.cs | 2 +-
.../Miscellaneous/WordDelimiterFilter.cs | 2 +-
.../Miscellaneous/WordDelimiterIterator.cs | 19 ++++++++-----------
3 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3031be6c/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 070ad85..7db0cd5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -256,7 +256,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
// word surrounded by delimiters: always output
- if (iterator.SingleWord)
+ if (iterator.IsSingleWord())
{
GeneratePart(true);
iterator.Next();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3031be6c/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index c3cc5a5..298d3db 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -338,7 +338,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
// word surrounded by delimiters: always output
- if (iterator.SingleWord)
+ if (iterator.IsSingleWord())
{
GeneratePart(true);
iterator.Next();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3031be6c/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
index ee19be7..cc7ff94 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -174,7 +174,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// This currently uses the type of the first character in the subword.
/// </summary>
/// <returns> type of the current word </returns>
- internal int Type // LUCENENET TODO: Change to GetType()
+ internal int Type
{
get
{
@@ -248,18 +248,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines if the current word contains only one subword. Note, it could be potentially surrounded by delimiters
/// </summary>
/// <returns> <c>true</c> if the current word contains only one subword, <c>false</c> otherwise </returns>
- internal bool SingleWord // LUCENENET TODO: Change to IsSingleWord()
+ internal bool IsSingleWord()
{
- get
+ if (hasFinalPossessive)
{
- if (hasFinalPossessive)
- {
- return current == startBounds && end == endBounds - 2;
- }
- else
- {
- return current == startBounds && end == endBounds;
- }
+ return current == startBounds && end == endBounds - 2;
+ }
+ else
+ {
+ return current == startBounds && end == endBounds;
}
}
[09/39] lucenenet git commit: Lucene.Net.Analysis.Th refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Th refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/69bd8518
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/69bd8518
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/69bd8518
Branch: refs/heads/api-work
Commit: 69bd85188bbdeaa2dbbaef5b0dd0161ef2ca0e72
Parents: 0f3d7fb
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 14:19:21 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 14:19:21 2017 +0700
----------------------------------------------------------------------
.../Analysis/Th/ThaiAnalyzer.cs | 22 +++++++----------
.../Analysis/Th/ThaiTokenizer.cs | 25 +++++++++++---------
.../Analysis/Th/ThaiTokenizerFactory.cs | 6 ++---
.../Analysis/Th/ThaiWordFilter.cs | 17 +++++++------
.../Analysis/Th/ThaiWordFilterFactory.cs | 6 ++---
5 files changed, 39 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69bd8518/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
index f143f90..440e4b1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiAnalyzer.cs
@@ -25,20 +25,16 @@ namespace Lucene.Net.Analysis.Th
*/
/// <summary>
- /// <see cref="Analyzer"/> for Thai language. It uses <see cref="java.text.BreakIterator"/> to break words.
- /// <para>
- /// <a name="version"/>
- /// </para>
+ /// <see cref="Analyzer"/> for Thai language. It uses <see cref="Support.BreakIterator"/> to break words.
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating ThaiAnalyzer:
- /// <ul>
- /// <li> As of 3.6, a set of Thai stopwords is used by default
- /// </ul>
+ /// compatibility when creating <see cref="ThaiAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.6, a set of Thai stopwords is used by default</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class ThaiAnalyzer : StopwordAnalyzerBase
{
-
/// <summary>
/// File containing default Thai stopwords. </summary>
public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
@@ -60,7 +56,7 @@ namespace Lucene.Net.Analysis.Th
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.;
/// </summary>
private class DefaultSetHolder
@@ -88,7 +84,7 @@ namespace Lucene.Net.Analysis.Th
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
public ThaiAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion,
+ : this(matchVersion,
#pragma warning disable 612, 618
matchVersion.OnOrAfter(LuceneVersion.LUCENE_36) ?
#pragma warning restore 612, 618
@@ -102,14 +98,14 @@ namespace Lucene.Net.Analysis.Th
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
public ThaiAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : base(matchVersion, stopwords)
+ : base(matchVersion, stopwords)
{
}
/// <summary>
/// Creates
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <see cref="Reader"/>.
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="Analyzer.TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69bd8518/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index 21287f2..2608fba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -27,14 +27,17 @@ namespace Lucene.Net.Analysis.Th
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+ // LUCENENET NOTE: Removing this notice from the doc comment because it is not relevant for our purposes.
+
+ // <para>WARNING: this tokenizer may not be supported by all JREs.
+ // It is known to work with Sun/Oracle and Harmony JREs.
+ // If your application needs to be fully portable, consider using ICUTokenizer instead,
+ // which uses an ICU Thai BreakIterator that will always be available.
+ // </para>
/// <summary>
/// Tokenizer that use <see cref="BreakIterator"/> to tokenize Thai text.
- /// <para>WARNING: this tokenizer may not be supported by all JREs.
- /// It is known to work with Sun/Oracle and Harmony JREs.
- /// If your application needs to be fully portable, consider using ICUTokenizer instead,
- /// which uses an ICU Thai BreakIterator that will always be available.
- /// </para>
/// </summary>
public class ThaiTokenizer : SegmentingTokenizerBase
{
@@ -54,23 +57,23 @@ namespace Lucene.Net.Analysis.Th
private readonly ThaiWordBreaker wordBreaker;
private readonly CharArrayIterator wrapper = CharArrayIterator.NewWordInstance();
- internal int sentenceStart;
- internal int sentenceEnd;
+ private int sentenceStart;
+ private int sentenceEnd;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
/// <summary>
- /// Creates a new ThaiTokenizer </summary>
+ /// Creates a new <see cref="ThaiTokenizer"/> </summary>
public ThaiTokenizer(TextReader reader)
- : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader)
+ : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader)
{
}
/// <summary>
- /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary>
+ /// Creates a new <see cref="ThaiTokenizer"/>, supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> </summary>
public ThaiTokenizer(AttributeFactory factory, TextReader reader)
- : base(factory, reader, new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, new CultureInfo("th")))
+ : base(factory, reader, new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, new CultureInfo("th")))
{
if (!DBBI_AVAILABLE)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69bd8518/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
index 67a1388..99a9839 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizerFactory.cs
@@ -33,10 +33,10 @@ namespace Lucene.Net.Analysis.Th
/// </summary>
public class ThaiTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new ThaiTokenizerFactory </summary>
- public ThaiTokenizerFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="ThaiTokenizerFactory"/> </summary>
+ public ThaiTokenizerFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69bd8518/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
index 8387639..3da11a2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilter.cs
@@ -27,19 +27,22 @@ namespace Lucene.Net.Analysis.Th
* limitations under the License.
*/
+ // LUCENENET NOTE: Removing this notice from the doc comment because it is not relevant for our purposes.
+
+ //<para>WARNING: this filter may not be supported by all JREs.
+ // It is known to work with Sun/Oracle and Harmony JREs.
+ // If your application needs to be fully portable, consider using ICUTokenizer instead,
+ // which uses an ICU Thai BreakIterator that will always be available.
+ // </para>
+
/// <summary>
- /// <see cref="TokenFilter"/> that use <see cref="java.text.BreakIterator"/> to break each
+ /// <see cref="TokenFilter"/> that use <see cref="Support.BreakIterator"/> to break each
/// Token that is Thai into separate Token(s) for each Thai word.
/// <para>Please note: Since matchVersion 3.1 on, this filter no longer lowercases non-thai text.
/// <see cref="ThaiAnalyzer"/> will insert a <see cref="LowerCaseFilter"/> before this filter
/// so the behaviour of the Analyzer does not change. With version 3.1, the filter handles
/// position increments correctly.
/// </para>
- /// <para>WARNING: this filter may not be supported by all JREs.
- /// It is known to work with Sun/Oracle and Harmony JREs.
- /// If your application needs to be fully portable, consider using ICUTokenizer instead,
- /// which uses an ICU Thai BreakIterator that will always be available.
- /// </para>
/// </summary>
/// @deprecated Use <see cref="ThaiTokenizer"/> instead.
[Obsolete("Use ThaiTokenizer instead.")]
@@ -66,7 +69,7 @@ namespace Lucene.Net.Analysis.Th
private bool hasIllegalOffsets = false; // only if the length changed before this filter
/// <summary>
- /// Creates a new ThaiWordFilter with the specified match version. </summary>
+ /// Creates a new <see cref="ThaiWordFilter"/> with the specified match version. </summary>
public ThaiWordFilter(LuceneVersion matchVersion, TokenStream input)
: base(matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) ? input : new LowerCaseFilter(matchVersion, input))
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/69bd8518/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
index 6b289f9..d0300be 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiWordFilterFactory.cs
@@ -29,14 +29,14 @@ namespace Lucene.Net.Analysis.Th
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.ThaiWordFilterFactory"/>
/// </analyzer>
- /// </fieldType></code> </summary>
+ /// </fieldType></code>
+ /// </summary>
/// @deprecated Use <see cref="ThaiTokenizerFactory"/> instead
[Obsolete("Use ThaiTokenizerFactory instead")]
public class ThaiWordFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new ThaiWordFilterFactory </summary>
+ /// Creates a new <see cref="ThaiWordFilterFactory"/> </summary>
public ThaiWordFilterFactory(IDictionary<string, string> args) : base(args)
{
AssureMatchVersion();
[08/39] lucenenet git commit: Lucene.Net.Analysis.Synonym refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Synonym refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0f3d7fb6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0f3d7fb6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0f3d7fb6
Branch: refs/heads/api-work
Commit: 0f3d7fb6f320dae5dbd0021eed9406ab1129890c
Parents: b158f91
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 14:02:10 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 14:02:10 2017 +0700
----------------------------------------------------------------------
.../Analysis/Synonym/FSTSynonymFilterFactory.cs | 6 +--
.../Analysis/Synonym/SlowSynonymFilter.cs | 16 +++---
.../Synonym/SlowSynonymFilterFactory.cs | 34 ++++++-------
.../Analysis/Synonym/SlowSynonymMap.cs | 24 ++++-----
.../Analysis/Synonym/SolrSynonymParser.cs | 51 +++++++++++---------
.../Analysis/Synonym/SynonymFilter.cs | 26 +++++-----
.../Analysis/Synonym/SynonymFilterFactory.cs | 34 ++++++-------
.../Analysis/Synonym/SynonymMap.cs | 15 +++---
.../Analysis/Synonym/WordnetSynonymParser.cs | 7 +--
9 files changed, 103 insertions(+), 110 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
index 2b09a51..167e17c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
@@ -3,9 +3,7 @@ using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
-using System.Globalization;
using System.IO;
-using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
@@ -29,7 +27,7 @@ namespace Lucene.Net.Analysis.Synonym
*/
internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
- internal readonly bool ignoreCase;
+ private readonly bool ignoreCase;
private readonly string tokenizerFactory;
private readonly string synonyms;
private readonly string format;
@@ -38,7 +36,7 @@ namespace Lucene.Net.Analysis.Synonym
private SynonymMap map;
- [Obsolete(@"(3.4) use <see cref=""SynonymFilterFactory"" instead. this is only a backwards compatibility")]
+ [Obsolete(@"(3.4) use SynonymFilterFactory instead. this is only a backwards compatibility")]
public FSTSynonymFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
index b9b7eb0..ebdf488 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
@@ -1,7 +1,7 @@
-\ufeffusing System;
-using System.Collections.Generic;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Synonym
{
@@ -23,7 +23,7 @@ namespace Lucene.Net.Analysis.Synonym
*/
/// <summary>
- /// SynonymFilter handles multi-token synonyms with variable position increment offsets.
+ /// <see cref="SynonymFilter"/> handles multi-token synonyms with variable position increment offsets.
/// <para>
/// The matched tokens from the input stream may be optionally passed through (includeOrig=true)
/// or discarded. If the original tokens are included, the position increments may be modified
@@ -33,15 +33,15 @@ namespace Lucene.Net.Analysis.Synonym
/// Generated synonyms will start at the same position as the first matched source token.
/// </para>
/// </summary>
- /// @deprecated (3.4) use <see cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
- [Obsolete("(3.4) use <seealso cref=\"SynonymFilterFactory\"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
+ /// @deprecated (3.4) use SynonymFilterFactory instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
+ [Obsolete("(3.4) use SynonymFilterFactory instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
internal sealed class SlowSynonymFilter : TokenFilter
{
-
private readonly SlowSynonymMap map; // Map<String, SynonymMap>
private IEnumerator<AttributeSource> replacement; // iterator over generated tokens
- public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map) : base(@in)
+ public SlowSynonymFilter(TokenStream @in, SlowSynonymMap map)
+ : base(@in)
{
if (map == null)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
index 7d51320..a089173 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
@@ -1,14 +1,10 @@
-\ufeffusing System;
-using System.Collections;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+using System;
using System.Collections.Generic;
-using System.Linq;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Analysis.TokenAttributes;
-using System.Reflection;
-using System.Globalization;
namespace Lucene.Net.Analysis.Synonym
{
@@ -30,17 +26,18 @@ namespace Lucene.Net.Analysis.Synonym
*/
/// <summary>
- /// Factory for <see cref="SlowSynonymFilter"/> (only used with luceneMatchVersion < 3.4)
- /// <pre class="prettyprint" >
+ /// Factory for <see cref="SlowSynonymFilter"/> (only used with luceneMatchVersion < 3.4)
+ /// <code>
/// <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
/// expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></code> </summary>
- /// @deprecated (3.4) use <see cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
- [Obsolete("(3.4) use <seealso cref=\"SynonymFilterFactory\"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
+ /// </fieldType></code>
+ /// </summary>
+ /// @deprecated (3.4) use SynonymFilterFactory instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
+ [Obsolete("(3.4) use SynonymFilterFactory instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
internal sealed class SlowSynonymFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
private readonly string synonyms;
@@ -49,7 +46,8 @@ namespace Lucene.Net.Analysis.Synonym
private readonly string tf;
private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
- public SlowSynonymFilterFactory(IDictionary<string, string> args) : base(args)
+ public SlowSynonymFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
synonyms = Require(args, "synonyms");
ignoreCase = GetBoolean(args, "ignoreCase", false);
@@ -89,7 +87,7 @@ namespace Lucene.Net.Analysis.Synonym
}
/// <returns> a list of all rules </returns>
- internal IEnumerable<string> LoadRules(string synonyms, IResourceLoader loader)
+ private IEnumerable<string> LoadRules(string synonyms, IResourceLoader loader)
{
List<string> wlist = null;
if (File.Exists(synonyms))
@@ -303,12 +301,10 @@ namespace Lucene.Net.Analysis.Synonym
/// <summary>
/// Splits a backslash escaped string on the separator.
- /// <para>
+ /// <para/>
/// Current backslash escaping supported:
- /// <br> \n \t \r \b \f are escaped the same as a Java String
- /// <br> Other characters following a backslash are produced verbatim (\c => c)
- ///
- /// </para>
+ /// <para/> \n \t \r \b \f are escaped the same as a .NET string
+ /// <para/> Other characters following a backslash are produced verbatim (\c => c)
/// </summary>
/// <param name="s"> the string to split </param>
/// <param name="separator"> the separator to split on </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
index 178618c..7a6f259 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
@@ -27,7 +27,8 @@ namespace Lucene.Net.Analysis.Synonym
*/
/// <summary>
- /// Mapping rules for use with <see cref="SlowSynonymFilter"/> </summary>
+ /// Mapping rules for use with <see cref="SlowSynonymFilter"/>
+ /// </summary>
/// @deprecated (3.4) use <see cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
[Obsolete("(3.4) use SynonymFilterFactory instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
internal class SlowSynonymMap
@@ -40,6 +41,7 @@ namespace Lucene.Net.Analysis.Synonym
set { submap = value; }
}
private CharArrayMap<SlowSynonymMap> submap;
+
/// <summary>
/// @lucene.internal </summary>
[WritableArray]
@@ -58,6 +60,7 @@ namespace Lucene.Net.Analysis.Synonym
public SlowSynonymMap()
{
}
+
public SlowSynonymMap(bool ignoreCase)
{
if (ignoreCase)
@@ -70,9 +73,7 @@ namespace Lucene.Net.Analysis.Synonym
{
get
{
- {
- return (flags & INCLUDE_ORIG) != 0;
- }
+ return (flags & INCLUDE_ORIG) != 0;
}
}
@@ -80,14 +81,12 @@ namespace Lucene.Net.Analysis.Synonym
{
get
{
- {
- return (flags & IGNORE_CASE) != 0;
- }
+ return (flags & IGNORE_CASE) != 0;
}
}
- /// <param name="singleMatch"> List<String>, the sequence of strings to match </param>
- /// <param name="replacement"> List<Token> the list of tokens to use on a match </param>
+ /// <param name="singleMatch"> <see cref="IList{String}"/>, the sequence of strings to match </param>
+ /// <param name="replacement"> <see cref="IList{Token}"/> the list of tokens to use on a match </param>
/// <param name="includeOrig"> sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
/// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
public virtual void Add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
@@ -125,7 +124,6 @@ namespace Lucene.Net.Analysis.Synonym
}
}
-
public override string ToString()
{
var sb = new StringBuilder("<");
@@ -151,10 +149,9 @@ namespace Lucene.Net.Analysis.Synonym
return sb.ToString();
}
-
-
/// <summary>
- /// Produces a List<Token> from a List<String> </summary>
+ /// Produces a <see cref="IList{Token}"/> from a <see cref="IList{String}"/>
+ /// </summary>
public static IList<Token> MakeTokens(IList<string> strings)
{
IList<Token> ret = new List<Token>(strings.Count);
@@ -174,7 +171,6 @@ namespace Lucene.Net.Analysis.Synonym
///
/// Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
/// Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
- ///
/// </summary>
public static IList<Token> MergeTokens(IList<Token> lst1, IList<Token> lst2)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
index 94c9416..cd13921 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SolrSynonymParser.cs
@@ -25,36 +25,39 @@ namespace Lucene.Net.Analysis.Synonym
/// <summary>
/// Parser for the Solr synonyms format.
- /// <ol>
- /// <li> Blank lines and lines starting with '#' are comments.
- /// <li> Explicit mappings match any token sequence on the LHS of "=>"
- /// and replace with all alternatives on the RHS. These types of mappings
- /// ignore the expand parameter in the constructor.
- /// Example:
- /// <blockquote>i-pod, i pod => ipod</blockquote>
- /// <li> Equivalent synonyms may be separated with commas and give
- /// no explicit mapping. In this case the mapping behavior will
- /// be taken from the expand parameter in the constructor. This allows
- /// the same synonym file to be used in different synonym handling strategies.
- /// Example:
- /// <blockquote>ipod, i-pod, i pod</blockquote>
- ///
- /// <li> Multiple synonym mapping entries are merged.
- /// Example:
- /// <blockquote>
- /// foo => foo bar<br>
- /// foo => baz<br><br>
- /// is equivalent to<br><br>
- /// foo => foo bar, baz
- /// </blockquote>
- /// </ol>
+ /// <list type="bullet">
+ /// <item> Blank lines and lines starting with '#' are comments.</item>
+ /// <item> Explicit mappings match any token sequence on the LHS of "=>"
+ /// and replace with all alternatives on the RHS. These types of mappings
+ /// ignore the expand parameter in the constructor.
+ /// Example:
+ /// <code>i-pod, i pod => ipod</code>
+ /// </item>
+ /// <item> Equivalent synonyms may be separated with commas and give
+ /// no explicit mapping. In this case the mapping behavior will
+ /// be taken from the expand parameter in the constructor. This allows
+ /// the same synonym file to be used in different synonym handling strategies.
+ /// Example:
+ /// <code>ipod, i-pod, i pod</code>
+ /// </item>
+ /// <item> Multiple synonym mapping entries are merged.
+ /// Example:
+ /// <code>
+ /// foo => foo bar
+ /// foo => baz
+ /// is equivalent to
+ /// foo => foo bar, baz
+ /// </code>
+ /// </item>
+ /// </list>
/// @lucene.experimental
/// </summary>
public class SolrSynonymParser : SynonymMap.Parser
{
private readonly bool expand;
- public SolrSynonymParser(bool dedup, bool expand, Analyzer analyzer) : base(dedup, analyzer)
+ public SolrSynonymParser(bool dedup, bool expand, Analyzer analyzer)
+ : base(dedup, analyzer)
{
this.expand = expand;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
index 112c7fa..52bb61d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -1,10 +1,10 @@
-\ufeffusing System;
-using System.Diagnostics;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
+using System;
+using System.Diagnostics;
namespace Lucene.Net.Analysis.Synonym
{
@@ -36,14 +36,14 @@ namespace Lucene.Net.Analysis.Synonym
/// starting the earliest and parsing the most tokens wins.
/// For example if you have these rules:
///
- /// <pre>
+ /// <code>
/// a -> x
/// a b -> y
/// b c d -> z
/// </code>
///
- /// Then input <code>a b c d e</code> parses to <code>y b c
- /// d</code>, ie the 2nd rule "wins" because it started
+ /// Then input <c>a b c d e</c> parses to <c>y b c
+ /// d</c>, ie the 2nd rule "wins" because it started
/// earliest and matched the most input tokens of other rules
/// starting at that point.</para>
///
@@ -56,14 +56,14 @@ namespace Lucene.Net.Analysis.Synonym
/// <para><b>NOTE</b>: when a match occurs, the output tokens
/// associated with the matching rule are "stacked" on top of
/// the input stream (if the rule had
- /// <code>keepOrig=true</code>) and also on top of another
+ /// <c>keepOrig=true</c>) and also on top of another
/// matched rule's output tokens. This is not a correct
/// solution, as really the output should be an arbitrary
/// graph/lattice. For example, with the above match, you
- /// would expect an exact <code>PhraseQuery</code> <code>"y b
- /// c"</code> to match the parsed tokens, but it will fail to
+ /// would expect an exact <see cref="Search.PhraseQuery"/> <c>"y b
+ /// c"</c> to match the parsed tokens, but it will fail to
/// do so. This limitation is necessary because Lucene's
- /// TokenStream (and index) cannot yet represent an arbitrary
+ /// <see cref="TokenStream"/> (and index) cannot yet represent an arbitrary
/// graph.</para>
///
/// <para><b>NOTE</b>: If multiple incoming tokens arrive on the
@@ -95,7 +95,6 @@ namespace Lucene.Net.Analysis.Synonym
public sealed class SynonymFilter : TokenFilter
{
-
public const string TYPE_SYNONYM = "SYNONYM";
private readonly SynonymMap synonyms;
@@ -265,10 +264,11 @@ namespace Lucene.Net.Analysis.Synonym
/// <param name="input"> input tokenstream </param>
/// <param name="synonyms"> synonym map </param>
- /// <param name="ignoreCase"> case-folds input for matching with <see cref="Character#toLowerCase(int)"/>.
+ /// <param name="ignoreCase"> case-folds input for matching with <see cref="Character.ToLowerCase(int)"/>.
/// Note, if you set this to true, its your responsibility to lowercase
/// the input entries when you create the <see cref="SynonymMap"/> </param>
- public SynonymFilter(TokenStream input, SynonymMap synonyms, bool ignoreCase) : base(input)
+ public SynonymFilter(TokenStream input, SynonymMap synonyms, bool ignoreCase)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
index 4c05334..80699e6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
@@ -1,7 +1,7 @@
-\ufeffusing System;
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Synonym
{
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Synonym
/// <summary>
/// Factory for <see cref="SynonymFilter"/>.
- /// <pre class="prettyprint" >
+ /// <code>
/// <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -37,22 +37,22 @@ namespace Lucene.Net.Analysis.Synonym
///
/// <para>
/// An optional param name prefix of "tokenizerFactory." may be used for any
- /// init params that the SynonymFilterFactory needs to pass to the specified
- /// TokenizerFactory. If the TokenizerFactory expects an init parameters with
- /// the same name as an init param used by the SynonymFilterFactory, the prefix
+ /// init params that the <see cref="SynonymFilterFactory"/> needs to pass to the specified
+ /// <see cref="TokenizerFactory"/>. If the <see cref="TokenizerFactory"/> expects init parameters with
+ /// the same name as an init param used by the <see cref="SynonymFilterFactory"/>, the prefix
/// is mandatory.
/// </para>
/// <para>
- /// The optional {@code format} parameter controls how the synonyms will be parsed:
- /// It supports the short names of {@code solr} for <see cref="SolrSynonymParser"/>
- /// and {@code wordnet} for and <see cref="WordnetSynonymParser"/>, or your own
- /// {@code SynonymMap.Parser} class name. The default is {@code solr}.
+ /// The optional <c>format</c> parameter controls how the synonyms will be parsed:
+ /// It supports the short names of <c>solr</c> for <see cref="SolrSynonymParser"/>
+ /// and <c>wordnet</c> for <see cref="WordnetSynonymParser"/>, or your own
+ /// <see cref="SynonymMap.Parser"/> class name. The default is <c>solr</c>.
/// A custom <see cref="SynonymMap.Parser"/> is expected to have a constructor taking:
- /// <ul>
- /// <li><code>boolean dedup</code> - true if duplicates should be ignored, false otherwise</li>
- /// <li><code>boolean expand</code> - true if conflation groups should be expanded, false if they are one-directional</li>
- /// <li><code><see cref="Analyzer"/> analyzer</code> - an analyzer used for each raw synonym</li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item><c><see cref="bool"/> dedup</c> - true if duplicates should be ignored, false otherwise</item>
+ /// <item><c><see cref="bool"/> expand</c> - true if conflation groups should be expanded, false if they are one-directional</item>
+ /// <item><c><see cref="Analyzer"/> analyzer</c> - an analyzer used for each raw synonym</item>
+ /// </list>
/// </para>
/// </summary>
public class SynonymFilterFactory : TokenFilterFactory, IResourceLoaderAware
@@ -94,7 +94,7 @@ namespace Lucene.Net.Analysis.Synonym
}
/// <summary>
- /// Access to the delegator TokenFilterFactory for test verification
+ /// Access to the delegator <see cref="TokenFilterFactory"/> for test verification
/// </summary>
/// @deprecated Method exists only for testing 4x, will be removed in 5.0
/// @lucene.internal
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
index ca9e038..1c37c85 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Synonym
/// <summary>
/// Builds an FSTSynonymMap.
/// <para>
- /// Call add() until you have added all the mappings, then call build() to get an FSTSynonymMap
+ /// Call <see cref="Add(CharsRef, CharsRef, bool)"/> until you have added all the mappings, then call <see cref="Build"/> to get an FSTSynonymMap
/// @lucene.experimental
/// </para>
/// </summary>
@@ -100,9 +100,9 @@ namespace Lucene.Net.Analysis.Synonym
}
/// <summary>
- /// Sugar: just joins the provided terms with {@link
- /// SynonymMap#WORD_SEPARATOR}. reuse and its chars
- /// must not be null.
+ /// Sugar: just joins the provided terms with
+ /// <see cref="SynonymMap.WORD_SEPARATOR"/>. reuse and its chars
+ /// must not be null.
/// </summary>
public static CharsRef Join(string[] words, CharsRef reuse)
{
@@ -339,8 +339,7 @@ namespace Lucene.Net.Analysis.Synonym
/// </summary>
public abstract class Parser : Builder
{
-
- internal readonly Analyzer analyzer;
+ private readonly Analyzer analyzer;
public Parser(bool dedup, Analyzer analyzer)
: base(dedup)
@@ -355,8 +354,8 @@ namespace Lucene.Net.Analysis.Synonym
/// <summary>
/// Sugar: analyzes the text with the analyzer and
- /// separates by <see cref="SynonymMap#WORD_SEPARATOR"/>.
- /// reuse and its chars must not be null.
+ /// separates by <see cref="SynonymMap.WORD_SEPARATOR"/>.
+ /// reuse and its chars must not be null.
/// </summary>
public virtual CharsRef Analyze(string text, CharsRef reuse)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f3d7fb6/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
index 53677b0..604cbb8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/WordnetSynonymParser.cs
@@ -1,6 +1,6 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Util;
+using System;
using System.IO;
-using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Synonym
{
@@ -33,7 +33,8 @@ namespace Lucene.Net.Analysis.Synonym
{
private readonly bool expand;
- public WordnetSynonymParser(bool dedup, bool expand, Analyzer analyzer) : base(dedup, analyzer)
+ public WordnetSynonymParser(bool dedup, bool expand, Analyzer analyzer)
+ : base(dedup, analyzer)
{
this.expand = expand;
}
[12/39] lucenenet git commit: Lucene.Net.Analysis.Util
(AbstractAnalysisFactory + AnalysisSPILoader + BufferedCharFilter +
CharacterUtils) refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util (AbstractAnalysisFactory + AnalysisSPILoader + BufferedCharFilter + CharacterUtils) refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/47155b3f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/47155b3f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/47155b3f
Branch: refs/heads/api-work
Commit: 47155b3f515def19d907d8a0cf4e20ee24ac6e24
Parents: dc21329
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 17:24:37 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:17 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/AbstractAnalysisFactory.cs | 65 ++++++-----
.../Analysis/Util/AnalysisSPILoader.cs | 12 +--
.../Analysis/Util/BufferedCharFilter.cs | 2 +-
.../Analysis/Util/CharacterUtils.cs | 108 +++++++++----------
.../Analysis/Util/TestCharacterUtils.cs | 4 +-
5 files changed, 97 insertions(+), 94 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47155b3f/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index 1eebb02..1d6d304 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.Core;
-using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
@@ -32,11 +31,12 @@ namespace Lucene.Net.Analysis.Util
/// <see cref="TokenFilterFactory"/> and <see cref="CharFilterFactory"/>.
/// <para>
/// The typical lifecycle for a factory consumer is:
- /// <ol>
- /// <li>Create factory via its constructor (or via XXXFactory.forName)</li>
- /// <li>(Optional) If the factory uses resources such as files, <see cref="ResourceLoaderAware#inform(ResourceLoader)"/> is called to initialize those resources.</li>
- /// <li>Consumer calls create() to obtain instances.</li>
- /// </ol>
+ /// <list type="bullet">
+ /// <item>Create factory via its constructor (or via XXXFactory.ForName)</item>
+ /// <item>(Optional) If the factory uses resources such as files,
+ /// <see cref="IResourceLoaderAware.Inform(IResourceLoader)"/> is called to initialize those resources.</item>
+ /// <item>Consumer calls create() to obtain instances.</item>
+ /// </list>
/// </para>
/// </summary>
public abstract class AbstractAnalysisFactory
@@ -49,14 +49,14 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// the luceneVersion arg </summary>
- protected internal readonly LuceneVersion m_luceneMatchVersion;
+ protected readonly LuceneVersion m_luceneMatchVersion;
/// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
- protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
+ protected AbstractAnalysisFactory(IDictionary<string, string> args)
{
- ExplicitLuceneMatchVersion = false;
+ IsExplicitLuceneMatchVersion = false;
originalArgs = Collections.UnmodifiableMap(args);
string version = Get(args, LUCENE_MATCH_VERSION_PARAM);
// LUCENENET TODO: What should we do if the version is null?
@@ -75,11 +75,11 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// this method can be called in the <see cref="TokenizerFactory#create(java.io.Reader)"/>
- /// or <see cref="TokenFilterFactory#create(org.apache.lucene.analysis.TokenStream)"/> methods,
- /// to inform user, that for this factory a <see cref="#luceneMatchVersion"/> is required
+ /// this method can be called in the <see cref="TokenizerFactory.Create(TextReader)"/>
+ /// or <see cref="TokenFilterFactory.Create(TokenStream)"/> methods,
+ /// to inform the user that for this factory a <see cref="m_luceneMatchVersion"/> is required
/// </summary>
- protected internal void AssureMatchVersion()
+ protected void AssureMatchVersion() // LUCENENET TODO: Remove this method (not used anyway in .NET)
{
// LUCENENET NOTE: since luceneMatchVersion can never be null in .NET,
// this method effectively does nothing. However, leaving it in place because
@@ -148,14 +148,17 @@ namespace Lucene.Net.Analysis.Util
args.Remove(name);
return s ?? defaultVal;
}
+
public virtual string Get(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
{
return Get(args, name, allowedValues, null); // defaultVal = null
}
+
public virtual string Get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal)
{
return Get(args, name, allowedValues, defaultVal, true);
}
+
public virtual string Get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal, bool caseSensitive)
{
string s = null;
@@ -188,11 +191,12 @@ namespace Lucene.Net.Analysis.Util
}
}
- protected internal int RequireInt(IDictionary<string, string> args, string name)
+ protected int RequireInt(IDictionary<string, string> args, string name)
{
return int.Parse(Require(args, name));
}
- protected internal int GetInt(IDictionary<string, string> args, string name, int defaultVal)
+
+ protected int GetInt(IDictionary<string, string> args, string name, int defaultVal)
{
string s;
if (args.TryGetValue(name, out s))
@@ -203,11 +207,12 @@ namespace Lucene.Net.Analysis.Util
return defaultVal;
}
- protected internal bool RequireBoolean(IDictionary<string, string> args, string name)
+ protected bool RequireBoolean(IDictionary<string, string> args, string name)
{
return bool.Parse(Require(args, name));
}
- protected internal bool GetBoolean(IDictionary<string, string> args, string name, bool defaultVal)
+
+ protected bool GetBoolean(IDictionary<string, string> args, string name, bool defaultVal)
{
string s;
if (args.TryGetValue(name, out s))
@@ -218,11 +223,12 @@ namespace Lucene.Net.Analysis.Util
return defaultVal;
}
- protected internal float RequireFloat(IDictionary<string, string> args, string name)
+ protected float RequireFloat(IDictionary<string, string> args, string name)
{
return float.Parse(Require(args, name));
}
- protected internal float GetFloat(IDictionary<string, string> args, string name, float defaultVal)
+
+ protected float GetFloat(IDictionary<string, string> args, string name, float defaultVal)
{
string s;
if (args.TryGetValue(name, out s))
@@ -237,6 +243,7 @@ namespace Lucene.Net.Analysis.Util
{
return Require(args, name)[0];
}
+
public virtual char GetChar(IDictionary<string, string> args, string name, char defaultVal)
{
string s;
@@ -284,9 +291,9 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Compiles a pattern for the value of the specified argument key <code>name</code>
+ /// Compiles a pattern for the value of the specified argument key <paramref name="name"/>
/// </summary>
- protected internal Regex GetPattern(IDictionary<string, string> args, string name)
+ protected Regex GetPattern(IDictionary<string, string> args, string name)
{
try
{
@@ -302,7 +309,7 @@ namespace Lucene.Net.Analysis.Util
/// Returns as <see cref="CharArraySet"/> from wordFiles, which
/// can be a comma-separated list of filenames
/// </summary>
- protected internal CharArraySet GetWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
+ protected CharArraySet GetWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
{
AssureMatchVersion();
IList<string> files = SplitFileNames(wordFiles);
@@ -324,16 +331,16 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Returns the resource's lines (with content treated as UTF-8)
/// </summary>
- protected internal IList<string> GetLines(IResourceLoader loader, string resource)
+ protected IList<string> GetLines(IResourceLoader loader, string resource)
{
return WordlistLoader.GetLines(loader.OpenResource(resource), Encoding.UTF8);
}
/// <summary>
- /// same as <see cref="#getWordSet(ResourceLoader, String, boolean)"/>,
+ /// Same as <see cref="GetWordSet(IResourceLoader, string, bool)"/>,
/// except the input is in snowball format.
/// </summary>
- protected internal CharArraySet GetSnowballWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
+ protected CharArraySet GetSnowballWordSet(IResourceLoader loader, string wordFiles, bool ignoreCase)
{
AssureMatchVersion();
IList<string> files = SplitFileNames(wordFiles);
@@ -363,7 +370,7 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
/// <param name="fileNames"> the string containing file names </param>
/// <returns> a list of file names with the escaping backslashed removed </returns>
- protected internal IList<string> SplitFileNames(string fileNames)
+ protected IList<string> SplitFileNames(string fileNames)
{
if (fileNames == null)
{
@@ -382,8 +389,8 @@ namespace Lucene.Net.Analysis.Util
private const string CLASS_NAME = "class";
/// <returns> the string used to specify the concrete class name in a serialized representation: the class arg.
- /// If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}. </returns>
- public virtual string ClassArg
+ /// If the concrete class name was not specified via a class arg, returns <c>GetType().Name</c>. </returns>
+ public virtual string ClassArg // LUCENENET TODO: Change to GetClassArg()
{
get
{
@@ -399,6 +406,6 @@ namespace Lucene.Net.Analysis.Util
}
}
- public virtual bool ExplicitLuceneMatchVersion { get; set; }
+ public virtual bool IsExplicitLuceneMatchVersion { get; set; }
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47155b3f/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
index 3ba2f08..c8502e6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -1,10 +1,7 @@
\ufeffusing Lucene.Net.Support;
using Lucene.Net.Util;
using System;
-using System.Collections;
using System.Collections.Generic;
-using System.Globalization;
-using System.Reflection;
namespace Lucene.Net.Analysis.Util
{
@@ -31,7 +28,6 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
internal sealed class AnalysisSPILoader<S> where S : AbstractAnalysisFactory
{
-
private volatile IDictionary<string, Type> services = Collections.EmptyMap<string, Type>();
private readonly Type clazz = typeof(S);
private readonly string[] suffixes;
@@ -49,14 +45,14 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Reloads the internal SPI list from the given <see cref="ClassLoader"/>.
+ /// Reloads the internal SPI list.
/// Changes to the service list are visible after the method ends, all
- /// iterators (<see cref="#iterator()"/>,...) stay consistent.
+ /// iterators (e.g, from <see cref="AvailableServices"/>,...) stay consistent.
///
- /// <p><b>NOTE:</b> Only new service providers are added, existing ones are
+ /// <para/><b>NOTE:</b> Only new service providers are added, existing ones are
/// never removed or replaced.
///
- /// <p><em>this method is expensive and should only be called for discovery
+ /// <para/><em>this method is expensive and should only be called for discovery
/// of new service providers on the given classpath/classloader!</em>
/// </summary>
public void Reload()
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47155b3f/src/Lucene.Net.Analysis.Common/Analysis/Util/BufferedCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/BufferedCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/BufferedCharFilter.cs
index 84cf093..c6103f3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/BufferedCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/BufferedCharFilter.cs
@@ -40,7 +40,7 @@ namespace Lucene.Net.Analysis.Util
private static int defaultExpectedLineLength = 80;
/// <summary>
- /// LUCENENET specific to throw an exception if the user calls Close() instead of Dispose()
+ /// LUCENENET specific to throw an exception if the user calls <see cref="Close()"/> instead of <see cref="TextReader.Dispose()"/>
/// </summary>
private bool isDisposing = false;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47155b3f/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index 3d8801d..d1177ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -1,13 +1,12 @@
-\ufeffusing System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-using Lucene.Net.Support;
+\ufeffusing Lucene.Net.Support;
using Lucene.Net.Util;
-using Reader = System.IO.TextReader;
-using Version = Lucene.Net.Util.LuceneVersion;
+using System;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.IO;
namespace Lucene.Net.Analysis.Util
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,6 +23,7 @@ namespace Lucene.Net.Analysis.Util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// <see cref="CharacterUtils"/> provides a unified interface to Character-related
/// operations to implement backwards compatible character operations based on a
@@ -62,10 +62,10 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Returns the code point at the given index of the <see cref="CharSequence"/>.
+ /// Returns the code point at the given index of the <see cref="ICharSequence"/>.
/// Depending on the <see cref="LuceneVersion"/> passed to
- /// <see cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
- /// of <see cref="Character#codePointAt(char[], int)"/> as it would have been
+ /// <see cref="CharacterUtils.GetInstance(LuceneVersion)"/> this method mimics the behavior
+ /// of <c>Character.CodePointAt(char[], int)</c> as it would have been
/// available on a Java 1.4 JVM or on a later virtual machine version.
/// </summary>
/// <param name="seq">
@@ -74,20 +74,21 @@ namespace Lucene.Net.Analysis.Util
/// the offset to the char values in the chars array to be converted
/// </param>
/// <returns> the Unicode code point at the given index </returns>
- /// <exception cref="NullPointerException">
+ /// <exception cref="NullReferenceException">
/// - if the sequence is null. </exception>
- /// <exception cref="IndexOutOfBoundsException">
+ /// <exception cref="IndexOutOfRangeException">
/// - if the value offset is negative or not less than the length of
/// the character sequence. </exception>
public abstract int CodePointAt(string seq, int offset);
+
public abstract int CodePointAt(ICharSequence seq, int offset);
/// <summary>
/// Returns the code point at the given index of the char array where only elements
/// with index less than the limit are used.
/// Depending on the <see cref="LuceneVersion"/> passed to
- /// <see cref="CharacterUtils#getInstance(Version)"/> this method mimics the behavior
- /// of <see cref="Character#codePointAt(char[], int)"/> as it would have been
+ /// <see cref="CharacterUtils.GetInstance(LuceneVersion)"/> this method mimics the behavior
+ /// of <c>Character.CodePointAt(char[], int)</c> as it would have been
/// available on a Java 1.4 JVM or on a later virtual machine version.
/// </summary>
/// <param name="chars">
@@ -98,23 +99,23 @@ namespace Lucene.Net.Analysis.Util
/// codepoint.
/// </param>
/// <returns> the Unicode code point at the given index </returns>
- /// <exception cref="NullPointerException">
+ /// <exception cref="NullReferenceException">
/// - if the array is null. </exception>
- /// <exception cref="IndexOutOfBoundsException">
+ /// <exception cref="IndexOutOfRangeException">
/// - if the value offset is negative or not less than the length of
/// the char array. </exception>
public abstract int CodePointAt(char[] chars, int offset, int limit);
/// <summary>
- /// Return the number of characters in <code>seq</code>. </summary>
+ /// Return the number of characters in <paramref name="seq"/>. </summary>
public abstract int CodePointCount(string seq);
/// <summary>
- /// Creates a new <see cref="CharacterBuffer"/> and allocates a <code>char[]</code>
+ /// Creates a new <see cref="CharacterBuffer"/> and allocates a <see cref="T:char[]"/>
/// of the given bufferSize.
/// </summary>
/// <param name="bufferSize">
- /// the internal char buffer size, must be <code>>= 2</code> </param>
+ /// the internal char buffer size, must be <c>>= 2</c> </param>
/// <returns> a new <see cref="CharacterBuffer"/> instance. </returns>
public static CharacterBuffer NewCharacterBuffer(int bufferSize)
{
@@ -127,7 +128,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
- /// Converts each unicode codepoint to lowerCase via <see cref="Character#toLowerCase(int)"/> starting
+ /// Converts each unicode codepoint to lowerCase via <see cref="Character.ToLowerCase(int)"/> starting
/// at the given offset. </summary>
/// <param name="buffer"> the char buffer to lowercase </param>
/// <param name="offset"> the offset to start at </param>
@@ -145,7 +146,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Converts each unicode codepoint to UpperCase via <see cref="Character#toUpperCase(int)"/> starting
+ /// Converts each unicode codepoint to UpperCase via <see cref="Character.ToUpperCase(int)"/> starting
/// at the given offset. </summary>
/// <param name="buffer"> the char buffer to UPPERCASE </param>
/// <param name="offset"> the offset to start at </param>
@@ -163,9 +164,9 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Converts a sequence of Java characters to a sequence of unicode code points. </summary>
+ /// Converts a sequence of .NET characters to a sequence of unicode code points. </summary>
/// <returns> the number of code points written to the destination buffer </returns>
- public int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
+ public int ToCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff)
{
if (srcLen < 0)
{
@@ -183,9 +184,9 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Converts a sequence of unicode code points to a sequence of Java characters. </summary>
+ /// Converts a sequence of unicode code points to a sequence of .NET characters. </summary>
/// <returns> the number of chars written to the destination buffer </returns>
- public int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
+ public int ToChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff)
{
if (srcLen < 0)
{
@@ -201,27 +202,27 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Fills the <see cref="CharacterBuffer"/> with characters read from the given
- /// reader <see cref="Reader"/>. This method tries to read <code>numChars</code>
+ /// reader <see cref="TextReader"/>. This method tries to read <code>numChars</code>
/// characters into the <see cref="CharacterBuffer"/>, each call to fill will start
- /// filling the buffer from offset <code>0</code> up to <code>numChars</code>.
+ /// filling the buffer from offset <c>0</c> up to <paramref name="numChars"/>.
/// In case code points can span across 2 java characters, this method may
- /// only fill <code>numChars - 1</code> characters in order not to split in
+ /// only fill <c>numChars - 1</c> characters in order not to split in
/// the middle of a surrogate pair, even if there are remaining characters in
- /// the <see cref="Reader"/>.
+ /// the <see cref="TextReader"/>.
/// <para>
/// Depending on the <see cref="LuceneVersion"/> passed to
- /// <see cref="CharacterUtils#getInstance(Version)"/> this method implements
+ /// <see cref="CharacterUtils.GetInstance(LuceneVersion)"/> this method implements
/// supplementary character awareness when filling the given buffer. For all
- /// <see cref="LuceneVersion"/> > 3.0 <see cref="#fill(CharacterBuffer, Reader, int)"/> guarantees
+ /// <see cref="LuceneVersion"/> > 3.0 <see cref="Fill(CharacterBuffer, TextReader, int)"/> guarantees
/// that the given <see cref="CharacterBuffer"/> will never contain a high surrogate
/// character as the last element in the buffer unless it is the last available
/// character in the reader. In other words, high and low surrogate pairs will
/// always be preserved across buffer boarders.
/// </para>
/// <para>
- /// A return value of <code>false</code> means that this method call exhausted
+ /// A return value of <c>false</c> means that this method call exhausted
/// the reader, but there may be some bytes which have been read, which can be
- /// verified by checking whether <code>buffer.getLength() > 0</code>.
+ /// verified by checking whether <c>buffer.Length > 0</c>.
/// </para>
/// </summary>
/// <param name="buffer">
@@ -233,22 +234,22 @@ namespace Lucene.Net.Analysis.Util
/// <returns> <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer </returns>
/// <exception cref="IOException">
/// if the reader throws an <see cref="IOException"/>. </exception>
- public abstract bool Fill(CharacterBuffer buffer, Reader reader, int numChars);
+ public abstract bool Fill(CharacterBuffer buffer, TextReader reader, int numChars);
/// <summary>
- /// Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. </summary>
- public virtual bool Fill(CharacterBuffer buffer, Reader reader)
+ /// Convenience method which calls <c>Fill(buffer, reader, buffer.Buffer.Length)</c>. </summary>
+ public virtual bool Fill(CharacterBuffer buffer, TextReader reader)
{
- return Fill(buffer, reader, buffer.buffer.Length);
+ return Fill(buffer, reader, buffer.Buffer.Length);
}
/// <summary>
- /// Return the index within <code>buf[start:start+count]</code> which is by <code>offset</code>
- /// code points from <code>index</code>.
+ /// Return the index within <c>buf[start:start+count]</c> which is by <paramref name="offset"/>
+ /// code points from <paramref name="index"/>.
/// </summary>
public abstract int OffsetByCodePoints(char[] buf, int start, int count, int index, int offset);
- internal static int ReadFully(Reader reader, char[] dest, int offset, int len)
+ private static int ReadFully(TextReader reader, char[] dest, int offset, int len)
{
int read = 0;
while (read < len)
@@ -276,17 +277,17 @@ namespace Lucene.Net.Analysis.Util
public override int CodePointAt(char[] chars, int offset, int limit)
{
- return Character.CodePointAt(chars, offset, limit);
+ return Character.CodePointAt(chars, offset, limit); // LUCENENET TODO: This will throw a NullReferenceException if chars is null. Should this be an ArgumentNullException in .NET?
}
- public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
+ public override bool Fill(CharacterBuffer buffer, TextReader reader, int numChars)
{
- Debug.Assert(buffer.buffer.Length >= 2);
- if (numChars < 2 || numChars > buffer.buffer.Length)
+ Debug.Assert(buffer.Buffer.Length >= 2);
+ if (numChars < 2 || numChars > buffer.Buffer.Length)
{
throw new System.ArgumentException("numChars must be >= 2 and <= the buffer size");
}
- char[] charBuffer = buffer.buffer;
+ char[] charBuffer = buffer.Buffer;
buffer.offset = 0;
int offset;
@@ -337,6 +338,7 @@ namespace Lucene.Net.Analysis.Util
{
return seq[offset];
}
+
public override int CodePointAt(ICharSequence seq, int offset)
{
return seq[offset];
@@ -348,18 +350,18 @@ namespace Lucene.Net.Analysis.Util
{
throw new System.IndexOutOfRangeException("offset must be less than limit");
}
- return chars[offset];
+ return chars[offset]; // LUCENENET TODO: This will throw a NullReferenceException if chars is null. Should this be an ArgumentNullException in .NET?
}
- public override bool Fill(CharacterBuffer buffer, Reader reader, int numChars)
+ public override bool Fill(CharacterBuffer buffer, TextReader reader, int numChars)
{
- Debug.Assert(buffer.buffer.Length >= 1);
- if (numChars < 1 || numChars > buffer.buffer.Length)
+ Debug.Assert(buffer.Buffer.Length >= 1);
+ if (numChars < 1 || numChars > buffer.Buffer.Length)
{
throw new System.ArgumentException("numChars must be >= 1 and <= the buffer size");
}
buffer.offset = 0;
- int read = ReadFully(reader, buffer.buffer, 0, numChars);
+ int read = ReadFully(reader, buffer.Buffer, 0, numChars);
buffer.length = read;
buffer.lastTrailingHighSurrogate = (char)0;
return read == numChars;
@@ -379,17 +381,15 @@ namespace Lucene.Net.Analysis.Util
}
return result;
}
-
}
/// <summary>
/// A simple IO buffer to use with
- /// <see cref="CharacterUtils#fill(CharacterBuffer, Reader)"/>.
+ /// <see cref="CharacterUtils.Fill(CharacterBuffer, TextReader)"/>.
/// </summary>
public sealed class CharacterBuffer
{
-
- internal readonly char[] buffer;
+ private readonly char[] buffer;
internal int offset;
internal int length;
// NOTE: not private so outer class can access without
@@ -431,7 +431,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Return the length of the data in the internal buffer starting at
- /// <see cref="#getOffset()"/>
+ /// <see cref="Offset"/>
/// </summary>
/// <returns> the length </returns>
public int Length
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/47155b3f/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
index 306f16c..2a842c9 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharacterUtils.cs
@@ -159,8 +159,8 @@ namespace Lucene.Net.Analysis.Util
var o1 = TestUtil.NextInt(Random(), 0, Math.Min(5, orig.Length));
var o2 = TestUtil.NextInt(Random(), 0, o1);
var o3 = TestUtil.NextInt(Random(), 0, o1);
- var codePointCount = charUtils.toCodePoints(orig, o1, orig.Length - o1, buf, o2);
- var charCount = charUtils.toChars(buf, o2, codePointCount, restored, o3);
+ var codePointCount = charUtils.ToCodePoints(orig, o1, orig.Length - o1, buf, o2);
+ var charCount = charUtils.ToChars(buf, o2, codePointCount, restored, o3);
assertEquals(orig.Length - o1, charCount);
assertArrayEquals(Arrays.CopyOfRange(orig, o1, o1 + charCount), Arrays.CopyOfRange(restored, o3, o3 + charCount));
}
[23/39] lucenenet git commit:
Lucene.Net.Analysis.Miscellaneous.SingleTokenTokenStream refactor: changed
Token property to GetToken() and SetToken(Token) (makes a clone each way)
Posted by ni...@apache.org.
Lucene.Net.Analysis.Miscellaneous.SingleTokenTokenStream refactor: changed Token property to GetToken() and SetToken(Token) (makes a clone each way)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0697c7a4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0697c7a4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0697c7a4
Branch: refs/heads/api-work
Commit: 0697c7a4c14b6ba341ebff33e3409e375e826bd2
Parents: 4abfb34
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 00:39:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 00:39:07 2017 +0700
----------------------------------------------------------------------
.../Analysis/Miscellaneous/SingleTokenTokenStream.cs | 10 +++++++---
.../Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs | 2 +-
2 files changed, 8 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0697c7a4/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
index cf8b0b9..ff1a0ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
@@ -61,10 +61,14 @@ namespace Lucene.Net.Analysis.Miscellaneous
exhausted = false;
}
- public Token Token // LUCENENET TODO: Change to GetToken() and SetToken() (conversion)
+ public Token GetToken() // LUCENENET NOTE: These remain methods because they make a conversion of the value
{
- get { return (Token)singleToken.Clone(); }
- set { this.singleToken = (Token)value.Clone(); }
+ return (Token)singleToken.Clone();
+ }
+
+ public void SetToken(Token token)
+ {
+ this.singleToken = (Token)token.Clone();
}
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0697c7a4/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
index 614e0b3..a3da13c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestSingleTokenTokenFilter.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
assertFalse(ts.IncrementToken());
token = new Token("hallo", 10, 20, "someType");
- ts.Token = token;
+ ts.SetToken(token);
ts.Reset();
assertTrue(ts.IncrementToken());
[22/39] lucenenet git commit: Lucene.Net.Analysis.Common/Tartarus
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Common/Tartarus refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4abfb341
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4abfb341
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4abfb341
Branch: refs/heads/api-work
Commit: 4abfb341282149cee14431e58f98b84921eab7bc
Parents: 898a818
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 23:06:18 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:24 2017 +0700
----------------------------------------------------------------------
.../Tartarus/Snowball/Among.cs | 2 +-
.../Tartarus/Snowball/Ext/ArmenianStemmer.cs | 26 +-
.../Tartarus/Snowball/Ext/BasqueStemmer.cs | 68 ++---
.../Tartarus/Snowball/Ext/CatalanStemmer.cs | 64 ++--
.../Tartarus/Snowball/Ext/DanishStemmer.cs | 38 +--
.../Tartarus/Snowball/Ext/DutchStemmer.cs | 104 +++----
.../Tartarus/Snowball/Ext/EnglishStemmer.cs | 176 +++++------
.../Tartarus/Snowball/Ext/FinnishStemmer.cs | 124 ++++----
.../Tartarus/Snowball/Ext/FrenchStemmer.cs | 198 ++++++-------
.../Tartarus/Snowball/Ext/German2Stemmer.cs | 90 +++---
.../Tartarus/Snowball/Ext/GermanStemmer.cs | 84 +++---
.../Tartarus/Snowball/Ext/HungarianStemmer.cs | 190 ++++++------
.../Tartarus/Snowball/Ext/IrishStemmer.cs | 82 ++---
.../Tartarus/Snowball/Ext/ItalianStemmer.cs | 132 ++++-----
.../Tartarus/Snowball/Ext/KpStemmer.cs | 296 +++++++++----------
.../Tartarus/Snowball/Ext/LovinsStemmer.cs | 200 ++++++-------
.../Tartarus/Snowball/Ext/NorwegianStemmer.cs | 28 +-
.../Tartarus/Snowball/Ext/PorterStemmer.cs | 120 ++++----
.../Tartarus/Snowball/Ext/PortugueseStemmer.cs | 112 +++----
.../Tartarus/Snowball/Ext/RomanianStemmer.cs | 104 +++----
.../Tartarus/Snowball/Ext/RussianStemmer.cs | 76 ++---
.../Tartarus/Snowball/Ext/SpanishStemmer.cs | 132 ++++-----
.../Tartarus/Snowball/Ext/SwedishStemmer.cs | 26 +-
.../Tartarus/Snowball/Ext/TurkishStemmer.cs | 254 ++++++++--------
.../Tartarus/Snowball/SnowballProgram.cs | 137 +++++----
25 files changed, 1431 insertions(+), 1432 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Among.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Among.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Among.cs
index 72c49f4..09e015e 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Among.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Among.cs
@@ -50,7 +50,7 @@ namespace Lucene.Net.Tartarus.Snowball
private readonly Type[] EMPTY_PARAMS = new Type[0];
public Among(string s, int substring_i, int result,
- string methodname, SnowballProgram methodobject)
+ string methodname, SnowballProgram methodobject)
{
this.s_size = s.Length;
this.s = s.ToCharArray();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ArmenianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ArmenianStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ArmenianStemmer.cs
index b96805c..bcd82d2 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ArmenianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/ArmenianStemmer.cs
@@ -220,7 +220,7 @@
{
I_p2 = other.I_p2;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -242,7 +242,7 @@
do
{
- if (!(in_grouping(g_v, 1377, 1413)))
+ if (!(InGrouping(g_v, 1377, 1413)))
{
goto lab2;
}
@@ -265,7 +265,7 @@
do
{
- if (!(out_grouping(g_v, 1377, 1413)))
+ if (!(OutGrouping(g_v, 1377, 1413)))
{
goto lab4;
}
@@ -286,7 +286,7 @@
do
{
- if (!(in_grouping(g_v, 1377, 1413)))
+ if (!(InGrouping(g_v, 1377, 1413)))
{
goto lab6;
}
@@ -307,7 +307,7 @@
do
{
- if (!(out_grouping(g_v, 1377, 1413)))
+ if (!(OutGrouping(g_v, 1377, 1413)))
{
goto lab8;
}
@@ -345,7 +345,7 @@
// [, line 73
m_ket = m_cursor;
// substring, line 73
- among_var = find_among_b(a_0, 23);
+ among_var = FindAmongB(a_0, 23);
if (among_var == 0)
{
return false;
@@ -359,7 +359,7 @@
case 1:
// (, line 98
// delete, line 98
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -372,7 +372,7 @@
// [, line 103
m_ket = m_cursor;
// substring, line 103
- among_var = find_among_b(a_1, 71);
+ among_var = FindAmongB(a_1, 71);
if (among_var == 0)
{
return false;
@@ -386,7 +386,7 @@
case 1:
// (, line 176
// delete, line 176
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -399,7 +399,7 @@
// [, line 181
m_ket = m_cursor;
// substring, line 181
- among_var = find_among_b(a_2, 40);
+ among_var = FindAmongB(a_2, 40);
if (among_var == 0)
{
return false;
@@ -413,7 +413,7 @@
case 1:
// (, line 223
// delete, line 223
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -426,7 +426,7 @@
// [, line 228
m_ket = m_cursor;
// substring, line 228
- among_var = find_among_b(a_3, 57);
+ among_var = FindAmongB(a_3, 57);
if (among_var == 0)
{
return false;
@@ -445,7 +445,7 @@
case 1:
// (, line 287
// delete, line 287
- slice_del();
+ SliceDel();
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/BasqueStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/BasqueStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/BasqueStemmer.cs
index 47967f2..609e032 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/BasqueStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/BasqueStemmer.cs
@@ -451,7 +451,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
I_pV = other.I_pV;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -480,7 +480,7 @@
do
{
// (, line 32
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab2;
}
@@ -493,7 +493,7 @@
do
{
// (, line 32
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab4;
}
@@ -504,7 +504,7 @@
do
{
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab6;
}
@@ -523,7 +523,7 @@
lab4:
m_cursor = v_3;
// (, line 32
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab2;
}
@@ -534,7 +534,7 @@
do
{
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab8;
}
@@ -555,7 +555,7 @@
lab2:
m_cursor = v_2;
// (, line 34
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab0;
}
@@ -568,7 +568,7 @@
do
{
// (, line 34
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab10;
}
@@ -579,7 +579,7 @@
do
{
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab12;
}
@@ -598,7 +598,7 @@
lab10:
m_cursor = v_6;
// (, line 34
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab0;
}
@@ -630,7 +630,7 @@
do
{
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab15;
}
@@ -651,7 +651,7 @@
do
{
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab17;
}
@@ -674,7 +674,7 @@
do
{
- if (!(in_grouping(g_v, 97, 117)))
+ if (!(InGrouping(g_v, 97, 117)))
{
goto lab19;
}
@@ -695,7 +695,7 @@
do
{
- if (!(out_grouping(g_v, 97, 117)))
+ if (!(OutGrouping(g_v, 97, 117)))
{
goto lab21;
}
@@ -751,7 +751,7 @@
// [, line 50
m_ket = m_cursor;
// substring, line 50
- among_var = find_among_b(a_0, 109);
+ among_var = FindAmongB(a_0, 109);
if (among_var == 0)
{
return false;
@@ -770,7 +770,7 @@
return false;
}
// delete, line 61
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 63
@@ -780,22 +780,22 @@
return false;
}
// delete, line 63
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 65
// <-, line 65
- slice_from("atseden");
+ SliceFrom("atseden");
break;
case 4:
// (, line 67
// <-, line 67
- slice_from("arabera");
+ SliceFrom("arabera");
break;
case 5:
// (, line 69
// <-, line 69
- slice_from("baditu");
+ SliceFrom("baditu");
break;
}
return true;
@@ -808,7 +808,7 @@
// [, line 75
m_ket = m_cursor;
// substring, line 75
- among_var = find_among_b(a_1, 295);
+ among_var = FindAmongB(a_1, 295);
if (among_var == 0)
{
return false;
@@ -827,7 +827,7 @@
return false;
}
// delete, line 105
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 107
@@ -837,12 +837,12 @@
return false;
}
// delete, line 107
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 109
// <-, line 109
- slice_from("jok");
+ SliceFrom("jok");
break;
case 4:
// (, line 111
@@ -852,37 +852,37 @@
return false;
}
// delete, line 111
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 113
// <-, line 113
- slice_from("tra");
+ SliceFrom("tra");
break;
case 6:
// (, line 115
// <-, line 115
- slice_from("minutu");
+ SliceFrom("minutu");
break;
case 7:
// (, line 117
// <-, line 117
- slice_from("zehar");
+ SliceFrom("zehar");
break;
case 8:
// (, line 119
// <-, line 119
- slice_from("geldi");
+ SliceFrom("geldi");
break;
case 9:
// (, line 121
// <-, line 121
- slice_from("igaro");
+ SliceFrom("igaro");
break;
case 10:
// (, line 123
// <-, line 123
- slice_from("aurka");
+ SliceFrom("aurka");
break;
}
return true;
@@ -895,7 +895,7 @@
// [, line 128
m_ket = m_cursor;
// substring, line 128
- among_var = find_among_b(a_2, 19);
+ among_var = FindAmongB(a_2, 19);
if (among_var == 0)
{
return false;
@@ -914,12 +914,12 @@
return false;
}
// delete, line 131
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 133
// <-, line 133
- slice_from("z");
+ SliceFrom("z");
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/CatalanStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/CatalanStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/CatalanStemmer.cs
index 41ff1ab..f782716 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/CatalanStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/CatalanStemmer.cs
@@ -589,7 +589,7 @@
{
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -611,7 +611,7 @@
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab2;
}
@@ -632,7 +632,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab4;
}
@@ -655,7 +655,7 @@
do
{
- if (!(in_grouping(g_v, 97, 252)))
+ if (!(InGrouping(g_v, 97, 252)))
{
goto lab6;
}
@@ -676,7 +676,7 @@
do
{
- if (!(out_grouping(g_v, 97, 252)))
+ if (!(OutGrouping(g_v, 97, 252)))
{
goto lab8;
}
@@ -714,7 +714,7 @@
// [, line 53
m_bra = m_cursor;
// substring, line 53
- among_var = find_among(a_0, 13);
+ among_var = FindAmong(a_0, 13);
if (among_var == 0)
{
goto lab1;
@@ -728,62 +728,62 @@
case 1:
// (, line 54
// <-, line 54
- slice_from("a");
+ SliceFrom("a");
break;
case 2:
// (, line 55
// <-, line 55
- slice_from("a");
+ SliceFrom("a");
break;
case 3:
// (, line 56
// <-, line 56
- slice_from("e");
+ SliceFrom("e");
break;
case 4:
// (, line 57
// <-, line 57
- slice_from("e");
+ SliceFrom("e");
break;
case 5:
// (, line 58
// <-, line 58
- slice_from("i");
+ SliceFrom("i");
break;
case 6:
// (, line 59
// <-, line 59
- slice_from("i");
+ SliceFrom("i");
break;
case 7:
// (, line 60
// <-, line 60
- slice_from("o");
+ SliceFrom("o");
break;
case 8:
// (, line 61
// <-, line 61
- slice_from("o");
+ SliceFrom("o");
break;
case 9:
// (, line 62
// <-, line 62
- slice_from("u");
+ SliceFrom("u");
break;
case 10:
// (, line 63
// <-, line 63
- slice_from("u");
+ SliceFrom("u");
break;
case 11:
// (, line 64
// <-, line 64
- slice_from("i");
+ SliceFrom("i");
break;
case 12:
// (, line 65
// <-, line 65
- slice_from(".");
+ SliceFrom(".");
break;
case 13:
// (, line 66
@@ -839,7 +839,7 @@
// [, line 76
m_ket = m_cursor;
// substring, line 76
- among_var = find_among_b(a_1, 39);
+ among_var = FindAmongB(a_1, 39);
if (among_var == 0)
{
return false;
@@ -858,7 +858,7 @@
return false;
}
// delete, line 86
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -871,7 +871,7 @@
// [, line 91
m_ket = m_cursor;
// substring, line 91
- among_var = find_among_b(a_2, 200);
+ among_var = FindAmongB(a_2, 200);
if (among_var == 0)
{
return false;
@@ -890,7 +890,7 @@
return false;
}
// delete, line 115
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 117
@@ -900,7 +900,7 @@
return false;
}
// delete, line 117
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 119
@@ -910,7 +910,7 @@
return false;
}
// <-, line 119
- slice_from("log");
+ SliceFrom("log");
break;
case 4:
// (, line 121
@@ -920,7 +920,7 @@
return false;
}
// <-, line 121
- slice_from("ic");
+ SliceFrom("ic");
break;
case 5:
// (, line 123
@@ -930,7 +930,7 @@
return false;
}
// <-, line 123
- slice_from("c");
+ SliceFrom("c");
break;
}
return true;
@@ -943,7 +943,7 @@
// [, line 128
m_ket = m_cursor;
// substring, line 128
- among_var = find_among_b(a_3, 283);
+ among_var = FindAmongB(a_3, 283);
if (among_var == 0)
{
return false;
@@ -962,7 +962,7 @@
return false;
}
// delete, line 173
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 175
@@ -972,7 +972,7 @@
return false;
}
// delete, line 175
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -985,7 +985,7 @@
// [, line 180
m_ket = m_cursor;
// substring, line 180
- among_var = find_among_b(a_4, 22);
+ among_var = FindAmongB(a_4, 22);
if (among_var == 0)
{
return false;
@@ -1004,7 +1004,7 @@
return false;
}
// delete, line 183
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 185
@@ -1014,7 +1014,7 @@
return false;
}
// <-, line 185
- slice_from("ic");
+ SliceFrom("ic");
break;
}
return true;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DanishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DanishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DanishStemmer.cs
index 54bcfc4..6176a2e 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DanishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DanishStemmer.cs
@@ -73,7 +73,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -105,7 +105,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(in_grouping(g_v, 97, 248)))
+ if (!(InGrouping(g_v, 97, 248)))
{
goto lab1;
}
@@ -128,7 +128,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(out_grouping(g_v, 97, 248)))
+ if (!(OutGrouping(g_v, 97, 248)))
{
goto lab3;
}
@@ -180,7 +180,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 41
m_ket = m_cursor;
// substring, line 41
- among_var = find_among_b(a_0, 32);
+ among_var = FindAmongB(a_0, 32);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -196,16 +196,16 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 48
// delete, line 48
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 50
- if (!(in_grouping_b(g_s_ending, 97, 229)))
+ if (!(InGroupingB(g_s_ending, 97, 229)))
{
return false;
}
// delete, line 50
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -235,7 +235,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 56
m_ket = m_cursor;
// substring, line 56
- if (find_among_b(a_1, 4) == 0)
+ if (FindAmongB(a_1, 4) == 0)
{
m_limit_backward = v_3;
return false;
@@ -253,7 +253,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 62
m_bra = m_cursor;
// delete, line 62
- slice_del();
+ SliceDel();
return true;
}
@@ -274,19 +274,19 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 66
m_ket = m_cursor;
// literal, line 66
- if (!(eq_s_b(2, "st")))
+ if (!(Eq_S_B(2, "st")))
{
goto lab0;
}
// ], line 66
m_bra = m_cursor;
// literal, line 66
- if (!(eq_s_b(2, "ig")))
+ if (!(Eq_S_B(2, "ig")))
{
goto lab0;
}
// delete, line 66
- slice_del();
+ SliceDel();
} while (false);
lab0:
m_cursor = m_limit - v_1;
@@ -305,7 +305,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 67
m_ket = m_cursor;
// substring, line 67
- among_var = find_among_b(a_2, 5);
+ among_var = FindAmongB(a_2, 5);
if (among_var == 0)
{
m_limit_backward = v_3;
@@ -321,7 +321,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 70
// delete, line 70
- slice_del();
+ SliceDel();
// do, line 70
v_4 = m_limit - m_cursor;
@@ -339,7 +339,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 2:
// (, line 72
// <-, line 72
- slice_from("l\u00F8s");
+ SliceFrom("l\u00F8s");
break;
}
return true;
@@ -364,7 +364,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// (, line 76
// [, line 76
m_ket = m_cursor;
- if (!(out_grouping_b(g_v, 97, 248)))
+ if (!(OutGroupingB(g_v, 97, 248)))
{
m_limit_backward = v_2;
return false;
@@ -372,15 +372,15 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 76
m_bra = m_cursor;
// -> ch, line 76
- S_ch = slice_to(S_ch);
+ S_ch = SliceTo(S_ch);
m_limit_backward = v_2;
// name ch, line 77
- if (!(eq_v_b(S_ch.ToString())))
+ if (!(Eq_V_B(S_ch.ToString())))
{
return false;
}
// delete, line 78
- slice_del();
+ SliceDel();
return true;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DutchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DutchStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DutchStemmer.cs
index aec6e3a..0152f64 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DutchStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/DutchStemmer.cs
@@ -73,7 +73,7 @@
I_p2 = other.I_p2;
I_p1 = other.I_p1;
B_e_found = other.B_e_found;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -100,7 +100,7 @@
// [, line 43
m_bra = m_cursor;
// substring, line 43
- among_var = find_among(a_0, 11);
+ among_var = FindAmong(a_0, 11);
if (among_var == 0)
{
goto lab1;
@@ -114,27 +114,27 @@
case 1:
// (, line 45
// <-, line 45
- slice_from("a");
+ SliceFrom("a");
break;
case 2:
// (, line 47
// <-, line 47
- slice_from("e");
+ SliceFrom("e");
break;
case 3:
// (, line 49
// <-, line 49
- slice_from("i");
+ SliceFrom("i");
break;
case 4:
// (, line 51
// <-, line 51
- slice_from("o");
+ SliceFrom("o");
break;
case 5:
// (, line 53
// <-, line 53
- slice_from("u");
+ SliceFrom("u");
break;
case 6:
// (, line 54
@@ -172,7 +172,7 @@
// [, line 57
m_bra = m_cursor;
// literal, line 57
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
m_cursor = v_3;
goto lab2;
@@ -180,7 +180,7 @@
// ], line 57
m_ket = m_cursor;
// <-, line 57
- slice_from("Y");
+ SliceFrom("Y");
} while (false);
lab2:
// repeat, line 58
@@ -200,7 +200,7 @@
do
{
// (, line 58
- if (!(in_grouping(g_v, 97, 232)))
+ if (!(InGrouping(g_v, 97, 232)))
{
goto lab6;
}
@@ -216,32 +216,32 @@
{
// (, line 59
// literal, line 59
- if (!(eq_s(1, "i")))
+ if (!(Eq_S(1, "i")))
{
goto lab8;
}
// ], line 59
m_ket = m_cursor;
- if (!(in_grouping(g_v, 97, 232)))
+ if (!(InGrouping(g_v, 97, 232)))
{
goto lab8;
}
// <-, line 59
- slice_from("I");
+ SliceFrom("I");
goto lab7;
} while (false);
lab8:
m_cursor = v_6;
// (, line 60
// literal, line 60
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab6;
}
// ], line 60
m_ket = m_cursor;
// <-, line 60
- slice_from("Y");
+ SliceFrom("Y");
} while (false);
lab7:
m_cursor = v_5;
@@ -287,7 +287,7 @@
do
{
- if (!(in_grouping(g_v, 97, 232)))
+ if (!(InGrouping(g_v, 97, 232)))
{
goto lab1;
}
@@ -308,7 +308,7 @@
do
{
- if (!(out_grouping(g_v, 97, 232)))
+ if (!(OutGrouping(g_v, 97, 232)))
{
goto lab3;
}
@@ -343,7 +343,7 @@
do
{
- if (!(in_grouping(g_v, 97, 232)))
+ if (!(InGrouping(g_v, 97, 232)))
{
goto lab6;
}
@@ -364,7 +364,7 @@
do
{
- if (!(out_grouping(g_v, 97, 232)))
+ if (!(OutGrouping(g_v, 97, 232)))
{
goto lab8;
}
@@ -399,7 +399,7 @@
// [, line 77
m_bra = m_cursor;
// substring, line 77
- among_var = find_among(a_1, 3);
+ among_var = FindAmong(a_1, 3);
if (among_var == 0)
{
goto lab1;
@@ -413,12 +413,12 @@
case 1:
// (, line 78
// <-, line 78
- slice_from("y");
+ SliceFrom("y");
break;
case 2:
// (, line 79
// <-, line 79
- slice_from("i");
+ SliceFrom("i");
break;
case 3:
// (, line 80
@@ -474,7 +474,7 @@
// test, line 91
v_1 = m_limit - m_cursor;
// among, line 91
- if (find_among_b(a_2, 3) == 0)
+ if (FindAmongB(a_2, 3) == 0)
{
return false;
}
@@ -490,7 +490,7 @@
// ], line 91
m_bra = m_cursor;
// delete, line 91
- slice_del();
+ SliceDel();
return true;
}
@@ -503,7 +503,7 @@
// [, line 96
m_ket = m_cursor;
// literal, line 96
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
@@ -516,13 +516,13 @@
}
// test, line 96
v_1 = m_limit - m_cursor;
- if (!(out_grouping_b(g_v, 97, 232)))
+ if (!(OutGroupingB(g_v, 97, 232)))
{
return false;
}
m_cursor = m_limit - v_1;
// delete, line 96
- slice_del();
+ SliceDel();
// set e_found, line 97
B_e_found = true;
// call undouble, line 98
@@ -545,7 +545,7 @@
}
// and, line 102
v_1 = m_limit - m_cursor;
- if (!(out_grouping_b(g_v, 97, 232)))
+ if (!(OutGroupingB(g_v, 97, 232)))
{
return false;
}
@@ -557,7 +557,7 @@
do
{
// literal, line 102
- if (!(eq_s_b(3, "gem")))
+ if (!(Eq_S_B(3, "gem")))
{
goto lab0;
}
@@ -567,7 +567,7 @@
m_cursor = m_limit - v_2;
}
// delete, line 102
- slice_del();
+ SliceDel();
// call undouble, line 103
if (!r_undouble())
{
@@ -599,7 +599,7 @@
// [, line 108
m_ket = m_cursor;
// substring, line 108
- among_var = find_among_b(a_3, 5);
+ among_var = FindAmongB(a_3, 5);
if (among_var == 0)
{
goto lab0;
@@ -618,7 +618,7 @@
goto lab0;
}
// <-, line 110
- slice_from("heid");
+ SliceFrom("heid");
break;
case 2:
// (, line 113
@@ -635,12 +635,12 @@
{
goto lab0;
}
- if (!(out_grouping_b(g_v_j, 97, 232)))
+ if (!(OutGroupingB(g_v_j, 97, 232)))
{
goto lab0;
}
// delete, line 116
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -668,7 +668,7 @@
// [, line 122
m_ket = m_cursor;
// literal, line 122
- if (!(eq_s_b(4, "heid")))
+ if (!(Eq_S_B(4, "heid")))
{
goto lab2;
}
@@ -686,7 +686,7 @@
do
{
// literal, line 122
- if (!(eq_s_b(1, "c")))
+ if (!(Eq_S_B(1, "c")))
{
goto lab3;
}
@@ -696,11 +696,11 @@
m_cursor = m_limit - v_4;
}
// delete, line 122
- slice_del();
+ SliceDel();
// [, line 123
m_ket = m_cursor;
// literal, line 123
- if (!(eq_s_b(2, "en")))
+ if (!(Eq_S_B(2, "en")))
{
goto lab2;
}
@@ -723,7 +723,7 @@
// [, line 127
m_ket = m_cursor;
// substring, line 127
- among_var = find_among_b(a_4, 6);
+ among_var = FindAmongB(a_4, 6);
if (among_var == 0)
{
goto lab4;
@@ -742,7 +742,7 @@
goto lab4;
}
// delete, line 129
- slice_del();
+ SliceDel();
// or, line 130
do
@@ -755,7 +755,7 @@
// [, line 130
m_ket = m_cursor;
// literal, line 130
- if (!(eq_s_b(2, "ig")))
+ if (!(Eq_S_B(2, "ig")))
{
goto lab6;
}
@@ -773,7 +773,7 @@
do
{
// literal, line 130
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab7;
}
@@ -783,7 +783,7 @@
m_cursor = m_limit - v_7;
}
// delete, line 130
- slice_del();
+ SliceDel();
goto lab5;
} while (false);
lab6:
@@ -810,7 +810,7 @@
do
{
// literal, line 133
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
goto lab8;
}
@@ -820,7 +820,7 @@
m_cursor = m_limit - v_8;
}
// delete, line 133
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 136
@@ -830,7 +830,7 @@
goto lab4;
}
// delete, line 136
- slice_del();
+ SliceDel();
// call e_ending, line 136
if (!r_e_ending())
{
@@ -845,7 +845,7 @@
goto lab4;
}
// delete, line 139
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 142
@@ -860,7 +860,7 @@
goto lab4;
}
// delete, line 142
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -872,7 +872,7 @@
do
{
// (, line 146
- if (!(out_grouping_b(g_v_I, 73, 232)))
+ if (!(OutGroupingB(g_v_I, 73, 232)))
{
goto lab9;
}
@@ -880,11 +880,11 @@
v_10 = m_limit - m_cursor;
// (, line 148
// among, line 149
- if (find_among_b(a_5, 4) == 0)
+ if (FindAmongB(a_5, 4) == 0)
{
goto lab9;
}
- if (!(out_grouping_b(g_v, 97, 232)))
+ if (!(OutGroupingB(g_v, 97, 232)))
{
goto lab9;
}
@@ -900,7 +900,7 @@
// ], line 152
m_bra = m_cursor;
// delete, line 152
- slice_del();
+ SliceDel();
} while (false);
lab9:
m_cursor = m_limit - v_9;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/EnglishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/EnglishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/EnglishStemmer.cs
index 07a21d6..7eda965 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/EnglishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/EnglishStemmer.cs
@@ -166,7 +166,7 @@
B_Y_found = other.B_Y_found;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_prelude()
@@ -188,14 +188,14 @@
// [, line 27
m_bra = m_cursor;
// literal, line 27
- if (!(eq_s(1, "'")))
+ if (!(Eq_S(1, "'")))
{
goto lab0;
}
// ], line 27
m_ket = m_cursor;
// delete, line 27
- slice_del();
+ SliceDel();
} while (false);
lab0:
m_cursor = v_1;
@@ -208,14 +208,14 @@
// [, line 28
m_bra = m_cursor;
// literal, line 28
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab1;
}
// ], line 28
m_ket = m_cursor;
// <-, line 28
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 28
B_Y_found = true;
} while (false);
@@ -244,14 +244,14 @@
do
{
// (, line 29
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab6;
}
// [, line 29
m_bra = m_cursor;
// literal, line 29
- if (!(eq_s(1, "y")))
+ if (!(Eq_S(1, "y")))
{
goto lab6;
}
@@ -270,7 +270,7 @@
}
golab5:
// <-, line 29
- slice_from("Y");
+ SliceFrom("Y");
// set Y_found, line 29
B_Y_found = true;
@@ -318,7 +318,7 @@
do
{
// among, line 36
- if (find_among(a_0, 3) == 0)
+ if (FindAmong(a_0, 3) == 0)
{
goto lab2;
}
@@ -334,7 +334,7 @@
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab4;
}
@@ -355,7 +355,7 @@
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab6;
}
@@ -380,7 +380,7 @@
do
{
- if (!(in_grouping(g_v, 97, 121)))
+ if (!(InGrouping(g_v, 97, 121)))
{
goto lab8;
}
@@ -401,7 +401,7 @@
do
{
- if (!(out_grouping(g_v, 97, 121)))
+ if (!(OutGrouping(g_v, 97, 121)))
{
goto lab10;
}
@@ -436,15 +436,15 @@
do
{
// (, line 50
- if (!(out_grouping_b(g_v_WXY, 89, 121)))
+ if (!(OutGroupingB(g_v_WXY, 89, 121)))
{
goto lab1;
}
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab1;
}
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
goto lab1;
}
@@ -453,11 +453,11 @@
lab1:
m_cursor = m_limit - v_1;
// (, line 52
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
return false;
}
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
return false;
}
@@ -504,7 +504,7 @@
// [, line 60
m_ket = m_cursor;
// substring, line 60
- among_var = find_among_b(a_1, 3);
+ among_var = FindAmongB(a_1, 3);
if (among_var == 0)
{
m_cursor = m_limit - v_1;
@@ -520,7 +520,7 @@
case 1:
// (, line 62
// delete, line 62
- slice_del();
+ SliceDel();
break;
}
} while (false);
@@ -528,7 +528,7 @@
// [, line 65
m_ket = m_cursor;
// substring, line 65
- among_var = find_among_b(a_2, 6);
+ among_var = FindAmongB(a_2, 6);
if (among_var == 0)
{
return false;
@@ -542,7 +542,7 @@
case 1:
// (, line 66
// <-, line 66
- slice_from("ss");
+ SliceFrom("ss");
break;
case 2:
// (, line 68
@@ -565,13 +565,13 @@
m_cursor = c;
}
// <-, line 68
- slice_from("i");
+ SliceFrom("i");
goto lab1;
} while (false);
lab2:
m_cursor = m_limit - v_2;
// <-, line 68
- slice_from("ie");
+ SliceFrom("ie");
} while (false);
lab1:
break;
@@ -590,7 +590,7 @@
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab4;
}
@@ -605,7 +605,7 @@
}
golab3:
// delete, line 69
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -621,7 +621,7 @@
// [, line 75
m_ket = m_cursor;
// substring, line 75
- among_var = find_among_b(a_4, 6);
+ among_var = FindAmongB(a_4, 6);
if (among_var == 0)
{
return false;
@@ -640,7 +640,7 @@
return false;
}
// <-, line 77
- slice_from("ee");
+ SliceFrom("ee");
break;
case 2:
// (, line 79
@@ -653,7 +653,7 @@
do
{
- if (!(in_grouping_b(g_v, 97, 121)))
+ if (!(InGroupingB(g_v, 97, 121)))
{
goto lab1;
}
@@ -669,11 +669,11 @@
golab0:
m_cursor = m_limit - v_1;
// delete, line 80
- slice_del();
+ SliceDel();
// test, line 81
v_3 = m_limit - m_cursor;
// substring, line 81
- among_var = find_among_b(a_3, 13);
+ among_var = FindAmongB(a_3, 13);
if (among_var == 0)
{
return false;
@@ -688,7 +688,7 @@
// <+, line 83
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "e");
+ Insert(m_cursor, m_cursor, "e");
m_cursor = c;
}
break;
@@ -705,7 +705,7 @@
// ], line 86
m_bra = m_cursor;
// delete, line 86
- slice_del();
+ SliceDel();
break;
case 3:
// (, line 87
@@ -725,7 +725,7 @@
// <+, line 87
{
int c = m_cursor;
- insert(m_cursor, m_cursor, "e");
+ Insert(m_cursor, m_cursor, "e");
m_cursor = c;
}
break;
@@ -751,7 +751,7 @@
do
{
// literal, line 94
- if (!(eq_s_b(1, "y")))
+ if (!(Eq_S_B(1, "y")))
{
goto lab1;
}
@@ -760,7 +760,7 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 94
- if (!(eq_s_b(1, "Y")))
+ if (!(Eq_S_B(1, "Y")))
{
return false;
}
@@ -768,7 +768,7 @@
lab0:
// ], line 94
m_bra = m_cursor;
- if (!(out_grouping_b(g_v, 97, 121)))
+ if (!(OutGroupingB(g_v, 97, 121)))
{
return false;
}
@@ -789,7 +789,7 @@
m_cursor = m_limit - v_2;
}
// <-, line 96
- slice_from("i");
+ SliceFrom("i");
return true;
}
@@ -800,7 +800,7 @@
// [, line 100
m_ket = m_cursor;
// substring, line 100
- among_var = find_among_b(a_5, 24);
+ among_var = FindAmongB(a_5, 24);
if (among_var == 0)
{
return false;
@@ -819,91 +819,91 @@
case 1:
// (, line 101
// <-, line 101
- slice_from("tion");
+ SliceFrom("tion");
break;
case 2:
// (, line 102
// <-, line 102
- slice_from("ence");
+ SliceFrom("ence");
break;
case 3:
// (, line 103
// <-, line 103
- slice_from("ance");
+ SliceFrom("ance");
break;
case 4:
// (, line 104
// <-, line 104
- slice_from("able");
+ SliceFrom("able");
break;
case 5:
// (, line 105
// <-, line 105
- slice_from("ent");
+ SliceFrom("ent");
break;
case 6:
// (, line 107
// <-, line 107
- slice_from("ize");
+ SliceFrom("ize");
break;
case 7:
// (, line 109
// <-, line 109
- slice_from("ate");
+ SliceFrom("ate");
break;
case 8:
// (, line 111
// <-, line 111
- slice_from("al");
+ SliceFrom("al");
break;
case 9:
// (, line 112
// <-, line 112
- slice_from("ful");
+ SliceFrom("ful");
break;
case 10:
// (, line 114
// <-, line 114
- slice_from("ous");
+ SliceFrom("ous");
break;
case 11:
// (, line 116
// <-, line 116
- slice_from("ive");
+ SliceFrom("ive");
break;
case 12:
// (, line 118
// <-, line 118
- slice_from("ble");
+ SliceFrom("ble");
break;
case 13:
// (, line 119
// literal, line 119
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
return false;
}
// <-, line 119
- slice_from("og");
+ SliceFrom("og");
break;
case 14:
// (, line 120
// <-, line 120
- slice_from("ful");
+ SliceFrom("ful");
break;
case 15:
// (, line 121
// <-, line 121
- slice_from("less");
+ SliceFrom("less");
break;
case 16:
// (, line 122
- if (!(in_grouping_b(g_valid_LI, 99, 116)))
+ if (!(InGroupingB(g_valid_LI, 99, 116)))
{
return false;
}
// delete, line 122
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -916,7 +916,7 @@
// [, line 127
m_ket = m_cursor;
// substring, line 127
- among_var = find_among_b(a_6, 9);
+ among_var = FindAmongB(a_6, 9);
if (among_var == 0)
{
return false;
@@ -935,27 +935,27 @@
case 1:
// (, line 128
// <-, line 128
- slice_from("tion");
+ SliceFrom("tion");
break;
case 2:
// (, line 129
// <-, line 129
- slice_from("ate");
+ SliceFrom("ate");
break;
case 3:
// (, line 130
// <-, line 130
- slice_from("al");
+ SliceFrom("al");
break;
case 4:
// (, line 132
// <-, line 132
- slice_from("ic");
+ SliceFrom("ic");
break;
case 5:
// (, line 134
// delete, line 134
- slice_del();
+ SliceDel();
break;
case 6:
// (, line 136
@@ -965,7 +965,7 @@
return false;
}
// delete, line 136
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -979,7 +979,7 @@
// [, line 141
m_ket = m_cursor;
// substring, line 141
- among_var = find_among_b(a_7, 18);
+ among_var = FindAmongB(a_7, 18);
if (among_var == 0)
{
return false;
@@ -998,7 +998,7 @@
case 1:
// (, line 144
// delete, line 144
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 145
@@ -1011,7 +1011,7 @@
do
{
// literal, line 145
- if (!(eq_s_b(1, "s")))
+ if (!(Eq_S_B(1, "s")))
{
goto lab1;
}
@@ -1020,14 +1020,14 @@
lab1:
m_cursor = m_limit - v_1;
// literal, line 145
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
return false;
}
} while (false);
lab0:
// delete, line 145
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1042,7 +1042,7 @@
// [, line 150
m_ket = m_cursor;
// substring, line 150
- among_var = find_among_b(a_8, 2);
+ among_var = FindAmongB(a_8, 2);
if (among_var == 0)
{
return false;
@@ -1097,7 +1097,7 @@
} while (false);
lab0:
// delete, line 151
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 152
@@ -1107,12 +1107,12 @@
return false;
}
// literal, line 152
- if (!(eq_s_b(1, "l")))
+ if (!(Eq_S_B(1, "l")))
{
return false;
}
// delete, line 152
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -1124,7 +1124,7 @@
// [, line 158
m_ket = m_cursor;
// substring, line 158
- if (find_among_b(a_9, 8) == 0)
+ if (FindAmongB(a_9, 8) == 0)
{
return false;
}
@@ -1145,7 +1145,7 @@
// [, line 170
m_bra = m_cursor;
// substring, line 170
- among_var = find_among(a_10, 18);
+ among_var = FindAmong(a_10, 18);
if (among_var == 0)
{
return false;
@@ -1164,57 +1164,57 @@
case 1:
// (, line 174
// <-, line 174
- slice_from("ski");
+ SliceFrom("ski");
break;
case 2:
// (, line 175
// <-, line 175
- slice_from("sky");
+ SliceFrom("sky");
break;
case 3:
// (, line 176
// <-, line 176
- slice_from("die");
+ SliceFrom("die");
break;
case 4:
// (, line 177
// <-, line 177
- slice_from("lie");
+ SliceFrom("lie");
break;
case 5:
// (, line 178
// <-, line 178
- slice_from("tie");
+ SliceFrom("tie");
break;
case 6:
// (, line 182
// <-, line 182
- slice_from("idl");
+ SliceFrom("idl");
break;
case 7:
// (, line 183
// <-, line 183
- slice_from("gentl");
+ SliceFrom("gentl");
break;
case 8:
// (, line 184
// <-, line 184
- slice_from("ugli");
+ SliceFrom("ugli");
break;
case 9:
// (, line 185
// <-, line 185
- slice_from("earli");
+ SliceFrom("earli");
break;
case 10:
// (, line 186
// <-, line 186
- slice_from("onli");
+ SliceFrom("onli");
break;
case 11:
// (, line 187
// <-, line 187
- slice_from("singl");
+ SliceFrom("singl");
break;
}
return true;
@@ -1251,7 +1251,7 @@
// [, line 203
m_bra = m_cursor;
// literal, line 203
- if (!(eq_s(1, "Y")))
+ if (!(Eq_S(1, "Y")))
{
goto lab3;
}
@@ -1270,7 +1270,7 @@
}
golab2:
// <-, line 203
- slice_from("y");
+ SliceFrom("y");
// LUCENENET NOTE: continue label is not supported directly in .NET,
// so we just need to add another goto to get to the end of the outer loop.
// See: http://stackoverflow.com/a/359449/181087
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4abfb341/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FinnishStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FinnishStemmer.cs b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FinnishStemmer.cs
index cedf9ba..b43e761 100644
--- a/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FinnishStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Tartarus/Snowball/Ext/FinnishStemmer.cs
@@ -147,7 +147,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
S_x = other.S_x;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
- base.copy_from(other);
+ base.CopyFrom(other);
}
private bool r_mark_regions()
@@ -165,7 +165,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(in_grouping(g_V1, 97, 246)))
+ if (!(InGrouping(g_V1, 97, 246)))
{
goto lab1;
}
@@ -188,7 +188,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(out_grouping(g_V1, 97, 246)))
+ if (!(OutGrouping(g_V1, 97, 246)))
{
goto lab3;
}
@@ -212,7 +212,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(in_grouping(g_V1, 97, 246)))
+ if (!(InGrouping(g_V1, 97, 246)))
{
goto lab5;
}
@@ -235,7 +235,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(out_grouping(g_V1, 97, 246)))
+ if (!(OutGrouping(g_V1, 97, 246)))
{
goto lab7;
}
@@ -284,7 +284,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 55
m_ket = m_cursor;
// substring, line 55
- among_var = find_among_b(a_0, 10);
+ among_var = FindAmongB(a_0, 10);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -299,7 +299,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
return false;
case 1:
// (, line 62
- if (!(in_grouping_b(g_particle_end, 97, 246)))
+ if (!(InGroupingB(g_particle_end, 97, 246)))
{
return false;
}
@@ -314,7 +314,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
break;
}
// delete, line 66
- slice_del();
+ SliceDel();
return true;
}
@@ -340,7 +340,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 69
m_ket = m_cursor;
// substring, line 69
- among_var = find_among_b(a_4, 9);
+ among_var = FindAmongB(a_4, 9);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -362,7 +362,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 72
- if (!(eq_s_b(1, "k")))
+ if (!(Eq_S_B(1, "k")))
{
goto lab0;
}
@@ -372,58 +372,58 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_cursor = m_limit - v_3;
}
// delete, line 72
- slice_del();
+ SliceDel();
break;
case 2:
// (, line 74
// delete, line 74
- slice_del();
+ SliceDel();
// [, line 74
m_ket = m_cursor;
// literal, line 74
- if (!(eq_s_b(3, "kse")))
+ if (!(Eq_S_B(3, "kse")))
{
return false;
}
// ], line 74
m_bra = m_cursor;
// <-, line 74
- slice_from("ksi");
+ SliceFrom("ksi");
break;
case 3:
// (, line 78
// delete, line 78
- slice_del();
+ SliceDel();
break;
case 4:
// (, line 81
// among, line 81
- if (find_among_b(a_1, 6) == 0)
+ if (FindAmongB(a_1, 6) == 0)
{
return false;
}
// delete, line 81
- slice_del();
+ SliceDel();
break;
case 5:
// (, line 83
// among, line 83
- if (find_among_b(a_2, 6) == 0)
+ if (FindAmongB(a_2, 6) == 0)
{
return false;
}
// delete, line 84
- slice_del();
+ SliceDel();
break;
case 6:
// (, line 86
// among, line 86
- if (find_among_b(a_3, 2) == 0)
+ if (FindAmongB(a_3, 2) == 0)
{
return false;
}
// delete, line 86
- slice_del();
+ SliceDel();
break;
}
return true;
@@ -432,7 +432,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
private bool r_LONG()
{
// among, line 91
- if (find_among_b(a_5, 7) == 0)
+ if (FindAmongB(a_5, 7) == 0)
{
return false;
}
@@ -443,11 +443,11 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
{
// (, line 93
// literal, line 93
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
return false;
}
- if (!(in_grouping_b(g_V2, 97, 246)))
+ if (!(InGroupingB(g_V2, 97, 246)))
{
return false;
}
@@ -478,7 +478,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 96
m_ket = m_cursor;
// substring, line 96
- among_var = find_among_b(a_6, 30);
+ among_var = FindAmongB(a_6, 30);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -494,7 +494,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 1:
// (, line 98
// literal, line 98
- if (!(eq_s_b(1, "a")))
+ if (!(Eq_S_B(1, "a")))
{
return false;
}
@@ -502,7 +502,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 2:
// (, line 99
// literal, line 99
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
@@ -510,7 +510,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 3:
// (, line 100
// literal, line 100
- if (!(eq_s_b(1, "i")))
+ if (!(Eq_S_B(1, "i")))
{
return false;
}
@@ -518,7 +518,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 4:
// (, line 101
// literal, line 101
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
return false;
}
@@ -526,7 +526,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 5:
// (, line 102
// literal, line 102
- if (!(eq_s_b(1, "\u00E4")))
+ if (!(Eq_S_B(1, "\u00E4")))
{
return false;
}
@@ -534,7 +534,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 6:
// (, line 103
// literal, line 103
- if (!(eq_s_b(1, "\u00F6")))
+ if (!(Eq_S_B(1, "\u00F6")))
{
return false;
}
@@ -567,7 +567,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab2:
m_cursor = m_limit - v_5;
// literal, line 112
- if (!(eq_s_b(2, "ie")))
+ if (!(Eq_S_B(2, "ie")))
{
m_cursor = m_limit - v_3;
goto lab0;
@@ -589,11 +589,11 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
break;
case 8:
// (, line 119
- if (!(in_grouping_b(g_V1, 97, 246)))
+ if (!(InGroupingB(g_V1, 97, 246)))
{
return false;
}
- if (!(out_grouping_b(g_V1, 97, 246)))
+ if (!(OutGroupingB(g_V1, 97, 246)))
{
return false;
}
@@ -601,14 +601,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
case 9:
// (, line 121
// literal, line 121
- if (!(eq_s_b(1, "e")))
+ if (!(Eq_S_B(1, "e")))
{
return false;
}
break;
}
// delete, line 138
- slice_del();
+ SliceDel();
// set ending_removed, line 139
B_ending_removed = true;
return true;
@@ -636,7 +636,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 142
m_ket = m_cursor;
// substring, line 142
- among_var = find_among_b(a_7, 14);
+ among_var = FindAmongB(a_7, 14);
if (among_var == 0)
{
m_limit_backward = v_2;
@@ -658,7 +658,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 146
- if (!(eq_s_b(2, "po")))
+ if (!(Eq_S_B(2, "po")))
{
goto lab0;
}
@@ -670,7 +670,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
break;
}
// delete, line 151
- slice_del();
+ SliceDel();
return true;
}
@@ -694,7 +694,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 154
m_ket = m_cursor;
// substring, line 154
- if (find_among_b(a_8, 2) == 0)
+ if (FindAmongB(a_8, 2) == 0)
{
m_limit_backward = v_2;
return false;
@@ -703,7 +703,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_bra = m_cursor;
m_limit_backward = v_2;
// delete, line 158
- slice_del();
+ SliceDel();
return true;
}
@@ -732,7 +732,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 162
m_ket = m_cursor;
// literal, line 162
- if (!(eq_s_b(1, "t")))
+ if (!(Eq_S_B(1, "t")))
{
m_limit_backward = v_2;
return false;
@@ -741,14 +741,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
m_bra = m_cursor;
// test, line 162
v_3 = m_limit - m_cursor;
- if (!(in_grouping_b(g_V1, 97, 246)))
+ if (!(InGroupingB(g_V1, 97, 246)))
{
m_limit_backward = v_2;
return false;
}
m_cursor = m_limit - v_3;
// delete, line 163
- slice_del();
+ SliceDel();
m_limit_backward = v_2;
// setlimit, line 165
v_4 = m_limit - m_cursor;
@@ -765,7 +765,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 165
m_ket = m_cursor;
// substring, line 165
- among_var = find_among_b(a_9, 2);
+ among_var = FindAmongB(a_9, 2);
if (among_var == 0)
{
m_limit_backward = v_5;
@@ -787,7 +787,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 167
- if (!(eq_s_b(2, "po")))
+ if (!(Eq_S_B(2, "po")))
{
goto lab0;
}
@@ -799,7 +799,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
break;
}
// delete, line 170
- slice_del();
+ SliceDel();
return true;
}
@@ -853,7 +853,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 174
m_bra = m_cursor;
// delete, line 174
- slice_del();
+ SliceDel();
} while (false);
lab0:
m_cursor = m_limit - v_3;
@@ -865,18 +865,18 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// (, line 175
// [, line 175
m_ket = m_cursor;
- if (!(in_grouping_b(g_AEI, 97, 228)))
+ if (!(InGroupingB(g_AEI, 97, 228)))
{
goto lab1;
}
// ], line 175
m_bra = m_cursor;
- if (!(out_grouping_b(g_V1, 97, 246)))
+ if (!(OutGroupingB(g_V1, 97, 246)))
{
goto lab1;
}
// delete, line 175
- slice_del();
+ SliceDel();
} while (false);
lab1:
m_cursor = m_limit - v_5;
@@ -889,7 +889,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 176
m_ket = m_cursor;
// literal, line 176
- if (!(eq_s_b(1, "j")))
+ if (!(Eq_S_B(1, "j")))
{
goto lab2;
}
@@ -904,7 +904,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
// literal, line 176
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab4;
}
@@ -913,14 +913,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
lab4:
m_cursor = m_limit - v_7;
// literal, line 176
- if (!(eq_s_b(1, "u")))
+ if (!(Eq_S_B(1, "u")))
{
goto lab2;
}
} while (false);
lab3:
// delete, line 176
- slice_del();
+ SliceDel();
} while (false);
lab2:
m_cursor = m_limit - v_6;
@@ -933,19 +933,19 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// [, line 177
m_ket = m_cursor;
// literal, line 177
- if (!(eq_s_b(1, "o")))
+ if (!(Eq_S_B(1, "o")))
{
goto lab5;
}
// ], line 177
m_bra = m_cursor;
// literal, line 177
- if (!(eq_s_b(1, "j")))
+ if (!(Eq_S_B(1, "j")))
{
goto lab5;
}
// delete, line 177
- slice_del();
+ SliceDel();
} while (false);
lab5:
m_cursor = m_limit - v_8;
@@ -958,7 +958,7 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
do
{
- if (!(out_grouping_b(g_V1, 97, 246)))
+ if (!(OutGroupingB(g_V1, 97, 246)))
{
goto lab7;
}
@@ -985,14 +985,14 @@ namespace Lucene.Net.Tartarus.Snowball.Ext
// ], line 179
m_bra = m_cursor;
// -> x, line 179
- S_x = slice_to(S_x);
+ S_x = SliceTo(S_x);
// name x, line 179
- if (!(eq_v_b(S_x.ToString())))
+ if (!(Eq_V_B(S_x.ToString())))
{
return false;
}
// delete, line 179
- slice_del();
+ SliceDel();
return true;
}
[02/39] lucenenet git commit: Lucene.Net.Analysis.Sinks refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Sinks refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f3a14dbc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f3a14dbc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f3a14dbc
Branch: refs/heads/api-work
Commit: f3a14dbccf1c9ee410be7ed2b11ae3abdc6d9435
Parents: 6ea63fd
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 09:03:55 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 09:03:55 2017 +0700
----------------------------------------------------------------------
.../Analysis/Sinks/DateRecognizerSinkFilter.cs | 13 ++--
.../Analysis/Sinks/TeeSinkTokenFilter.cs | 68 ++++++++++----------
.../Analysis/Sinks/TokenRangeSinkFilter.cs | 2 -
3 files changed, 40 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f3a14dbc/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
index bb31ae6..ee634e2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
@@ -32,17 +32,17 @@ namespace Lucene.Net.Analysis.Sinks
/// </summary>
public class DateRecognizerSinkFilter : TeeSinkTokenFilter.SinkFilter
{
- protected internal DateTimeStyles m_style;
- protected internal ICharTermAttribute m_termAtt;
- protected internal IFormatProvider m_culture;
- protected internal string[] m_formats;
+ protected DateTimeStyles m_style;
+ protected ICharTermAttribute m_termAtt;
+ protected IFormatProvider m_culture;
+ protected string[] m_formats;
/// <summary>
/// Creates a new instance of <see cref="DateRecognizerSinkFilter"/> using the current culture and <see cref="DateTimeStyles.None"/>.
/// Loosely matches standard DateTime formats using <see cref="DateTime.TryParse(string, IFormatProvider, DateTimeStyles, out DateTime)"/>.
/// </summary>
public DateRecognizerSinkFilter()
- : this((string[])null, DateTimeFormatInfo.CurrentInfo, DateTimeStyles.None)
+ : this((string[])null, DateTimeFormatInfo.CurrentInfo, DateTimeStyles.None)
{ }
/// <summary>
@@ -78,12 +78,11 @@ namespace Lucene.Net.Analysis.Sinks
/// Creates a new instance of <see cref="DateRecognizerSinkFilter"/> using the supplied culture and <see cref="DateTimeStyles"/>.
/// Loosely matches standard DateTime formats using <see cref="DateTime.TryParse(string, IFormatProvider, DateTimeStyles, out DateTime)"/>.
/// </summary>
- /// If supplied, one of them must match the format of the date exactly to get a match.</param>
/// <param name="culture">An object that supplies culture-specific format information</param>
/// <param name="style">A bitwise combination of enumeration values that indicates the permitted format of s.
/// A typical value to specify is <see cref="DateTimeStyles.None"/></param>
public DateRecognizerSinkFilter(IFormatProvider culture, DateTimeStyles style)
- :this((string[])null, culture, style)
+ : this((string[])null, culture, style)
{ }
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f3a14dbc/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
index 1538470..e1eeabe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
@@ -1,6 +1,6 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Util;
+using System;
using System.Collections.Generic;
-using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Sinks
{
@@ -25,43 +25,43 @@ namespace Lucene.Net.Analysis.Sinks
/// This TokenFilter provides the ability to set aside attribute states
/// that have already been analyzed. This is useful in situations where multiple fields share
/// many common analysis steps and then go their separate ways.
- /// <p/>
+ /// <para/>
/// It is also useful for doing things like entity extraction or proper noun analysis as
/// part of the analysis workflow and saving off those tokens for use in another field.
- ///
+ /// <para/>
/// <code>
/// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
- /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
- /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.NewSinkTokenStream();
+ /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.NewSinkTokenStream();
///
/// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
- /// source2.addSinkTokenStream(sink1);
- /// source2.addSinkTokenStream(sink2);
+ /// source2.AddSinkTokenStream(sink1);
+ /// source2.AddSinkTokenStream(sink2);
///
/// TokenStream final1 = new LowerCaseFilter(version, source1);
/// TokenStream final2 = source2;
/// TokenStream final3 = new EntityDetect(sink1);
/// TokenStream final4 = new URLDetect(sink2);
///
- /// d.add(new TextField("f1", final1, Field.Store.NO));
- /// d.add(new TextField("f2", final2, Field.Store.NO));
- /// d.add(new TextField("f3", final3, Field.Store.NO));
- /// d.add(new TextField("f4", final4, Field.Store.NO));
+ /// d.Add(new TextField("f1", final1, Field.Store.NO));
+ /// d.Add(new TextField("f2", final2, Field.Store.NO));
+ /// d.Add(new TextField("f3", final3, Field.Store.NO));
+ /// d.Add(new TextField("f4", final4, Field.Store.NO));
/// </code>
- /// In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
- /// <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
+ /// In this example, <c>sink1</c> and <c>sink2</c> will both get tokens from both
+ /// <c>reader1</c> and <c>reader2</c> after whitespace tokenizer
/// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
/// It is important, that tees are consumed before sinks (in the above example, the field names must be
/// less the sink's field names). If you are not sure, which stream is consumed first, you can simply
- /// add another sink and then pass all tokens to the sinks at once using <see cref="#consumeAllTokens"/>.
- /// This TokenFilter is exhausted after this. In the above example, change
+ /// add another sink and then pass all tokens to the sinks at once using <see cref="ConsumeAllTokens"/>.
+ /// This <see cref="TokenFilter"/> is exhausted after this. In the above example, change
/// the example above to:
/// <code>
/// ...
- /// TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
- /// TokenStream final2 = source2.newSinkTokenStream();
- /// sink1.consumeAllTokens();
- /// sink2.consumeAllTokens();
+ /// TokenStream final1 = new LowerCaseFilter(version, source1.NewSinkTokenStream());
+ /// TokenStream final2 = source2.NewSinkTokenStream();
+ /// sink1.ConsumeAllTokens();
+ /// sink2.ConsumeAllTokens();
/// ...
/// </code>
/// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
@@ -73,7 +73,7 @@ namespace Lucene.Net.Analysis.Sinks
private readonly ICollection<WeakReference<SinkTokenStream>> sinks = new LinkedList<WeakReference<SinkTokenStream>>();
/// <summary>
- /// Instantiates a new TeeSinkTokenFilter.
+ /// Instantiates a new <see cref="TeeSinkTokenFilter"/>.
/// </summary>
public TeeSinkTokenFilter(TokenStream input)
: base(input)
@@ -91,7 +91,7 @@ namespace Lucene.Net.Analysis.Sinks
/// <summary>
/// Returns a new <see cref="SinkTokenStream"/> that receives all tokens consumed by this stream
/// that pass the supplied filter. </summary>
- /// <seealso cref= SinkFilter></seealso>
+ /// <seealso cref="SinkFilter"/>
public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
{
var sink = new SinkTokenStream(CloneAttributes(), filter);
@@ -100,7 +100,7 @@ namespace Lucene.Net.Analysis.Sinks
}
/// <summary>
- /// Adds a <see cref="SinkTokenStream"/> created by another <code>TeeSinkTokenFilter</code>
+ /// Adds a <see cref="SinkTokenStream"/> created by another <see cref="TeeSinkTokenFilter"/>
/// to this one. The supplied stream will also receive all consumed tokens.
/// This method can be used to pass tokens from two different tees to one sink.
/// </summary>
@@ -120,7 +120,7 @@ namespace Lucene.Net.Analysis.Sinks
}
/// <summary>
- /// <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
+ /// <see cref="TeeSinkTokenFilter"/> passes all tokens to the added sinks
/// when itself is consumed. To be sure, that all tokens from the input
/// stream are passed to the sinks, you can call this methods.
/// This instance is exhausted after this, but all sinks are instant available.
@@ -159,7 +159,7 @@ namespace Lucene.Net.Analysis.Sinks
return false;
}
- public override void End()
+ public override sealed void End()
{
base.End();
AttributeSource.State finalState = CaptureState();
@@ -185,7 +185,7 @@ namespace Lucene.Net.Analysis.Sinks
public abstract bool Accept(AttributeSource source);
/// <summary>
- /// Called by <see cref="SinkTokenStream#reset()"/>. This method does nothing by default
+ /// Called by <see cref="SinkTokenStream.Reset()"/>. This method does nothing by default
/// and can optionally be overridden.
/// </summary>
public virtual void Reset()
@@ -195,14 +195,14 @@ namespace Lucene.Net.Analysis.Sinks
}
/// <summary>
- /// TokenStream output from a tee with optional filtering.
+ /// <see cref="TokenStream"/> output from a tee with optional filtering.
/// </summary>
public sealed class SinkTokenStream : TokenStream
{
- internal readonly IList<AttributeSource.State> cachedStates = new List<AttributeSource.State>();
- internal AttributeSource.State finalState;
- internal IEnumerator<AttributeSource.State> it = null;
- internal SinkFilter filter;
+ private readonly IList<AttributeSource.State> cachedStates = new List<AttributeSource.State>();
+ private AttributeSource.State finalState;
+ private IEnumerator<AttributeSource.State> it = null;
+ private SinkFilter filter;
internal SinkTokenStream(AttributeSource source, SinkFilter filter)
: base(source)
@@ -229,7 +229,7 @@ namespace Lucene.Net.Analysis.Sinks
this.finalState = finalState;
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
// lazy init the iterator
if (it == null)
@@ -245,7 +245,7 @@ namespace Lucene.Net.Analysis.Sinks
return true;
}
- public override void End()
+ public override sealed void End()
{
if (finalState != null)
{
@@ -253,7 +253,7 @@ namespace Lucene.Net.Analysis.Sinks
}
}
- public override void Reset()
+ public override sealed void Reset()
{
it = cachedStates.GetEnumerator();
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f3a14dbc/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
index c9da3d2..4e0d773 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
@@ -21,8 +21,6 @@ namespace Lucene.Net.Analysis.Sinks
/// <summary>
/// Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper
- ///
- ///
/// </summary>
public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
{
[04/39] lucenenet git commit: Lucene.Net.Analysis.Standard refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
index e3d58e5..3593baa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
@@ -22,32 +22,32 @@ namespace Lucene.Net.Analysis.Standard.Std40
*/
/// <summary>
- /// This class implements UAX29URLEmailTokenizer using Unicode 6.1.0.
+ /// This class implements <see cref="UAX29URLEmailTokenizer"/> using Unicode 6.1.0.
/// @deprecated This class is only for exact backwards compatibility
/// </summary>
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class UAX29URLEmailTokenizerImpl40 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0001\x00C1\x0008\x00BF\x0002\x00C1\x0002\x00BF\x0001\x00C1\x0013\x00BF\x0001\x00C2\x0001\x00BE\x0001\x00B9\x0001\x00C2" +
"\x0001\x00B2\x0001\x00B0\x0001\x00B5\x0002\x00B3\x0002\x00C2\x0001\x00B4\x0001\x00A4\x0001\x0089\x0001\x00B8\x0001\x00A5" +
@@ -201,14 +201,14 @@ namespace Lucene.Net.Analysis.Standard.Std40
"\x000B\x0000\x0038\x007F\x0002\x007D\x001F\x0088\x0003\x0000\x0006\x0088\x0002\x0000\x0006\x0088\x0002\x0000\x0006\x0088" +
"\x0002\x0000\x0003\x0088\x001C\x0000\x0003\x007D\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -255,9 +255,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -502,9 +502,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -3906,9 +3906,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -3951,73 +3951,77 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the text position at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -4037,32 +4041,30 @@ namespace Lucene.Net.Analysis.Standard.Std40
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl40(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -4078,13 +4080,11 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -4140,9 +4140,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -4155,18 +4155,17 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -4182,75 +4181,73 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a well-formed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -4267,14 +4264,15 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -4284,13 +4282,12 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index 502b98c..65aecc2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -23,21 +23,19 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// Filters <see cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
+ /// Filters <see cref="UAX29URLEmailTokenizer"/>
/// with <see cref="StandardFilter"/>,
/// <see cref="LowerCaseFilter"/> and
/// <see cref="StopFilter"/>, using a list of
/// English stop words.
///
- /// <a name="version"/>
/// <para>
- /// You must specify the required <see cref="org.apache.lucene.util.Version"/>
- /// compatibility when creating UAX29URLEmailAnalyzer
+ /// You must specify the required <see cref="LuceneVersion"/>
+ /// compatibility when creating <see cref="UAX29URLEmailAnalyzer"/>
/// </para>
/// </summary>
public sealed class UAX29URLEmailAnalyzer : StopwordAnalyzerBase
{
-
/// <summary>
/// Default maximum allowed token length </summary>
public const int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -52,8 +50,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the given stop words. </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
/// <param name="stopWords"> stop words </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
: base(matchVersion, stopWords)
@@ -61,10 +58,9 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Builds an analyzer with the default stop words ({@link
- /// #STOP_WORDS_SET}). </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET"/>).
+ /// </summary>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, STOP_WORDS_SET)
{
@@ -72,10 +68,9 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) </seealso>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopwords"> TextReader to read stop words from </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
+ /// <param name="stopwords"> <see cref="TextReader"/> to read stop words from </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
: this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
{
@@ -93,7 +88,6 @@ namespace Lucene.Net.Analysis.Standard
get { return maxTokenLength; }
}
-
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
index 2c91236..83659e2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -1,11 +1,10 @@
-\ufeffusing Lucene.Net.Analysis.Standard;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Util;
-using System.IO;
-using Lucene.Net.Analysis.Standard.Std31;
+\ufeffusing Lucene.Net.Analysis.Standard.Std31;
using Lucene.Net.Analysis.Standard.Std34;
using Lucene.Net.Analysis.Standard.Std36;
using Lucene.Net.Analysis.Standard.Std40;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System.IO;
namespace Lucene.Net.Analysis.Standard
{
@@ -31,26 +30,25 @@ namespace Lucene.Net.Analysis.Standard
/// algorithm, as specified in `
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// URLs and email addresses are also tokenized according to the relevant RFCs.
- /// <p/>
+ /// <para/>
/// Tokens produced are of the following types:
- /// <ul>
- /// <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- /// <li><NUM>: A number</li>
- /// <li><URL>: A URL</li>
- /// <li><EMAIL>: An email address</li>
- /// <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- /// Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- /// <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- /// <li><HIRAGANA>: A single hiragana character</li>
- /// </ul>
- /// <a name="version"/>
+ /// <list type="bullet">
+ /// <item><ALPHANUM>: A sequence of alphabetic and numeric characters</item>
+ /// <item><NUM>: A number</item>
+ /// <item><URL>: A URL</item>
+ /// <item><EMAIL>: An email address</item>
+ /// <item><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ /// Asian languages, including Thai, Lao, Myanmar, and Khmer</item>
+ /// <item><IDEOGRAPHIC>: A single CJKV ideographic character</item>
+ /// <item><HIRAGANA>: A single hiragana character</item>
+ /// </list>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating UAX29URLEmailTokenizer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- /// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// </ul>
+ /// compatibility when creating <see cref="UAX29URLEmailTokenizer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// from their combining characters. If you use a previous version number,
+ /// you get the exact broken behavior for backwards compatibility.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class UAX29URLEmailTokenizer : Tokenizer
@@ -71,7 +69,17 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = new string[] { StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL], "<URL>", "<EMAIL>" };
+ public static readonly string[] TOKEN_TYPES = new string[] {
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
+ "<URL>",
+ "<EMAIL>"
+ };
private int skippedPositions;
@@ -99,9 +107,10 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
- /// Creates a new instance of the UAX29URLEmailTokenizer. Attaches
- /// the <code>input</code> to the newly created JFlex scanner.
+ /// Creates a new instance of the <see cref="UAX29URLEmailTokenizer"/>. Attaches
+ /// the <paramref name="input"/> to the newly created JFlex scanner.
/// </summary>
+ /// <param name="matchVersion"> Lucene compatibility version </param>
/// <param name="input"> The input reader </param>
public UAX29URLEmailTokenizer(LuceneVersion matchVersion, TextReader input)
: base(input)
@@ -110,7 +119,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Creates a new UAX29URLEmailTokenizer with a given <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+ /// Creates a new <see cref="UAX29URLEmailTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
public UAX29URLEmailTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
: base(factory, input)
@@ -119,7 +128,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// LUCENENET: This method was added in .NET to prevent having to repeat code in the constructors.
+ /// LUCENENET specific: This method was added in .NET to prevent having to repeat code in the constructors.
/// </summary>
/// <param name="matchVersion"></param>
private void Init(LuceneVersion matchVersion)
@@ -165,7 +174,7 @@ namespace Lucene.Net.Analysis.Standard
private IPositionIncrementAttribute posIncrAtt;
private ITypeAttribute typeAtt;
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
ClearAttributes();
skippedPositions = 0;
@@ -197,7 +206,7 @@ namespace Lucene.Net.Analysis.Standard
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
index dc902f8..976f4c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
@@ -1,8 +1,7 @@
-\ufeffusing Lucene.Net.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
using System.Collections.Generic;
using System.IO;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
-using System;
namespace Lucene.Net.Analysis.Standard
{
@@ -37,7 +36,7 @@ namespace Lucene.Net.Analysis.Standard
private readonly int maxTokenLength;
/// <summary>
- /// Creates a new UAX29URLEmailTokenizerFactory </summary>
+ /// Creates a new <see cref="UAX29URLEmailTokenizerFactory"/> </summary>
public UAX29URLEmailTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
index b45186e..dbf05a7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
@@ -26,44 +26,44 @@ namespace Lucene.Net.Analysis.Standard
/// algorithm, as specified in
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// URLs and email addresses are also tokenized according to the relevant RFCs.
- /// <p/>
+ /// <para/>
/// Tokens produced are of the following types:
- /// <ul>
- /// <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- /// <li><NUM>: A number</li>
- /// <li><URL>: A URL</li>
- /// <li><EMAIL>: An email address</li>
- /// <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- /// Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- /// <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- /// <li><HIRAGANA>: A single hiragana character</li>
- /// <li><KATAKANA>: A sequence of katakana characters</li>
- /// <li><HANGUL>: A sequence of Hangul characters</li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item><ALPHANUM>: A sequence of alphabetic and numeric characters</item>
+ /// <item><NUM>: A number</item>
+ /// <item><URL>: A URL</item>
+ /// <item><EMAIL>: An email address</item>
+ /// <item><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ /// Asian languages, including Thai, Lao, Myanmar, and Khmer</item>
+ /// <item><IDEOGRAPHIC>: A single CJKV ideographic character</item>
+ /// <item><HIRAGANA>: A single hiragana character</item>
+ /// <item><KATAKANA>: A sequence of katakana characters</item>
+ /// <item><HANGUL>: A sequence of Hangul characters</item>
+ /// </list>
/// </summary>
public sealed class UAX29URLEmailTokenizerImpl : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
public const int AVOID_BAD_URL = 2;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0, 1, 1 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0001\x00C6\x0008\x00C4\x0002\x00C6\x0002\x00C4\x0001\x00C6\x0013\x00C4\x0001\x00C7\x0001\x008D\x0001\x00BF\x0001\x00C7" +
"\x0001\x00B9\x0001\x00B7\x0001\x008C\x0002\x00BA\x0002\x00C7\x0001\x00BB\x0001\x00AB\x0001\x0090\x0001\x00BE\x0001\x00AD" +
@@ -219,14 +219,14 @@ namespace Lucene.Net.Analysis.Standard
"\x0002\x0000\x0003\x008F\x001C\x0000\x0003\x007F\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -292,9 +292,9 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -710,9 +710,9 @@ namespace Lucene.Net.Analysis.Standard
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -8998,9 +8998,9 @@ namespace Lucene.Net.Analysis.Standard
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -9056,73 +9056,77 @@ namespace Lucene.Net.Analysis.Standard
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -9142,32 +9146,30 @@ namespace Lucene.Net.Analysis.Standard
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -9183,13 +9185,11 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -9245,9 +9245,9 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -9260,18 +9260,17 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -9287,75 +9286,73 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -9372,14 +9369,15 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -9389,13 +9387,12 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
[14/39] lucenenet git commit: Lucene.Net.Analysis.Util (CharArraySet
- WordlistLoader) refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util (CharArraySet - WordlistLoader) refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f934cebe
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f934cebe
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f934cebe
Branch: refs/heads/api-work
Commit: f934cebea3707a46224a2a1a7092a62ee017990f
Parents: b19aee5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 21:20:08 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 23:08:19 2017 +0700
----------------------------------------------------------------------
.../Analysis/Util/CharFilterFactory.cs | 13 ++--
.../Analysis/Util/CharTokenizer.cs | 27 ++++----
.../Analysis/Util/ClasspathResourceLoader.cs | 14 +++--
.../Analysis/Util/ElisionFilter.cs | 7 ++-
.../Analysis/Util/ElisionFilterFactory.cs | 2 +-
.../Analysis/Util/FilesystemResourceLoader.cs | 20 +++---
.../Analysis/Util/FilteringTokenFilter.cs | 17 +++--
.../Analysis/Util/MultiTermAwareComponent.cs | 2 +-
.../Analysis/Util/OpenStringBuilder.cs | 14 +++--
.../Analysis/Util/ResourceLoader.cs | 2 +-
.../Analysis/Util/ResourceLoaderAware.cs | 8 +--
.../Analysis/Util/RollingCharBuffer.cs | 32 +++++-----
.../Analysis/Util/SegmentingTokenizerBase.cs | 16 ++---
.../Analysis/Util/StemmerUtil.cs | 9 ++-
.../Analysis/Util/StopwordAnalyzerBase.cs | 50 ++++++++-------
.../Analysis/Util/TokenFilterFactory.cs | 17 ++---
.../Analysis/Util/TokenizerFactory.cs | 21 +++----
.../Analysis/Util/TypeExtensions.cs | 4 +-
.../Analysis/Util/WordlistLoader.cs | 65 ++++++++++----------
19 files changed, 171 insertions(+), 169 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
index a6a1efe..9da80b3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
@@ -27,18 +27,17 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
public abstract class CharFilterFactory : AbstractAnalysisFactory
{
-
private static readonly AnalysisSPILoader<CharFilterFactory> loader = new AnalysisSPILoader<CharFilterFactory>();
/// <summary>
- /// looks up a charfilter by name from context classpath </summary>
+ /// looks up a charfilter by name from the host project's dependent assemblies </summary>
public static CharFilterFactory ForName(string name, IDictionary<string, string> args)
{
return loader.NewInstance(name, args);
}
/// <summary>
- /// looks up a charfilter class by name from context classpath </summary>
+ /// looks up a charfilter class by name from the host project's dependent assemblies </summary>
public static Type LookupClass(string name)
{
return loader.LookupClass(name);
@@ -52,9 +51,9 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Reloads the factory list from the given <see cref="ClassLoader"/>.
+ /// Reloads the factory list.
/// Changes to the factories are visible after the method ends, all
- /// iterators (<see cref="#availableCharFilters()"/>,...) stay consistent.
+ /// iterators (<see cref="AvailableCharFilters"/>,...) stay consistent.
///
/// <para><b>NOTE:</b> Only new factories are added, existing ones are
/// never removed or replaced.
@@ -72,13 +71,13 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
- protected internal CharFilterFactory(IDictionary<string, string> args)
+ protected CharFilterFactory(IDictionary<string, string> args)
: base(args)
{
}
/// <summary>
- /// Wraps the given TextReader with a CharFilter. </summary>
+ /// Wraps the given <see cref="TextReader"/> with a <see cref="CharFilter"/>. </summary>
public abstract TextReader Create(TextReader input);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
index 58cc255..9ef33c4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -26,13 +26,13 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// An abstract base class for simple, character-oriented tokenizers.
/// <para>
- /// <a name="version">You must specify the required <see cref="LuceneVersion"/> compatibility
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility
/// when creating <see cref="CharTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token codepoints. See <see cref="#isTokenChar(int)"/> and
- /// <see cref="#normalize(int)"/> for details.</li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and
+ /// detect token codepoints. See <see cref="IsTokenChar(int)"/> and
+ /// <see cref="Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// <para>
/// A new <see cref="CharTokenizer"/> API has been introduced with Lucene 3.1. This API
@@ -41,26 +41,25 @@ namespace Lucene.Net.Analysis.Util
/// "http://java.sun.com/j2se/1.5.0/docs/api/java/lang/Character.html#supplementary"
/// >supplementary characters</a>. The old <i>char</i> based API has been
/// deprecated and should be replaced with the <i>int</i> based methods
- /// <see cref="#isTokenChar(int)"/> and <see cref="#normalize(int)"/>.
+ /// <see cref="IsTokenChar(int)"/> and <see cref="Normalize(int)"/>.
/// </para>
/// <para>
/// As of Lucene 3.1 each <see cref="CharTokenizer"/> - constructor expects a
/// <see cref="LuceneVersion"/> argument. Based on the given <see cref="LuceneVersion"/> either the new
/// API or a backwards compatibility layer is used at runtime. For
- /// <see cref="LuceneVersion"/> < 3.1 the backwards compatibility layer ensures correct
+ /// <see cref="LuceneVersion"/> < 3.1 the backwards compatibility layer ensures correct
/// behavior even for indexes build with previous versions of Lucene. If a
/// <see cref="LuceneVersion"/> >= 3.1 is used <see cref="CharTokenizer"/> requires the new API to
/// be implemented by the instantiated class. Yet, the old <i>char</i> based API
/// is not required anymore even if backwards compatibility must be preserved.
/// <see cref="CharTokenizer"/> subclasses implementing the new API are fully backwards
- /// compatible if instantiated with <see cref="LuceneVersion"/> < 3.1.
+ /// compatible if instantiated with <see cref="LuceneVersion"/> < 3.1.
/// </para>
/// <para>
/// <strong>Note:</strong> If you use a subclass of <see cref="CharTokenizer"/> with <see cref="LuceneVersion"/> >=
- /// 3.1 on an index build with a version < 3.1, created tokens might not be
+ /// 3.1 on an index build with a version < 3.1, created tokens might not be
/// compatible with the terms in your index.
/// </para>
- ///
/// </summary>
public abstract class CharTokenizer : Tokenizer
{
@@ -71,7 +70,7 @@ namespace Lucene.Net.Analysis.Util
/// Lucene version to match </param>
/// <param name="input">
/// the input to split up into tokens </param>
- protected CharTokenizer(LuceneVersion matchVersion, TextReader input)
+ public CharTokenizer(LuceneVersion matchVersion, TextReader input)
: base(input)
{
Init(matchVersion);
@@ -86,14 +85,14 @@ namespace Lucene.Net.Analysis.Util
/// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="input">
/// the input to split up into tokens </param>
- protected CharTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
+ public CharTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
: base(factory, input)
{
Init(matchVersion);
}
/// <summary>
- /// LUCENENET Added in the .NET version to assist with setting the attributes
+ /// LUCENENET specific - Added in the .NET version to assist with setting the attributes
/// from multiple constructors.
/// </summary>
/// <param name="matchVersion"></param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
index aa425c7..40434f3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.Util
*/
/// <summary>
- /// Simple <see cref="ResourceLoader"/> that uses <see cref="ClassLoader#getResourceAsStream(String)"/>
- /// and <see cref="Class#forName(String,boolean,ClassLoader)"/> to open resources and
- /// classes, respectively.
+ /// Simple <see cref="IResourceLoader"/> that uses <see cref="Assembly.GetManifestResourceStream(string)"/>
+ /// and <see cref="Assembly.GetType(string)"/> to open resources and
+ /// <see cref="Type"/>s, respectively.
/// </summary>
public sealed class ClasspathResourceLoader : IResourceLoader
{
@@ -48,7 +48,7 @@ namespace Lucene.Net.Analysis.Util
/// Resource names are relative to the resourcePrefix.
/// </summary>
/// <param name="clazz">The class type</param>
- /// <param name="namespacePrefixToExclude">Removes the part of the namespace of the class that matches the regex.
+ /// <param name="namespaceExcludeRegex">Removes the part of the namespace of the class that matches the regex.
/// This is useful to get to the resource if the assembly name and namespace name don't happen to match.
/// If provided, the assembly name will be concatnated with the namespace name (excluding the part tha matches the regex)
/// to provide the complete path to the embedded resource in the assembly. Note you can view the entire path to all of
@@ -110,8 +110,12 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
/// <param name="resource"></param>
/// <returns></returns>
- private string GetQualifiedResourceName(string resource)
+ private string GetQualifiedResourceName(string resource)
{
+ // LUCENENET TODO: Need to ensure this works in .NET Core (and perhaps refactor to make it more reliable).
+ // Perhaps it would make more sense to use Assembly.GetManifestResourceStream(Type, string), which allows
+ // you to filter by the namespace of a Type.
+
var namespaceName = this.clazz.Namespace;
var assemblyName = clazz.GetTypeInfo().Assembly.GetName().Name;
if (string.IsNullOrEmpty(this.namespaceExcludeRegex) && (assemblyName.Equals(namespaceName, StringComparison.OrdinalIgnoreCase)))
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
index bfa7751..4209310 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
@@ -22,15 +22,16 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Removes elisions from a <see cref="TokenStream"/>. For example, "l'avion" (the plane) will be
/// tokenized as "avion" (plane).
+ /// <para/>
+ /// <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
/// </summary>
- /// <seealso cref= <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a> </seealso>
public sealed class ElisionFilter : TokenFilter
{
private readonly CharArraySet articles;
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Constructs an elision filter with a Set of stop words </summary>
+ /// Constructs an elision filter with a <see cref="CharArraySet"/> of stop words </summary>
/// <param name="input"> the source <see cref="TokenStream"/> </param>
/// <param name="articles"> a set of stopword articles </param>
public ElisionFilter(TokenStream input, CharArraySet articles)
@@ -43,7 +44,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Increments the <see cref="TokenStream"/> with a <see cref="CharTermAttribute"/> without elisioned start
/// </summary>
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
index f12f57b..e591916 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilterFactory.cs
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Util
private CharArraySet articles;
/// <summary>
- /// Creates a new ElisionFilterFactory </summary>
+ /// Creates a new <see cref="ElisionFilterFactory"/> </summary>
public ElisionFilterFactory(IDictionary<string, string> args) : base(args)
{
articlesFile = Get(args, "articles");
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
index 09aab01..275d335 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
@@ -21,17 +21,17 @@ namespace Lucene.Net.Analysis.Util
*/
/// <summary>
- /// Simple <see cref="ResourceLoader"/> that opens resource files
+ /// Simple <see cref="IResourceLoader"/> that opens resource files
/// from the local file system, optionally resolving against
/// a base directory.
///
- /// <para>This loader wraps a delegate <see cref="ResourceLoader"/>
+ /// <para>This loader wraps a delegate <see cref="IResourceLoader"/>
/// that is used to resolve all files, the current base directory
- /// does not contain. <see cref="#newInstance"/> is always resolved
- /// against the delegate, as a <see cref="ClassLoader"/> is needed.
+ /// does not contain. <see cref="NewInstance"/> is always resolved
+ /// against the delegate, as an <see cref="T:System.Assembly"/> is needed.
///
/// </para>
- /// <para>You can chain several {@code FilesystemResourceLoader}s
+ /// <para>You can chain several <see cref="FilesystemResourceLoader"/>s
/// to allow lookup of files in more than one base directory.
/// </para>
/// </summary>
@@ -46,26 +46,26 @@ namespace Lucene.Net.Analysis.Util
/// are delegated to context classloader.
/// </summary>
public FilesystemResourceLoader()
- : this((DirectoryInfo)null)
+ : this((DirectoryInfo)null)
{
}
/// <summary>
/// Creates a resource loader that resolves resources against the given
- /// base directory (may be {@code null} to refer to CWD).
+ /// base directory (may be <c>null</c> to refer to CWD).
/// Files not found in file system and class lookups are delegated to context
/// classloader.
/// </summary>
public FilesystemResourceLoader(DirectoryInfo baseDirectory)
- : this(baseDirectory, new ClasspathResourceLoader(typeof(FilesystemResourceLoader)))
+ : this(baseDirectory, new ClasspathResourceLoader(typeof(FilesystemResourceLoader)))
{
}
/// <summary>
/// Creates a resource loader that resolves resources against the given
- /// base directory (may be {@code null} to refer to CWD).
+ /// base directory (may be <c>null</c> to refer to CWD).
/// Files not found in file system and class lookups are delegated
- /// to the given delegate <see cref="ResourceLoader"/>.
+ /// to the given delegate <see cref="IResourceLoader"/>.
/// </summary>
public FilesystemResourceLoader(DirectoryInfo baseDirectory, IResourceLoader @delegate)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
index 241c8da..c829ac0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -23,17 +23,17 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Abstract base class for TokenFilters that may remove tokens.
- /// You have to implement <see cref="#accept"/> and return a boolean if the current
- /// token should be preserved. <see cref="#incrementToken"/> uses this method
+ /// You have to implement <see cref="Accept"/> and return a boolean if the current
+ /// token should be preserved. <see cref="IncrementToken"/> uses this method
/// to decide if a token should be passed to the caller.
- /// <para><a name="lucene_match_version" />As of Lucene 4.4, an
- /// <see cref="IllegalArgumentException"/> is thrown when trying to disable position
+ /// <para>
+ /// As of Lucene 4.4, an
+ /// <see cref="ArgumentException"/> is thrown when trying to disable position
/// increments when filtering terms.
/// </para>
/// </summary>
public abstract class FilteringTokenFilter : TokenFilter
{
-
private static void CheckPositionIncrement(LuceneVersion version, bool enablePositionIncrements)
{
if (!enablePositionIncrements &&
@@ -45,7 +45,7 @@ namespace Lucene.Net.Analysis.Util
}
}
- protected internal readonly LuceneVersion m_version;
+ protected readonly LuceneVersion m_version;
private readonly IPositionIncrementAttribute posIncrAtt;
private bool enablePositionIncrements; // no init needed, as ctor enforces setting value!
private bool first = true;
@@ -78,7 +78,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Override this method and return if the current input token should be returned by <see cref="#incrementToken"/>. </summary>
+ /// Override this method and return if the current input token should be returned by <see cref="IncrementToken"/>. </summary>
protected abstract bool Accept();
public override sealed bool IncrementToken()
@@ -129,7 +129,6 @@ namespace Lucene.Net.Analysis.Util
skippedPositions = 0;
}
- /// <seealso cref= #setEnablePositionIncrements(boolean) </seealso>
public virtual bool EnablePositionIncrements
{
get
@@ -139,7 +138,7 @@ namespace Lucene.Net.Analysis.Util
// LUCENENET TODO:
// deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4
// There doesn't appear to be a way to apply [Obsolete] on a property setter only. The only way
- // to make it show the obsolete warning is to change this back to separate Get and Set methods.
+ // to make it show the obsolete warning is to change this back to a separate Set method.
set
{
CheckPositionIncrement(m_version, value);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
index 9f32238..397b110 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
@@ -28,7 +28,7 @@
{
/// <summary>
/// Returns an analysis component to handle analysis if multi-term queries.
- /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
+ /// The returned component must be a <see cref="TokenizerFactory"/>, <see cref="TokenFilterFactory"/> or <see cref="CharFilterFactory"/>.
/// </summary>
AbstractAnalysisFactory MultiTermComponent { get; } // LUCENENET TODO: Change to GetMultiTermComponent() ? Some implementations return new instance.
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index c97d9f0..fc73055 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -26,8 +26,8 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
public class OpenStringBuilder : ICharSequence
{
- protected internal char[] m_buf;
- protected internal int m_len;
+ protected char[] m_buf;
+ protected int m_len;
public OpenStringBuilder()
: this(32)
@@ -68,6 +68,8 @@ namespace Lucene.Net.Analysis.Util
return m_buf;
}
}
+
+ // LUCENENE TODO: Change to Length (StringBuilder uses Length in .NET)
public virtual int Count // LUCENENET NOTE: This was size() in Lucene.
{
get{ return m_len; }
@@ -78,12 +80,12 @@ namespace Lucene.Net.Analysis.Util
get { return m_buf.Length; }
}
- public virtual OpenStringBuilder Append(string csq)
+ public virtual OpenStringBuilder Append(string csq) // LUCENENET TODO: Add overloads for ICharSequence and StringBuilder
{
return Append(csq, 0, csq.Length);
}
- public virtual OpenStringBuilder Append(string csq, int start, int end)
+ public virtual OpenStringBuilder Append(string csq, int start, int end) // LUCENENET TODO: Add overloads for ICharSequence and StringBuilder
{
Reserve(end - start);
for (int i = start; i < end; i++)
@@ -137,7 +139,7 @@ namespace Lucene.Net.Analysis.Util
this.m_len += len;
}
- protected internal virtual void Resize(int len)
+ protected virtual void Resize(int len)
{
char[] newbuf = new char[Math.Max(m_buf.Length << 1, len)];
System.Array.Copy(m_buf, 0, newbuf, 0, Count);
@@ -177,7 +179,7 @@ namespace Lucene.Net.Analysis.Util
UnsafeWrite(b, off, len);
}
- public void Write(OpenStringBuilder arr)
+ public void Write(OpenStringBuilder arr) // LUCENENET TODO: Add overload for StringBuilder
{
Write(arr.m_buf, 0, m_len);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
index 301a3d3..7c472bc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
@@ -33,7 +33,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Finds class of the name
/// </summary>
- Type FindClass(string cname);
+ Type FindClass(string cname); // LUCENENET TODO: Rename FindType ?
/// <summary>
/// Creates an instance of the name and expected type
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
index f9c0506..6d8fe1c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
@@ -19,14 +19,14 @@
/// <summary>
/// Interface for a component that needs to be initialized by
- /// an implementation of <see cref="ResourceLoader"/>.
+ /// an implementation of <see cref="IResourceLoader"/>.
/// </summary>
- /// <seealso cref= ResourceLoader </seealso>
+ /// <seealso cref="IResourceLoader"/>
public interface IResourceLoaderAware
{
/// <summary>
- /// Initializes this component with the provided ResourceLoader
- /// (used for loading classes, files, etc).
+ /// Initializes this component with the provided <see cref="IResourceLoader"/>
+ /// (used for loading types, embedded resources, files, etc).
/// </summary>
void Inform(IResourceLoader loader);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
index 6bddce4..3cf6f12 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
@@ -5,7 +5,6 @@ using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Util
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,19 +21,18 @@ namespace Lucene.Net.Analysis.Util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
- /// Acts like a forever growing char[] as you read
- /// characters into it from the provided reader, but
- /// internally it uses a circular buffer to only hold the
- /// characters that haven't been freed yet. This is like a
- /// PushbackReader, except you don't have to specify
- /// up-front the max size of the buffer, but you do have to
- /// periodically call <see cref="#freeBefore"/>.
+ /// Acts like a forever growing <see cref="T:char[]"/> as you read
+ /// characters into it from the provided reader, but
+ /// internally it uses a circular buffer to only hold the
+ /// characters that haven't been freed yet. This is like a
+ /// PushbackReader, except you don't have to specify
+ /// up-front the max size of the buffer, but you do have to
+ /// periodically call <see cref="FreeBefore"/>.
/// </summary>
-
public sealed class RollingCharBuffer
{
-
private TextReader reader;
private char[] buffer = new char[512];
@@ -62,11 +60,13 @@ namespace Lucene.Net.Analysis.Util
end = false;
}
- /* Absolute position read. NOTE: pos must not jump
- * ahead by more than 1! Ie, it's OK to read arbitarily
- * far back (just not prior to the last {@link
- * #freeBefore}), but NOT ok to read arbitrarily far
- * ahead. Returns -1 if you hit EOF. */
+ /// <summary>
+ /// Absolute position read. NOTE: pos must not jump
+ /// ahead by more than 1! Ie, it's OK to read arbitarily
+ /// far back (just not prior to the last <see cref="FreeBefore(int)"/>,
+ /// but NOT ok to read arbitrarily far
+ /// ahead. Returns -1 if you hit EOF.
+ /// </summary>
public int Get(int pos)
{
//System.out.println(" Get pos=" + pos + " nextPos=" + nextPos + " count=" + count);
@@ -161,7 +161,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Call this to notify us that no chars before this
- /// absolute position are needed anymore.
+ /// absolute position are needed anymore.
/// </summary>
public void FreeBefore(int pos)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
index ca0b994..a0a63c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -59,22 +59,22 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Construct a new SegmenterBase, using
- /// the provided BreakIterator for sentence segmentation.
+ /// the provided <see cref="BreakIterator"/> for sentence segmentation.
/// <para>
- /// Note that you should never share BreakIterators across different
- /// TokenStreams, instead a newly created or cloned one should always
+ /// Note that you should never share <see cref="BreakIterator"/>s across different
+ /// <see cref="TokenStream"/>s, instead a newly created or cloned one should always
/// be provided to this constructor.
/// </para>
/// </summary>
- protected SegmentingTokenizerBase(TextReader reader, BreakIterator iterator)
+ public SegmentingTokenizerBase(TextReader reader, BreakIterator iterator)
: this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, iterator)
{
}
/// <summary>
- /// Construct a new SegmenterBase, also supplying the AttributeFactory
+ /// Construct a new SegmenterBase, also supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>
/// </summary>
- protected SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator)
+ public SegmentingTokenizerBase(AttributeFactory factory, TextReader reader, BreakIterator iterator)
: base(factory, reader)
{
offsetAtt = AddAttribute<IOffsetAttribute>();
@@ -106,7 +106,7 @@ namespace Lucene.Net.Analysis.Util
length = usableLength = m_offset = 0;
}
- public override void End()
+ public override sealed void End()
{
base.End();
int finalOffset = CorrectOffset(length < 0 ? m_offset : m_offset + length);
@@ -129,7 +129,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// For sentence tokenization, these are the unambiguous break positions. </summary>
- protected internal virtual bool IsSafeEnd(char ch)
+ protected virtual bool IsSafeEnd(char ch)
{
switch ((int)ch)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
index 145c064..9164e95 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
@@ -3,7 +3,6 @@ using System.Diagnostics;
namespace Lucene.Net.Analysis.Util
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -35,12 +34,12 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Returns true if the character array starts with the suffix.
+ /// Returns true if the character array starts with the prefix.
/// </summary>
/// <param name="s"> Input Buffer </param>
/// <param name="len"> length of input buffer </param>
/// <param name="prefix"> Prefix string to test </param>
- /// <returns> true if <code>s</code> starts with <code>prefix</code> </returns>
+ /// <returns> <c>true</c> if <paramref name="s"/> starts with <paramref name="prefix"/> </returns>
public static bool StartsWith(char[] s, int len, string prefix)
{
int prefixLen = prefix.Length;
@@ -64,7 +63,7 @@ namespace Lucene.Net.Analysis.Util
/// <param name="s"> Input Buffer </param>
/// <param name="len"> length of input buffer </param>
/// <param name="suffix"> Suffix string to test </param>
- /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+ /// <returns> <c>true</c> if <paramref name="s"/> ends with <paramref name="suffix"/> </returns>
public static bool EndsWith(char[] s, int len, string suffix)
{
int suffixLen = suffix.Length;
@@ -89,7 +88,7 @@ namespace Lucene.Net.Analysis.Util
/// <param name="s"> Input Buffer </param>
/// <param name="len"> length of input buffer </param>
/// <param name="suffix"> Suffix string to test </param>
- /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+ /// <returns> <c>true</c> if <paramref name="s"/> ends with <paramref name="suffix"/> </returns>
public static bool EndsWith(char[] s, int len, char[] suffix)
{
int suffixLen = suffix.Length;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
index c19ace3..98d71c9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
@@ -1,8 +1,8 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Util;
+using System;
using System.IO;
-using System.Text;
-using Lucene.Net.Util;
using System.Reflection;
+using System.Text;
namespace Lucene.Net.Analysis.Util
{
@@ -24,18 +24,16 @@ namespace Lucene.Net.Analysis.Util
*/
/// <summary>
- /// Base class for Analyzers that need to make use of stopword sets.
- ///
+ /// Base class for <see cref="Analyzer"/>s that need to make use of stopword sets.
/// </summary>
public abstract class StopwordAnalyzerBase : Analyzer
{
-
/// <summary>
/// An immutable stopword set
/// </summary>
- protected internal readonly CharArraySet m_stopwords;
+ protected readonly CharArraySet m_stopwords;
- protected internal readonly LuceneVersion m_matchVersion;
+ protected readonly LuceneVersion m_matchVersion;
/// <summary>
/// Returns the analyzer's stopword set or an empty set if the analyzer has no
@@ -58,7 +56,7 @@ namespace Lucene.Net.Analysis.Util
/// the Lucene version for cross version compatibility </param>
/// <param name="stopwords">
/// the analyzer's stopword set </param>
- protected internal StopwordAnalyzerBase(LuceneVersion version, CharArraySet stopwords)
+ protected StopwordAnalyzerBase(LuceneVersion version, CharArraySet stopwords)
{
m_matchVersion = version;
// analyzers should use char array set for stopwords!
@@ -66,40 +64,40 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Creates a new Analyzer with an empty stopword set
+ /// Creates a new <see cref="Analyzer"/> with an empty stopword set
/// </summary>
/// <param name="version">
/// the Lucene version for cross version compatibility </param>
- protected internal StopwordAnalyzerBase(LuceneVersion version)
+ protected StopwordAnalyzerBase(LuceneVersion version)
: this(version, null)
{
}
- // LUCENENET TODO: If this works, need to update the documentation for the .NET version of the story.
-
/// <summary>
- /// Creates a CharArraySet from a file resource associated with a class. (See
- /// <see cref="Class#getResourceAsStream(String)"/>).
+ /// Creates a <see cref="CharArraySet"/> from an embedded resource associated with a class. (See
+ /// <see cref="Assembly.GetManifestResourceStream(string)"/>).
/// </summary>
/// <param name="ignoreCase">
- /// <code>true</code> if the set should ignore the case of the
- /// stopwords, otherwise <code>false</code> </param>
+ /// <c>true</c> if the set should ignore the case of the
+ /// stopwords, otherwise <c>false</c> </param>
/// <param name="aClass">
/// a class that is associated with the given stopwordResource </param>
/// <param name="resource">
/// name of the resource file associated with the given class </param>
/// <param name="comment">
/// comment string to ignore in the stopword file </param>
- /// <returns> a CharArraySet containing the distinct stopwords from the given
+ /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
/// file </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <see cref="IOException"/> </exception>
- protected internal static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
+ protected static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
{
TextReader reader = null;
try
{
- var resourceNames = aClass.GetTypeInfo().Assembly.GetManifestResourceNames();
+ //var resourceNames = aClass.GetTypeInfo().Assembly.GetManifestResourceNames();
+ // LUCENENET TODO: Maybe it would make more sense to use this overload?
+ //var resourceStream = aClass.GetTypeInfo().Assembly.GetManifestResourceStream(aClass, resource);
var resourceStream = aClass.GetTypeInfo().Assembly.GetManifestResourceStream(resource);
reader = IOUtils.GetDecodingReader(resourceStream, Encoding.UTF8);
return WordlistLoader.GetWordSet(reader, comment, new CharArraySet(
@@ -114,18 +112,18 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Creates a CharArraySet from a file.
+ /// Creates a <see cref="CharArraySet"/> from a file.
/// </summary>
/// <param name="stopwords">
/// the stopwords file to load
/// </param>
/// <param name="matchVersion">
/// the Lucene version for cross version compatibility </param>
- /// <returns> a CharArraySet containing the distinct stopwords from the given
+ /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
/// file </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <see cref="IOException"/> </exception>
- protected internal static CharArraySet LoadStopwordSet(FileInfo stopwords, LuceneVersion matchVersion)
+ protected static CharArraySet LoadStopwordSet(FileInfo stopwords, LuceneVersion matchVersion)
{
TextReader reader = null;
try
@@ -140,18 +138,18 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Creates a CharArraySet from a file.
+ /// Creates a <see cref="CharArraySet"/> from a file.
/// </summary>
/// <param name="stopwords">
/// the stopwords reader to load
/// </param>
/// <param name="matchVersion">
/// the Lucene version for cross version compatibility </param>
- /// <returns> a CharArraySet containing the distinct stopwords from the given
+ /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
/// reader </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <see cref="IOException"/> </exception>
- protected internal static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersion matchVersion)
+ protected static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersion matchVersion)
{
try
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
index b2822d2..beb9b22 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenFilterFactory.cs
@@ -26,33 +26,34 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
public abstract class TokenFilterFactory : AbstractAnalysisFactory
{
- private static readonly AnalysisSPILoader<TokenFilterFactory> loader = new AnalysisSPILoader<TokenFilterFactory>(new string[] { "TokenFilterFactory", "FilterFactory" });
+ private static readonly AnalysisSPILoader<TokenFilterFactory> loader =
+ new AnalysisSPILoader<TokenFilterFactory>(new string[] { "TokenFilterFactory", "FilterFactory" });
/// <summary>
- /// looks up a tokenfilter by name from context classpath </summary>
+ /// looks up a tokenfilter by name from the host project's referenced assemblies </summary>
public static TokenFilterFactory ForName(string name, IDictionary<string, string> args)
{
return loader.NewInstance(name, args);
}
/// <summary>
- /// looks up a tokenfilter class by name from context classpath </summary>
+ /// looks up a tokenfilter class by name from the host project's referenced assemblies </summary>
public static Type LookupClass(string name)
{
return loader.LookupClass(name);
}
/// <summary>
- /// returns a list of all available tokenfilter names from context classpath </summary>
+ /// returns a list of all available tokenfilter names from the host project's referenced assemblies </summary>
public static ICollection<string> AvailableTokenFilters
{
get { return loader.AvailableServices; }
}
/// <summary>
- /// Reloads the factory list from the given <see cref="ClassLoader"/>.
+ /// Reloads the factory list.
/// Changes to the factories are visible after the method ends, all
- /// iterators (<see cref="#availableTokenFilters()"/>,...) stay consistent.
+ /// iterators (<see cref="AvailableTokenFilters"/>,...) stay consistent.
///
/// <para><b>NOTE:</b> Only new factories are added, existing ones are
/// never removed or replaced.
@@ -70,13 +71,13 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
- protected internal TokenFilterFactory(IDictionary<string, string> args)
+ protected TokenFilterFactory(IDictionary<string, string> args)
: base(args)
{
}
/// <summary>
- /// Transform the specified input TokenStream </summary>
+ /// Transform the specified input <see cref="TokenStream"/> </summary>
public abstract TokenStream Create(TokenStream input);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
index 285f090..e2916eb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TokenizerFactory.cs
@@ -1,7 +1,7 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Util;
+using System;
using System.Collections.Generic;
using System.IO;
-using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Util
{
@@ -28,34 +28,33 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
public abstract class TokenizerFactory : AbstractAnalysisFactory
{
-
private static readonly AnalysisSPILoader<TokenizerFactory> loader = new AnalysisSPILoader<TokenizerFactory>();
/// <summary>
- /// looks up a tokenizer by name from context classpath </summary>
+ /// looks up a tokenizer by name from the host project's referenced assemblies </summary>
public static TokenizerFactory ForName(string name, IDictionary<string, string> args)
{
return loader.NewInstance(name, args);
}
/// <summary>
- /// looks up a tokenizer class by name from context classpath </summary>
+ /// looks up a tokenizer class by name from the host project's referenced assemblies </summary>
public static Type LookupClass(string name)
{
return loader.LookupClass(name);
}
/// <summary>
- /// returns a list of all available tokenizer names from context classpath </summary>
+ /// returns a list of all available tokenizer names from the host project's referenced assemblies </summary>
public static ICollection<string> AvailableTokenizers
{
get { return loader.AvailableServices; }
}
/// <summary>
- /// Reloads the factory list from the given <see cref="ClassLoader"/>.
+ /// Reloads the factory list.
/// Changes to the factories are visible after the method ends, all
- /// iterators (<see cref="#availableTokenizers()"/>,...) stay consistent.
+ /// iterators (<see cref="AvailableTokenizers"/>,...) stay consistent.
///
/// <para><b>NOTE:</b> Only new factories are added, existing ones are
/// never removed or replaced.
@@ -73,20 +72,20 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
- protected internal TokenizerFactory(IDictionary<string, string> args)
+ protected TokenizerFactory(IDictionary<string, string> args)
: base(args)
{
}
/// <summary>
- /// Creates a TokenStream of the specified input using the default attribute factory. </summary>
+ /// Creates a <see cref="TokenStream"/> of the specified input using the default attribute factory. </summary>
public Tokenizer Create(TextReader input)
{
return Create(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input);
}
/// <summary>
- /// Creates a TokenStream of the specified input using the given AttributeFactory </summary>
+ /// Creates a <see cref="TokenStream"/> of the specified input using the given <see cref="AttributeSource.AttributeFactory"/> </summary>
public abstract Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/TypeExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/TypeExtensions.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/TypeExtensions.cs
index 79cc6c7..2c53dc4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/TypeExtensions.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/TypeExtensions.cs
@@ -1,10 +1,12 @@
\ufeffusing System;
-using System.Reflection;
namespace Lucene.Net.Analysis.Util
{
internal static class TypeExtensions
{
+ // LUCENENET TODO: Try to eliminate this class by using Assembly.GetManifestResourceStream(Type, string), if possible.
+ // If not possible, perhaps we should move this and BufferedCharFilter into a Support namespace here in Analysis.Common ?
+
/// <summary>
/// LUCENENET specific:
/// In .NET Core, resources are embedded with the namespace based on
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f934cebe/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
index d091f87..5687823 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
@@ -26,12 +26,12 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Loader for text files that represent a list of stopwords.
+ /// <para/>
+ /// <see cref="IOUtils"/> to obtain <see cref="TextReader"/> instances.
+ /// @lucene.internal
/// </summary>
- /// <seealso cref= IOUtils to obtain <see cref="Reader"/> instances
- /// @lucene.internal </seealso>
public class WordlistLoader
{
-
private const int INITIAL_CAPACITY = 16;
/// <summary>
@@ -43,12 +43,12 @@ namespace Lucene.Net.Analysis.Util
// LUCENENET TODO: Add .NET overloads that accept a file name? Or at least a FileInfo object as was done in 3.0.3?
/// <summary>
- /// Reads lines from a TextReader and adds every line as an entry to a CharArraySet (omitting
- /// leading and trailing whitespace). Every line of the TextReader should contain only
+ /// Reads lines from a <see cref="TextReader"/> and adds every line as an entry to a <see cref="CharArraySet"/> (omitting
+ /// leading and trailing whitespace). Every line of the <see cref="TextReader"/> should contain only
/// one word. The words need to be in lowercase if you make use of an
- /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// <see cref="Analyzer"/> which uses <see cref="Core.LowerCaseFilter"/> (like <see cref="Standard.StandardAnalyzer"/>).
/// </summary>
- /// <param name="reader"> TextReader containing the wordlist </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing the wordlist </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the readers words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
@@ -70,12 +70,12 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Reads lines from a TextReader and adds every line as an entry to a CharArraySet (omitting
- /// leading and trailing whitespace). Every line of the TextReader should contain only
+ /// Reads lines from a <see cref="TextReader"/> and adds every line as an entry to a <see cref="CharArraySet"/> (omitting
+ /// leading and trailing whitespace). Every line of the <see cref="TextReader"/> should contain only
/// one word. The words need to be in lowercase if you make use of an
- /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// <see cref="Analyzer"/> which uses <see cref="Core.LowerCaseFilter"/> (like <see cref="Standard.StandardAnalyzer"/>).
/// </summary>
- /// <param name="reader"> TextReader containing the wordlist </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing the wordlist </param>
/// <param name="matchVersion"> the <see cref="LuceneVersion"/> </param>
/// <returns> A <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetWordSet(TextReader reader, LuceneVersion matchVersion)
@@ -84,12 +84,12 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Reads lines from a TextReader and adds every non-comment line as an entry to a CharArraySet (omitting
- /// leading and trailing whitespace). Every line of the TextReader should contain only
+ /// Reads lines from a <see cref="TextReader"/> and adds every non-comment line as an entry to a <see cref="CharArraySet"/> (omitting
+ /// leading and trailing whitespace). Every line of the <see cref="TextReader"/> should contain only
/// one word. The words need to be in lowercase if you make use of an
- /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// <see cref="Analyzer"/> which uses <see cref="Core.LowerCaseFilter"/> (like <see cref="Standard.StandardAnalyzer"/>).
/// </summary>
- /// <param name="reader"> TextReader containing the wordlist </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing the wordlist </param>
/// <param name="comment"> The string representing a comment. </param>
/// <param name="matchVersion"> the <see cref="LuceneVersion"/> </param>
/// <returns> A CharArraySet with the reader's words </returns>
@@ -99,12 +99,12 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Reads lines from a TextReader and adds every non-comment line as an entry to a CharArraySet (omitting
- /// leading and trailing whitespace). Every line of the TextReader should contain only
+ /// Reads lines from a <see cref="TextReader"/> and adds every non-comment line as an entry to a <see cref="CharArraySet"/> (omitting
+ /// leading and trailing whitespace). Every line of the <see cref="TextReader"/> should contain only
/// one word. The words need to be in lowercase if you make use of an
- /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ /// <see cref="Analyzer"/> which uses <see cref="Core.LowerCaseFilter"/> (like <see cref="Standard.StandardAnalyzer"/>).
/// </summary>
- /// <param name="reader"> TextReader containing the wordlist </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing the wordlist </param>
/// <param name="comment"> The string representing a comment. </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the readers words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
@@ -133,14 +133,14 @@ namespace Lucene.Net.Analysis.Util
/// Reads stopwords from a stopword list in Snowball format.
/// <para>
/// The snowball format is the following:
- /// <ul>
- /// <li>Lines may contain multiple words separated by whitespace.
- /// <li>The comment character is the vertical line (|).
- /// <li>Lines may contain trailing comments.
- /// </ul>
+ /// <list type="bullet">
+ /// <item>Lines may contain multiple words separated by whitespace.</item>
+ /// <item>The comment character is the vertical line (|).</item>
+ /// <item>Lines may contain trailing comments.</item>
+ /// </list>
/// </para>
/// </summary>
- /// <param name="reader"> TextReader containing a Snowball stopword list </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing a Snowball stopword list </param>
/// <param name="result"> the <see cref="CharArraySet"/> to fill with the readers words </param>
/// <returns> the given <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetSnowballWordSet(TextReader reader, CharArraySet result)
@@ -176,14 +176,14 @@ namespace Lucene.Net.Analysis.Util
/// Reads stopwords from a stopword list in Snowball format.
/// <para>
/// The snowball format is the following:
- /// <ul>
- /// <li>Lines may contain multiple words separated by whitespace.
- /// <li>The comment character is the vertical line (|).
- /// <li>Lines may contain trailing comments.
- /// </ul>
+ /// <list type="bullet">
+ /// <item>Lines may contain multiple words separated by whitespace.</item>
+ /// <item>The comment character is the vertical line (|).</item>
+ /// <item>Lines may contain trailing comments.</item>
+ /// </list>
/// </para>
/// </summary>
- /// <param name="reader"> TextReader containing a Snowball stopword list </param>
+ /// <param name="reader"> <see cref="TextReader"/> containing a Snowball stopword list </param>
/// <param name="matchVersion"> the Lucene <see cref="LuceneVersion"/> </param>
/// <returns> A <see cref="CharArraySet"/> with the reader's words </returns>
public static CharArraySet GetSnowballWordSet(TextReader reader, LuceneVersion matchVersion)
@@ -194,7 +194,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Reads a stem dictionary. Each line contains:
- /// <pre>word<b>\t</b>stem</code>
+ /// <code>word<b>\t</b>stem</code>
/// (i.e. two tab separated words)
/// </summary>
/// <returns> stem dictionary that overrules the stemming algorithm </returns>
@@ -220,7 +220,6 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Accesses a resource by name and returns the (non comment) lines containing
/// data using the given character encoding.
- ///
/// <para>
/// A comment line is any line that starts with the character "#"
/// </para>
[30/39] lucenenet git commit:
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed CharAt() and
SetCharAt() methods because we already have an indexer this[int] that is
.NETified
Posted by ni...@apache.org.
Lucene.Net.Analysis.Util.OpenStringBuilder refactor: Removed CharAt() and SetCharAt() methods because we already have an indexer this[int] that is .NETified
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/eedf7d8c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/eedf7d8c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/eedf7d8c
Branch: refs/heads/api-work
Commit: eedf7d8c70d9e9b4367a4eb6deb0a47a9bbe9ad6
Parents: 4e209cd
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Feb 5 01:45:00 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Feb 5 01:45:00 2017 +0700
----------------------------------------------------------------------
.../Analysis/En/KStemmer.cs | 100 +++++++++----------
.../Analysis/Util/OpenStringBuilder.cs | 18 ++--
2 files changed, 60 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eedf7d8c/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
index 9173a2a..b861ec3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -377,7 +377,7 @@ namespace Lucene.Net.Analysis.En
{
char ch;
- ch = word.CharAt(index);
+ ch = word[index];
if ((ch == 'a') || (ch == 'e') || (ch == 'i') || (ch == 'o') || (ch == 'u'))
{
@@ -596,7 +596,7 @@ namespace Lucene.Net.Analysis.En
j = k;
for (int r1 = r, i = 0; i < s.Length; i++, r1++)
{
- if (s[i] != word.CharAt(r1))
+ if (s[i] != word[r1])
{
return false;
}
@@ -612,7 +612,7 @@ namespace Lucene.Net.Analysis.En
return false;
}
// check left to right since the endings have often already matched
- if (word.CharAt(k - 1) == a && word.CharAt(k) == b)
+ if (word[k - 1] == a && word[k] == b)
{
j = k - 2;
return true;
@@ -626,7 +626,7 @@ namespace Lucene.Net.Analysis.En
{
return false;
}
- if (word.CharAt(k - 2) == a && word.CharAt(k - 1) == b && word.CharAt(k) == c)
+ if (word[k - 2] == a && word[k - 1] == b && word[k] == c)
{
j = k - 3;
return true;
@@ -640,7 +640,7 @@ namespace Lucene.Net.Analysis.En
{
return false;
}
- if (word.CharAt(k - 3) == a && word.CharAt(k - 2) == b && word.CharAt(k - 1) == c && word.CharAt(k) == d)
+ if (word[k - 3] == a && word[k - 2] == b && word[k - 1] == c && word[k] == d)
{
j = k - 4;
return true;
@@ -671,7 +671,7 @@ namespace Lucene.Net.Analysis.En
/// <summary>Convert plurals to singular form, and '-ies' to 'y'</summary>
private void Plural()
{
- if (word.CharAt(k) == 's')
+ if (word[k] == 's')
{
if (EndsIn('i', 'e', 's'))
{
@@ -707,7 +707,7 @@ namespace Lucene.Net.Analysis.En
// !((word.CharAt(j) == 's') && (word.CharAt(j-1) == 's'))) return;
// ****
- bool tryE = j > 0 && !((word.CharAt(j) == 's') && (word.CharAt(j - 1) == 's'));
+ bool tryE = j > 0 && !((word[j] == 's') && (word[j - 1] == 's'));
if (tryE && Lookup())
{
return;
@@ -852,7 +852,7 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.UnsafeWrite(word.CharAt(k));
+ word.UnsafeWrite(word[k]);
k++;
Lookup();
return;
@@ -862,7 +862,7 @@ namespace Lucene.Net.Analysis.En
/* (this will sometimes screw up with `under-', but we */
/* will take care of that later) */
- if ((word.CharAt(0) == 'u') && (word.CharAt(1) == 'n'))
+ if ((word[0] == 'u') && (word[1] == 'n'))
{
word.UnsafeWrite('e');
word.UnsafeWrite('d');
@@ -892,7 +892,7 @@ namespace Lucene.Net.Analysis.En
return false;
}
- if (word.CharAt(i) != word.CharAt(i - 1))
+ if (word[i] != word[i - 1])
{
return false;
}
@@ -931,7 +931,7 @@ namespace Lucene.Net.Analysis.En
{
/* try adding an `e' to the stem and check against the dictionary */
- word.SetCharAt(j + 1, 'e');
+ word[j + 1] = 'e';
word.Length = j + 2;
k = j + 1;
@@ -962,7 +962,7 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.UnsafeWrite(word.CharAt(k)); // restore the doubled consonant
+ word.UnsafeWrite(word[k]); // restore the doubled consonant
/* the default is to leave the consonant doubled */
/* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
@@ -1026,14 +1026,14 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.SetCharAt(j + 1, 'i');
+ word[j + 1] = 'i';
word.Append("ty");
k = old_k;
/*
* the -ability and -ibility endings are highly productive, so just accept
* them
*/
- if ((j > 0) && (word.CharAt(j - 1) == 'i') && (word.CharAt(j) == 'l'))
+ if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'l'))
{
word.Length = j - 1;
word.Append("le"); // convert to -ble
@@ -1043,7 +1043,7 @@ namespace Lucene.Net.Analysis.En
}
/* ditto for -ivity */
- if ((j > 0) && (word.CharAt(j - 1) == 'i') && (word.CharAt(j) == 'v'))
+ if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'v'))
{
word.Length = j + 1;
word.UnsafeWrite('e'); // convert to -ive
@@ -1052,7 +1052,7 @@ namespace Lucene.Net.Analysis.En
return;
}
/* ditto for -ality */
- if ((j > 0) && (word.CharAt(j - 1) == 'a') && (word.CharAt(j) == 'l'))
+ if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'l'))
{
word.Length = j + 1;
k = j;
@@ -1088,7 +1088,7 @@ namespace Lucene.Net.Analysis.En
if (EndsIn('n', 'c', 'e'))
{
- word_char = word.CharAt(j);
+ word_char = word[j];
if (!((word_char == 'e') || (word_char == 'a')))
{
return;
@@ -1128,9 +1128,9 @@ namespace Lucene.Net.Analysis.En
*/
word.Length = j + 1;
k = j;
- if (word.CharAt(j) == 'i')
+ if (word[j] == 'i')
{
- word.SetCharAt(j, 'y');
+ word[j] = 'y';
}
Lookup();
}
@@ -1196,7 +1196,7 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.UnsafeWrite(word.CharAt(j - 1));
+ word.UnsafeWrite(word[j - 1]);
}
word.Length = j + 1;
@@ -1219,11 +1219,11 @@ namespace Lucene.Net.Analysis.En
{
if (EndsIn('n', 'c', 'y'))
{
- if (!((word.CharAt(j) == 'e') || (word.CharAt(j) == 'a')))
+ if (!((word[j] == 'e') || (word[j] == 'a')))
{
return;
}
- word.SetCharAt(j + 2, 't'); // try converting -ncy to -nt
+ word[j + 2] = 't'; // try converting -ncy to -nt
word.Length = j + 3;
k = j + 2;
@@ -1232,7 +1232,7 @@ namespace Lucene.Net.Analysis.En
return;
}
- word.SetCharAt(j + 2, 'c'); // the default is to convert it to -nce
+ word[j + 2] = 'c'; // the default is to convert it to -nce
word.UnsafeWrite('e');
k = j + 3;
Lookup();
@@ -1248,11 +1248,11 @@ namespace Lucene.Net.Analysis.En
if (EndsIn('b', 'l', 'e'))
{
- if (!((word.CharAt(j) == 'a') || (word.CharAt(j) == 'i')))
+ if (!((word[j] == 'a') || (word[j] == 'i')))
{
return;
}
- word_char = word.CharAt(j);
+ word_char = word[j];
word.Length = j; // try just removing the ending
k = j - 1;
if (Lookup())
@@ -1268,7 +1268,7 @@ namespace Lucene.Net.Analysis.En
return;
}
k++;
- word.UnsafeWrite(word.CharAt(k - 1));
+ word.UnsafeWrite(word[k - 1]);
}
word.Length = j;
word.UnsafeWrite('e'); // try removing -a/ible and adding -e
@@ -1311,7 +1311,7 @@ namespace Lucene.Net.Analysis.En
return;
}
- word.SetCharAt(j + 1, 'y'); // try converting -ic to -y
+ word[j + 1] = 'y'; // try converting -ic to -y
word.Length = j + 2;
k = j + 1;
if (Lookup())
@@ -1319,7 +1319,7 @@ namespace Lucene.Net.Analysis.En
return;
}
- word.SetCharAt(j + 1, 'e'); // try converting -ic to -e
+ word[j + 1] = 'e'; // try converting -ic to -e
if (Lookup())
{
return;
@@ -1492,7 +1492,7 @@ namespace Lucene.Net.Analysis.En
{
int old_k = k;
- if (word.CharAt(k) != 'r') // YCS
+ if (word[k] != 'r') // YCS
{
return;
}
@@ -1513,7 +1513,7 @@ namespace Lucene.Net.Analysis.En
if (EndsIn('e', 'r') || EndsIn('o', 'r'))
{
- word_char = word.CharAt(j + 1);
+ word_char = word[j + 1];
if (DoubleC(j))
{
word.Length = j;
@@ -1522,23 +1522,23 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.UnsafeWrite(word.CharAt(j - 1)); // restore the doubled consonant
+ word.UnsafeWrite(word[j - 1]); // restore the doubled consonant
}
- if (word.CharAt(j) == 'i') // do we have a -ier ending?
+ if (word[j] == 'i') // do we have a -ier ending?
{
- word.SetCharAt(j, 'y');
+ word[j] = 'y';
word.Length = j + 1;
k = j;
if (Lookup()) // yes, so check against the dictionary
{
return;
}
- word.SetCharAt(j, 'i'); // restore the endings
+ word[j] = 'i'; // restore the endings
word.UnsafeWrite('e');
}
- if (word.CharAt(j) == 'e') // handle -eer
+ if (word[j] == 'e') // handle -eer
{
word.Length = j;
k = j - 1;
@@ -1589,13 +1589,13 @@ namespace Lucene.Net.Analysis.En
if (EndsIn('l', 'y'))
{
- word.SetCharAt(j + 2, 'e'); // try converting -ly to -le
+ word[j + 2] = 'e'; // try converting -ly to -le
if (Lookup())
{
return;
}
- word.SetCharAt(j + 2, 'y');
+ word[j + 2] = 'y';
word.Length = j + 1; // try just removing the -ly
k = j;
@@ -1605,7 +1605,7 @@ namespace Lucene.Net.Analysis.En
return;
}
- if ((j > 0) && (word.CharAt(j - 1) == 'a') && (word.CharAt(j) == 'l')) /*
+ if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'l')) /*
* always
* convert
* -
@@ -1620,7 +1620,7 @@ namespace Lucene.Net.Analysis.En
word.Append("ly");
k = old_k;
- if ((j > 0) && (word.CharAt(j - 1) == 'a') && (word.CharAt(j) == 'b'))
+ if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 'b'))
{
/*
* always
@@ -1631,12 +1631,12 @@ namespace Lucene.Net.Analysis.En
* -
* able
*/
- word.SetCharAt(j + 2, 'e');
+ word[j + 2] = 'e';
k = j + 2;
return;
}
- if (word.CharAt(j) == 'i') // e.g., militarily -> military
+ if (word[j] == 'i') // e.g., militarily -> military
{
word.Length = j;
word.UnsafeWrite('y');
@@ -1687,7 +1687,7 @@ namespace Lucene.Net.Analysis.En
{
return;
}
- word.UnsafeWrite(word.CharAt(j - 1));
+ word.UnsafeWrite(word[j - 1]);
}
word.Length = j + 1;
@@ -1711,7 +1711,7 @@ namespace Lucene.Net.Analysis.En
word.Append("al"); // restore the ending to the way it was
k = old_k;
- if ((j > 0) && (word.CharAt(j - 1) == 'i') && (word.CharAt(j) == 'c'))
+ if ((j > 0) && (word[j - 1] == 'i') && (word[j] == 'c'))
{
word.Length = j - 1; // try removing -ical
k = j - 2;
@@ -1738,7 +1738,7 @@ namespace Lucene.Net.Analysis.En
return;
}
- if (word.CharAt(j) == 'i') // sometimes -ial endings should be removed
+ if (word[j] == 'i') // sometimes -ial endings should be removed
{
word.Length = j; // (sometimes it gets turned into -y, but we
k = j - 1; // aren't dealing with that case for now)
@@ -1780,9 +1780,9 @@ namespace Lucene.Net.Analysis.En
}
word.Length = j + 1;
word.Append("ive");
- if ((j > 0) && (word.CharAt(j - 1) == 'a') && (word.CharAt(j) == 't'))
+ if ((j > 0) && (word[j - 1] == 'a') && (word[j] == 't'))
{
- word.SetCharAt(j - 1, 'e'); // try removing -ative and adding -e
+ word[j - 1] = 'e'; // try removing -ative and adding -e
word.Length = j; // (e.g., determinative -> determine)
k = j - 1;
if (Lookup())
@@ -1800,15 +1800,15 @@ namespace Lucene.Net.Analysis.En
}
/* try mapping -ive to -ion (e.g., injunctive/injunction) */
- word.SetCharAt(j + 2, 'o');
- word.SetCharAt(j + 3, 'n');
+ word[j + 2] = 'o';
+ word[j + 3] = 'n';
if (Lookup())
{
return;
}
- word.SetCharAt(j + 2, 'v'); // restore the original values
- word.SetCharAt(j + 3, 'e');
+ word[j + 2] = 'v'; // restore the original values
+ word[j + 3] = 'e';
k = old_k;
// nolookup()
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eedf7d8c/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index b930b3f..74035ea 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -102,15 +102,17 @@ namespace Lucene.Net.Analysis.Util
return this;
}
- public virtual char CharAt(int index) // LUCENENET TODO: Remove (replaced with this[])
- {
- return m_buf[index];
- }
+ // LUCENENET specific - removed (replaced with this[])
+ //public virtual char CharAt(int index)
+ //{
+ // return m_buf[index];
+ //}
- public virtual void SetCharAt(int index, char ch) // LUCENENET TODO: Remove (replaced with this[])
- {
- m_buf[index] = ch;
- }
+ // LUCENENET specific - removed (replaced with this[])
+ //public virtual void SetCharAt(int index, char ch)
+ //{
+ // m_buf[index] = ch;
+ //}
// LUCENENET specific - added to .NETify
public virtual char this[int index]