Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/03 17:51:07 UTC
[01/11] lucenenet git commit: Lucene.Net.Analysis.Id refactor: member
accessibility and documentation comments
Repository: lucenenet
Updated Branches:
refs/heads/api-work a4828b1ad -> 30f14be68
Lucene.Net.Analysis.Id refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3eada003
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3eada003
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3eada003
Branch: refs/heads/api-work
Commit: 3eada0038bbf24ddfea303df4836b88b1f86592c
Parents: a4828b1
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 17:31:51 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 17:31:51 2017 +0700
----------------------------------------------------------------------
.../Analysis/Id/IndonesianAnalyzer.cs | 14 +++----
.../Analysis/Id/IndonesianStemFilter.cs | 8 ++--
.../Analysis/Id/IndonesianStemFilterFactory.cs | 4 +-
.../Analysis/Id/IndonesianStemmer.cs | 44 ++++++++++----------
4 files changed, 35 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3eada003/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
index 44d6c11..74e1818 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
@@ -26,7 +26,7 @@ namespace Lucene.Net.Analysis.Id
*/
/// <summary>
- /// Analyzer for Indonesian (Bahasa)
+ /// <see cref="Analyzer"/> for Indonesian (Bahasa)
/// </summary>
public sealed class IndonesianAnalyzer : StopwordAnalyzerBase
{
@@ -46,7 +46,7 @@ namespace Lucene.Net.Analysis.Id
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.
/// </summary>
private class DefaultSetHolder
@@ -72,10 +72,10 @@ namespace Lucene.Net.Analysis.Id
private readonly CharArraySet stemExclusionSet;
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public IndonesianAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
@@ -87,7 +87,7 @@ namespace Lucene.Net.Analysis.Id
/// <param name="stopwords">
/// a stopword set </param>
public IndonesianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}
@@ -103,7 +103,7 @@ namespace Lucene.Net.Analysis.Id
/// <param name="stemExclusionSet">
/// a set of terms not to be stemmed </param>
public IndonesianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
- : base(matchVersion, stopwords)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
@@ -111,7 +111,7 @@ namespace Lucene.Net.Analysis.Id
/// <summary>
/// Creates
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <see cref="Reader"/>.
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
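For readers following the API changes, the IndonesianAnalyzer documented above is consumed through the usual analyzer/token-stream pattern. A minimal sketch, assuming the Lucene.Net 4.8 API surface (GetTokenStream plus the Reset/IncrementToken/End loop); the field name and sample text are illustrative and member names may still shift on the api-work branch:

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Id;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    public static class IndonesianAnalyzerDemo
    {
        public static void Main()
        {
            // Uses the default stop word set; an overload accepts a stem exclusion set.
            Analyzer analyzer = new IndonesianAnalyzer(LuceneVersion.LUCENE_48);

            using (TokenStream ts = analyzer.GetTokenStream("body", new StringReader("pembangunan ekonomi indonesia")))
            {
                ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    Console.WriteLine(term.ToString()); // stop-word-filtered, stemmed tokens
                }
                ts.End();
            }
        }
    }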
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3eada003/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
index a2ac74d..89616c3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
@@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Id
private readonly bool stemDerivational;
/// <summary>
- /// Calls <see cref="#IndonesianStemFilter(TokenStream, boolean) IndonesianStemFilter(input, true)"/>
+ /// Calls <see cref="IndonesianStemFilter(TokenStream, bool)">IndonesianStemFilter(input, true)</see>
/// </summary>
public IndonesianStemFilter(TokenStream input)
: this(input, true)
@@ -39,9 +39,9 @@ namespace Lucene.Net.Analysis.Id
}
/// <summary>
- /// Create a new IndonesianStemFilter.
+ /// Create a new <see cref="IndonesianStemFilter"/>.
/// <para>
- /// If <code>stemDerivational</code> is false,
+ /// If <paramref name="stemDerivational"/> is false,
/// only inflectional suffixes (particles and possessive pronouns) are stemmed.
/// </para>
/// </summary>
@@ -59,7 +59,7 @@ namespace Lucene.Net.Analysis.Id
{
if (!keywordAtt.IsKeyword)
{
- int newlen = stemmer.stem(termAtt.Buffer, termAtt.Length, stemDerivational);
+ int newlen = stemmer.Stem(termAtt.Buffer, termAtt.Length, stemDerivational);
termAtt.Length = newlen;
}
return true;
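The stemDerivational switch documented above can also be exercised by composing the chain by hand. A small sketch (constructor shape and parameter name taken from the diff; tokenizer choice and sample usage are illustrative):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Id;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Util;

    public static class InflectionalOnlyChain
    {
        // stemDerivational: false strips only inflectional suffixes: particles such as
        // "-kah"/"-lah"/"-pun" and possessive pronouns such as "-ku"/"-mu"/"-nya".
        // Derivational prefixes and suffixes are left in place.
        public static TokenStream Build(TextReader reader)
        {
            Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
            return new IndonesianStemFilter(tokenizer, stemDerivational: false);
        }
    }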
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3eada003/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
index 2944496..8f152fd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
@@ -37,9 +37,9 @@ namespace Lucene.Net.Analysis.Id
private readonly bool stemDerivational;
/// <summary>
- /// Creates a new IndonesianStemFilterFactory </summary>
+ /// Creates a new <see cref="IndonesianStemFilterFactory"/> </summary>
public IndonesianStemFilterFactory(IDictionary<string, string> args)
- : base(args)
+ : base(args)
{
stemDerivational = GetBoolean(args, "stemDerivational", true);
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3eada003/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
index 5c2e9ae..3ddc0b3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
@@ -24,8 +24,8 @@ namespace Lucene.Net.Analysis.Id
/// Stemmer for Indonesian.
/// <para>
/// Stems Indonesian words with the algorithm presented in:
- /// <i>A Study of Stemming Effects on Information Retrieval in
- /// Bahasa Indonesia</i>, Fadillah Z Tala.
+ /// <c>A Study of Stemming Effects on Information Retrieval in
+ /// Bahasa Indonesia</c>, Fadillah Z Tala.
/// http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf
/// </para>
/// </summary>
@@ -44,17 +44,17 @@ namespace Lucene.Net.Analysis.Id
/// <summary>
/// Stem a term (returning its new length).
/// <para>
- /// Use <code>stemDerivational</code> to control whether full stemming
+ /// Use <paramref name="stemDerivational"/> to control whether full stemming
/// or only light inflectional stemming is done.
/// </para>
/// </summary>
- public virtual int stem(char[] text, int length, bool stemDerivational)
+ public virtual int Stem(char[] text, int length, bool stemDerivational)
{
flags = 0;
numSyllables = 0;
for (int i = 0; i < length; i++)
{
- if (isVowel(text[i]))
+ if (IsVowel(text[i]))
{
numSyllables++;
}
@@ -62,11 +62,11 @@ namespace Lucene.Net.Analysis.Id
if (numSyllables > 2)
{
- length = removeParticle(text, length);
+ length = RemoveParticle(text, length);
}
if (numSyllables > 2)
{
- length = removePossessivePronoun(text, length);
+ length = RemovePossessivePronoun(text, length);
}
if (stemDerivational)
@@ -81,20 +81,20 @@ namespace Lucene.Net.Analysis.Id
int oldLength = length;
if (numSyllables > 2)
{
- length = removeFirstOrderPrefix(text, length);
+ length = RemoveFirstOrderPrefix(text, length);
}
if (oldLength != length) // a rule is fired
{
oldLength = length;
if (numSyllables > 2)
{
- length = removeSuffix(text, length);
+ length = RemoveSuffix(text, length);
}
if (oldLength != length) // a rule is fired
{
if (numSyllables > 2)
{
- length = removeSecondOrderPrefix(text, length);
+ length = RemoveSecondOrderPrefix(text, length);
}
}
} // fail
@@ -102,17 +102,17 @@ namespace Lucene.Net.Analysis.Id
{
if (numSyllables > 2)
{
- length = removeSecondOrderPrefix(text, length);
+ length = RemoveSecondOrderPrefix(text, length);
}
if (numSyllables > 2)
{
- length = removeSuffix(text, length);
+ length = RemoveSuffix(text, length);
}
}
return length;
}
- private bool isVowel(char ch)
+ private bool IsVowel(char ch)
{
switch (ch)
{
@@ -127,7 +127,7 @@ namespace Lucene.Net.Analysis.Id
}
}
- private int removeParticle(char[] text, int length)
+ private int RemoveParticle(char[] text, int length)
{
if (StemmerUtil.EndsWith(text, length, "kah") || StemmerUtil.EndsWith(text, length, "lah") || StemmerUtil.EndsWith(text, length, "pun"))
{
@@ -138,7 +138,7 @@ namespace Lucene.Net.Analysis.Id
return length;
}
- private int removePossessivePronoun(char[] text, int length)
+ private int RemovePossessivePronoun(char[] text, int length)
{
if (StemmerUtil.EndsWith(text, length, "ku") || StemmerUtil.EndsWith(text, length, "mu"))
{
@@ -155,7 +155,7 @@ namespace Lucene.Net.Analysis.Id
return length;
}
- private int removeFirstOrderPrefix(char[] text, int length)
+ private int RemoveFirstOrderPrefix(char[] text, int length)
{
if (StemmerUtil.StartsWith(text, length, "meng"))
{
@@ -164,7 +164,7 @@ namespace Lucene.Net.Analysis.Id
return StemmerUtil.DeleteN(text, 0, length, 4);
}
- if (StemmerUtil.StartsWith(text, length, "meny") && length > 4 && isVowel(text[4]))
+ if (StemmerUtil.StartsWith(text, length, "meny") && length > 4 && IsVowel(text[4]))
{
flags |= REMOVED_MENG;
text[3] = 's';
@@ -200,7 +200,7 @@ namespace Lucene.Net.Analysis.Id
return StemmerUtil.DeleteN(text, 0, length, 4);
}
- if (StemmerUtil.StartsWith(text, length, "peny") && length > 4 && isVowel(text[4]))
+ if (StemmerUtil.StartsWith(text, length, "peny") && length > 4 && IsVowel(text[4]))
{
flags |= REMOVED_PENG;
text[3] = 's';
@@ -215,7 +215,7 @@ namespace Lucene.Net.Analysis.Id
return StemmerUtil.DeleteN(text, 0, length, 4);
}
- if (StemmerUtil.StartsWith(text, length, "pen") && length > 3 && isVowel(text[3]))
+ if (StemmerUtil.StartsWith(text, length, "pen") && length > 3 && IsVowel(text[3]))
{
flags |= REMOVED_PENG;
text[2] = 't';
@@ -261,7 +261,7 @@ namespace Lucene.Net.Analysis.Id
return length;
}
- private int removeSecondOrderPrefix(char[] text, int length)
+ private int RemoveSecondOrderPrefix(char[] text, int length)
{
if (StemmerUtil.StartsWith(text, length, "ber"))
{
@@ -277,7 +277,7 @@ namespace Lucene.Net.Analysis.Id
return StemmerUtil.DeleteN(text, 0, length, 3);
}
- if (StemmerUtil.StartsWith(text, length, "be") && length > 4 && !isVowel(text[2]) && text[3] == 'e' && text[4] == 'r')
+ if (StemmerUtil.StartsWith(text, length, "be") && length > 4 && !IsVowel(text[2]) && text[3] == 'e' && text[4] == 'r')
{
flags |= REMOVED_BER;
numSyllables--;
@@ -306,7 +306,7 @@ namespace Lucene.Net.Analysis.Id
return length;
}
- private int removeSuffix(char[] text, int length)
+ private int RemoveSuffix(char[] text, int length)
{
if (StemmerUtil.EndsWith(text, length, "kan") && (flags & REMOVED_KE) == 0 && (flags & REMOVED_PENG) == 0 && (flags & REMOVED_PE) == 0)
{
[11/11] lucenenet git commit: Lucene.Net.Analysis.Path refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Path refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/30f14be6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/30f14be6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/30f14be6
Branch: refs/heads/api-work
Commit: 30f14be685b2a62194a891c6ff8209efe62eed10
Parents: 100a8c5
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 00:37:31 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 00:45:32 2017 +0700
----------------------------------------------------------------------
.../Analysis/Path/PathHierarchyTokenizer.cs | 7 +++---
.../Path/PathHierarchyTokenizerFactory.cs | 24 ++++++++++----------
.../Path/ReversePathHierarchyTokenizer.cs | 7 +++---
3 files changed, 18 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/30f14be6/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
index b2dc6cf..abab79f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
@@ -26,13 +26,13 @@ namespace Lucene.Net.Analysis.Path
/// <para>
/// Take something like:
///
- /// <pre>
+ /// <code>
/// /something/something/else
/// </code>
///
/// and make:
///
- /// <pre>
+ /// <code>
/// /something
/// /something/something
/// /something/something/else
@@ -41,7 +41,6 @@ namespace Lucene.Net.Analysis.Path
/// </summary>
public class PathHierarchyTokenizer : Tokenizer
{
-
public PathHierarchyTokenizer(TextReader input)
: this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
{
@@ -222,7 +221,7 @@ namespace Lucene.Net.Analysis.Path
return true;
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
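To illustrate the prefix-emitting behaviour described in the doc comment, a minimal consumer might look like this (a sketch assuming the Lucene.Net 4.8 attribute API; the sample path is the one from the doc comment):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Path;
    using Lucene.Net.Analysis.TokenAttributes;

    public static class PathHierarchyDemo
    {
        public static void Main()
        {
            using (var tokenizer = new PathHierarchyTokenizer(new StringReader("/something/something/else")))
            {
                ICharTermAttribute term = tokenizer.AddAttribute<ICharTermAttribute>();
                tokenizer.Reset();
                while (tokenizer.IncrementToken())
                {
                    // Emits: /something, /something/something, /something/something/else
                    Console.WriteLine(term.ToString());
                }
                tokenizer.End();
            }
        }
    }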
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/30f14be6/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
index c25239b..2a5251e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
@@ -26,15 +26,15 @@ namespace Lucene.Net.Analysis.Path
/// Factory for <see cref="PathHierarchyTokenizer"/>.
/// <para>
/// This factory is typically configured for use only in the <code>index</code>
- /// Analyzer (or only in the <code>query</code> Analyzer, but never both).
+ /// Analyzer (or only in the <c>query</c> Analyzer, but never both).
/// </para>
/// <para>
/// For example, in the configuration below a query for
- /// <code>Books/NonFic</code> will match documents indexed with values like
- /// <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>,
- /// <code>Books/NonFic/Science/Physics</code>, etc. But it will not match
- /// documents indexed with values like <code>Books</code>, or
- /// <code>Books/Fic</code>...
+ /// <c>Books/NonFic</c> will match documents indexed with values like
+ /// <c>Books/NonFic</c>, <c>Books/NonFic/Law</c>,
+ /// <c>Books/NonFic/Science/Physics</c>, etc. But it will not match
+ /// documents indexed with values like <c>Books</c>, or
+ /// <c>Books/Fic</c>...
/// </para>
///
/// <code>
@@ -49,11 +49,11 @@ namespace Lucene.Net.Analysis.Path
/// </code>
/// <para>
/// In this example however we see the opposite configuration, so that a query
- /// for <code>Books/NonFic/Science/Physics</code> would match documents
- /// containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>,
- /// or <code>Books/NonFic/Science/Physics</code>, but not
- /// <code>Books/NonFic/Science/Physics/Theory</code> or
- /// <code>Books/NonFic/Law</code>.
+ /// for <c>Books/NonFic/Science/Physics</c> would match documents
+ /// containing <c>Books/NonFic</c>, <c>Books/NonFic/Science</c>,
+ /// or <c>Books/NonFic/Science/Physics</c>, but not
+ /// <c>Books/NonFic/Science/Physics/Theory</c> or
+ /// <c>Books/NonFic/Law</c>.
/// </para>
/// <code>
/// <fieldType name="descendent_path" class="solr.TextField">
@@ -74,7 +74,7 @@ namespace Lucene.Net.Analysis.Path
private readonly int skip;
/// <summary>
- /// Creates a new PathHierarchyTokenizerFactory </summary>
+ /// Creates a new <see cref="PathHierarchyTokenizerFactory"/> </summary>
public PathHierarchyTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/30f14be6/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
index 8def15d..09daf2b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
@@ -27,13 +27,13 @@ namespace Lucene.Net.Analysis.Path
/// <para>
/// Take something like:
///
- /// <pre>
+ /// <code>
/// www.site.co.uk
/// </code>
///
/// and make:
///
- /// <pre>
+ /// <code>
/// www.site.co.uk
/// site.co.uk
/// co.uk
@@ -44,7 +44,6 @@ namespace Lucene.Net.Analysis.Path
/// </summary>
public class ReversePathHierarchyTokenizer : Tokenizer
{
-
public ReversePathHierarchyTokenizer(TextReader input)
: this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
{
@@ -198,7 +197,7 @@ namespace Lucene.Net.Analysis.Path
return false;
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
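The reverse tokenizer emits suffixes instead, as in the www.site.co.uk example above. A short sketch, assuming the delimiter/replacement constructor overload carried over from the Java original:

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Path;
    using Lucene.Net.Analysis.TokenAttributes;

    public static class ReversePathHierarchyDemo
    {
        public static void Main()
        {
            // '.' used as both delimiter and replacement; default skip of 0.
            var tokenizer = new ReversePathHierarchyTokenizer(new StringReader("www.site.co.uk"), '.', '.');
            ICharTermAttribute term = tokenizer.AddAttribute<ICharTermAttribute>();
            tokenizer.Reset();
            while (tokenizer.IncrementToken())
            {
                // Emits: www.site.co.uk, site.co.uk, co.uk, uk
                Console.WriteLine(term.ToString());
            }
            tokenizer.End();
        }
    }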
[05/11] lucenenet git commit: Lucene.Net.Analysis.Miscellaneous
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
index 51b115a..53cebfe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
@@ -23,30 +23,30 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
/// It also discriminates against use of double vowels aa, ae, ao, oe and oo, leaving just the first one.
- /// <p/>
+ /// <para/>
/// It is a semantically more destructive solution than <see cref="ScandinavianNormalizationFilter"/> but
/// can in addition help with matching raksmorgas as räksmörgås.
- /// <p/>
+ /// <para/>
/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej == blabarsyltetoj
/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas == raksmorgas
- /// <p/>
+ /// <para/>
/// Background:
/// Swedish åäö are in fact the same letters as Norwegian and Danish åæø and thus interchangeable
/// when used between these languages. They are however folded differently when people type
/// them on a keyboard lacking these characters.
- /// <p/>
+ /// <para/>
/// In that situation almost all Swedish people use a, a, o instead of å, ä, ö.
- /// <p/>
+ /// <para/>
/// Norwegians and Danes on the other hand usually type aa, ae and oe instead of å, æ and ø.
/// Some do however use a, a, o, oo, ao and sometimes permutations of everything above.
- /// <p/>
+ /// <para/>
/// This filter solves that mismatch problem, but might also cause new.
- /// <p/> </summary>
- /// <seealso cref= ScandinavianNormalizationFilter </seealso>
+ /// </summary>
+ /// <seealso cref="ScandinavianNormalizationFilter"/>
public sealed class ScandinavianFoldingFilter : TokenFilter
{
-
- public ScandinavianFoldingFilter(TokenStream input) : base(input)
+ public ScandinavianFoldingFilter(TokenStream input)
+ : base(input)
{
charTermAttribute = AddAttribute<ICharTermAttribute>();
}
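A sketch of how the folding filter slots into a chain, per the doc comment's examples (the WhitespaceTokenizer and the behaviour noted in the comment are taken from the docs above; this is not part of the commit itself):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    public static class ScandinavianFoldingChain
    {
        // Folds å/ä/æ -> a and ö/ø -> o and collapses aa/ae/ao/oe/oo, so that
        // "räksmörgås", "ræksmørgås" and "raksmorgas" all index to the same term.
        public static TokenStream Build(TextReader reader)
        {
            Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            return new ScandinavianFoldingFilter(tokenizer);
        }
    }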
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
index c6930b2..97e60a7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
@@ -32,8 +32,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class ScandinavianFoldingFilterFactory : TokenFilterFactory
{
-
- public ScandinavianFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+ /// <summary>
+ /// Creates a new <see cref="ScandinavianFoldingFilterFactory"/> </summary>
+ public ScandinavianFoldingFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
index 5ad937b..9a75541 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
@@ -23,15 +23,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// This filter normalizes use of the interchangeable Scandinavian characters æÆäÄöÖøØ
/// and folded variants (aa, ao, ae, oe and oo) by transforming them to åÅæÆøØ.
- /// <p/>
+ /// <para/>
/// It's a semantically less destructive solution than <see cref="ScandinavianFoldingFilter"/>,
/// most useful when a person with a Norwegian or Danish keyboard queries a Swedish index
/// and vice versa. This filter does <b>not</b> perform the common Swedish folds of å and ä to a nor ö to o.
- /// <p/>
+ /// <para/>
/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej but not blabarsyltetoj
/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas but not raksmorgas
- /// <p/> </summary>
- /// <seealso cref= ScandinavianFoldingFilter </seealso>
+ /// </summary>
+ /// <seealso cref="ScandinavianFoldingFilter"/>
public sealed class ScandinavianNormalizationFilter : TokenFilter
{
public ScandinavianNormalizationFilter(TokenStream input)
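For contrast with the folding filter, the same kind of chain with the less destructive normalization filter (again a sketch; the tokenizer choice is an illustrative assumption):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    public static class ScandinavianNormalizationChain
    {
        // Normalizes the interchangeable æÆäÄöÖøØ and the aa/ao/ae/oe/oo digraphs
        // to åÅæÆøØ, but leaves plain a/o spellings alone.
        public static TokenStream Build(TextReader reader)
        {
            Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            return new ScandinavianNormalizationFilter(tokenizer);
        }
    }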
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
index e5a5832..1bc9dae 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
@@ -21,7 +21,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Factory for <see cref="org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter"/>.
+ /// Factory for <see cref="ScandinavianNormalizationFilter"/>.
/// <code>
/// <fieldType name="text_scandnorm" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
@@ -32,7 +32,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class ScandinavianNormalizationFilterFactory : TokenFilterFactory
{
-
+ /// <summary>
+ /// Creates a new <see cref="ScandinavianNormalizationFilterFactory"/> </summary>
public ScandinavianNormalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
index f4adbfe..769de5e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
@@ -23,7 +23,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Marks terms as keywords via the <see cref="KeywordAttribute"/>. Each token
/// contained in the provided set is marked as a keyword by setting
- /// <see cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+ /// <see cref="KeywordAttribute.IsKeyword"/> to <c>true</c>.
/// </summary>
public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
{
@@ -31,12 +31,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly CharArraySet keywordSet;
/// <summary>
- /// Create a new KeywordSetMarkerFilter, that marks the current token as a
+ /// Create a new <see cref="SetKeywordMarkerFilter"/>, that marks the current token as a
/// keyword if the tokens term buffer is contained in the given set via the
/// <see cref="KeywordAttribute"/>.
/// </summary>
/// <param name="in">
- /// TokenStream to filter </param>
+ /// <see cref="TokenStream"/> to filter </param>
/// <param name="keywordSet">
/// the keywords set to lookup the current termbuffer </param>
public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet)
@@ -46,7 +46,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
termAtt = AddAttribute<ICharTermAttribute>();
}
- protected internal override bool Keyword
+ protected override bool IsKeyword
{
get
{
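The renamed IsKeyword property is what downstream stemmers consult, so a typical chain marks protected terms before stemming. A hedged sketch using types from this same patch series (the protected term and the CharArraySet constructor arguments are illustrative assumptions):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Id;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    public static class KeywordMarkerChain
    {
        // Terms in the set get KeywordAttribute.IsKeyword = true, so the stemmer
        // further down the chain (which checks IsKeyword) leaves them untouched.
        public static TokenStream Build(TextReader reader)
        {
            var keywords = new CharArraySet(LuceneVersion.LUCENE_48, 1, false); // startSize 1, case sensitive
            keywords.Add("bukumu"); // hypothetical term to protect from stemming
            Tokenizer tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_48, reader);
            TokenStream marked = new SetKeywordMarkerFilter(tokenizer, keywords);
            return new IndonesianStemFilter(marked);
        }
    }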
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
index f2c00ce..cf8b0b9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
@@ -31,9 +31,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private Token singleToken;
private readonly ICharTermAttribute tokenAtt;
- public SingleTokenTokenStream(Token token) : base(Token.TOKEN_ATTRIBUTE_FACTORY)
+ public SingleTokenTokenStream(Token token)
+ : base(Token.TOKEN_ATTRIBUTE_FACTORY)
{
-
Debug.Assert(token != null);
this.singleToken = (Token)token.Clone();
@@ -41,7 +41,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
Debug.Assert(tokenAtt is Token);
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (exhausted)
{
@@ -61,7 +61,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
exhausted = false;
}
- public Token Token
+ public Token Token // LUCENENET TODO: Change to GetToken() and SetToken() (conversion)
{
get { return (Token)singleToken.Clone(); }
set { this.singleToken = (Token)value.Clone(); }
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
index 0e09209..d0f6618 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
@@ -1,9 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -39,8 +38,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly CharsRef spare = new CharsRef();
/// <summary>
- /// Create a new StemmerOverrideFilter, performing dictionary-based stemming
- /// with the provided <code>dictionary</code>.
+ /// Create a new <see cref="StemmerOverrideFilter"/>, performing dictionary-based stemming
+ /// with the provided dictionary (<paramref name="stemmerOverrideMap"/>).
/// <para>
/// Any dictionary-stemmed terms will be marked with <see cref="KeywordAttribute"/>
/// so that they will not be stemmed with stemmers down the chain.
@@ -94,8 +93,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
// TODO maybe we can generalize this and reuse this map somehow?
public sealed class StemmerOverrideMap
{
- internal readonly FST<BytesRef> fst;
- internal readonly bool ignoreCase;
+ private readonly FST<BytesRef> fst;
+ private readonly bool ignoreCase;
/// <summary>
/// Creates a new <see cref="StemmerOverrideMap"/> </summary>
@@ -108,9 +107,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Returns a <see cref="BytesReader"/> to pass to the <see cref="#get(char[], int, FST.Arc, FST.BytesReader)"/> method.
+ /// Returns a <see cref="FST.BytesReader"/> to pass to the <see cref="Get(char[], int, FST.Arc{BytesRef}, FST.BytesReader)"/> method.
/// </summary>
- public FST.BytesReader BytesReader
+ public FST.BytesReader BytesReader // LUCENENET TODO: Change to GetBytesReader()
{
get
{
@@ -150,24 +149,24 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
return matchOutput;
}
-
}
+
/// <summary>
/// This builder builds an <see cref="FST"/> for the <see cref="StemmerOverrideFilter"/>
/// </summary>
public class Builder
{
- internal readonly BytesRefHash hash = new BytesRefHash();
- internal readonly BytesRef spare = new BytesRef();
- internal readonly List<string> outputValues = new List<string>();
- internal readonly bool ignoreCase;
- internal readonly CharsRef charsSpare = new CharsRef();
+ private readonly BytesRefHash hash = new BytesRefHash();
+ private readonly BytesRef spare = new BytesRef();
+ private readonly List<string> outputValues = new List<string>();
+ private readonly bool ignoreCase;
+ private readonly CharsRef charsSpare = new CharsRef();
/// <summary>
- /// Creates a new <see cref="Builder"/> with ignoreCase set to <code>false</code>
+ /// Creates a new <see cref="Builder"/> with <see cref="ignoreCase"/> set to <c>false</c>
/// </summary>
public Builder()
- : this(false)
+ : this(false)
{
}
@@ -184,7 +183,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="input"> the input char sequence </param>
/// <param name="output"> the stemmer override output char sequence </param>
- /// <returns> <code>false</code> iff the input has already been added to this builder otherwise <code>true</code>. </returns>
+ /// <returns> <c>false</c> if the input has already been added to this builder otherwise <c>true</c>. </returns>
public virtual bool Add(string input, string output)
{
int length = input.Length;
@@ -212,13 +211,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Returns an <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
- /// <returns> an <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
- /// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
+ /// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
+ /// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
+ /// <exception cref="System.IO.IOException"> if an <see cref="System.IO.IOException"/> occurs; </exception>
public virtual StemmerOverrideMap Build()
{
ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
- Lucene.Net.Util.Fst.Builder<BytesRef> builder = new Lucene.Net.Util.Fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
+ Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
IntsRef intsSpare = new IntsRef();
int size = hash.Count;
@@ -231,7 +230,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
return new StemmerOverrideMap(builder.Finish(), ignoreCase);
}
-
}
}
}
\ No newline at end of file
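Putting the Builder/StemmerOverrideMap/filter pieces documented above together, a dictionary-stemming chain might be assembled like this (a sketch; the dictionary entries are illustrative and the Builder's ignoreCase argument is an assumption carried over from the Java API):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    public static class StemmerOverrideChain
    {
        public static TokenStream Build(TextReader reader)
        {
            // Build the FST-backed override map; Add() returns false for a duplicate input.
            var builder = new StemmerOverrideFilter.Builder(true); // ignoreCase
            builder.Add("running", "run");  // hypothetical dictionary entries
            builder.Add("mice", "mouse");
            StemmerOverrideFilter.StemmerOverrideMap map = builder.Build();

            Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            // Overridden terms are also marked with KeywordAttribute, so stemmers
            // later in the chain will not re-stem them.
            return new StemmerOverrideFilter(tokenizer, map);
        }
    }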
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
index e0c9323..c7d0aad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly bool ignoreCase;
/// <summary>
- /// Creates a new StemmerOverrideFilterFactory </summary>
+ /// Creates a new <see cref="StemmerOverrideFilterFactory"/> </summary>
public StemmerOverrideFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
index 98539c7..197946f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
@@ -29,7 +29,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class TrimFilter : TokenFilter
{
- internal readonly bool updateOffsets;
+ private readonly bool updateOffsets;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
@@ -53,7 +53,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Create a new <see cref="TrimFilter"/> on top of <code>in</code>. </summary>
+ /// Create a new <see cref="TrimFilter"/> on top of <paramref name="in"/>. </summary>
public TrimFilter(LuceneVersion version, TokenStream @in)
#pragma warning disable 612, 618
: this(version, @in, false)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
index d091842..77a1d1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,13 +30,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// </summary>
- /// <seealso cref= TrimFilter </seealso>
+ /// <seealso cref="TrimFilter"/>
public class TrimFilterFactory : TokenFilterFactory
{
- protected internal readonly bool m_updateOffsets;
+ protected readonly bool m_updateOffsets;
/// <summary>
- /// Creates a new TrimFilterFactory </summary>
+ /// Creates a new <see cref="TrimFilterFactory"/> </summary>
public TrimFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
index 182bc9c..cd58541 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
@@ -33,7 +33,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly int length;
- public TruncateTokenFilter(TokenStream input, int length) : base(input)
+ public TruncateTokenFilter(TokenStream input, int length)
+ : base(input)
{
if (length < 1)
{
@@ -44,7 +45,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.keywordAttr = AddAttribute<IKeywordAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
index 2b738ec..1aeb5d3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
@@ -21,7 +21,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Factory for <see cref="org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter"/>. The following type is recommended for "<i>diacritics-insensitive search</i>" for Turkish.
+ /// Factory for <see cref="TruncateTokenFilter"/>. The following type is recommended for "<i>diacritics-insensitive search</i>" for Turkish.
/// <code>
/// <fieldType name="text_tr_ascii_f5" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
@@ -37,11 +37,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class TruncateTokenFilterFactory : TokenFilterFactory
{
-
public const string PREFIX_LENGTH_KEY = "prefixLength";
private readonly sbyte prefixLength;
- public TruncateTokenFilterFactory(IDictionary<string, string> args) : base(args)
+ /// <summary>
+ /// Creates a new <see cref="TruncateTokenFilterFactory"/> </summary>
+ public TruncateTokenFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
prefixLength = sbyte.Parse(Get(args, PREFIX_LENGTH_KEY, "5"));
if (prefixLength < 1)
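As a quick illustration of the prefixLength behaviour the factory documents, the filter can also be wired up directly (a sketch; the tokenizer choice is illustrative and the value 5 mirrors the factory's default):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    public static class TruncateChain
    {
        // Truncates every non-keyword token to its first 5 characters, the same
        // effect as prefixLength="5" in the factory configuration above.
        public static TokenStream Build(TextReader reader)
        {
            Tokenizer tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            return new TruncateTokenFilter(tokenizer, 5);
        }
    }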
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index 3c639d2..c3cc5a5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -25,63 +25,57 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Splits words into subwords and performs optional transformations on subword
- /// groups. Words are split into subwords with the following rules:
- /// <ul>
- /// <li>split on intra-word delimiters (by default, all non alpha-numeric
- /// characters): <code>"Wi-Fi"</code> → <code>"Wi", "Fi"</code></li>
- /// <li>split on case transitions: <code>"PowerShot"</code> →
- /// <code>"Power", "Shot"</code></li>
- /// <li>split on letter-number transitions: <code>"SD500"</code> →
- /// <code>"SD", "500"</code></li>
- /// <li>leading and trailing intra-word delimiters on each subword are ignored:
- /// <code>"//hello---there, 'dude'"</code> →
- /// <code>"hello", "there", "dude"</code></li>
- /// <li>trailing "'s" are removed for each subword: <code>"O'Neil's"</code>
- /// → <code>"O", "Neil"</code>
- /// <ul>
- /// <li>Note: this step isn't performed in a separate filter because of possible
- /// subword combinations.</li>
- /// </ul>
- /// </li>
- /// </ul>
- ///
- /// The <b>combinations</b> parameter affects how subwords are combined:
- /// <ul>
- /// <li>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
- /// → <code>0:"Power", 1:"Shot"</code> (0 and 1 are the token positions)</li>
- /// <li>combinations="1" means that in addition to the subwords, maximum runs of
- /// non-numeric subwords are catenated and produced at the same position of the
- /// last subword in the run:
- /// <ul>
- /// <li><code>"PowerShot"</code> →
- /// <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
- /// <li><code>"A's+B's&C's"</code> -gt; <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
- /// </li>
- /// <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> →
- /// <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
- /// </li>
- /// </ul>
- /// </li>
- /// </ul>
- /// One use for <see cref="WordDelimiterFilter"/> is to help match words with different
- /// subword delimiters. For example, if the source text contained "wi-fi" one may
- /// want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
- /// is to specify combinations="1" in the analyzer used for indexing, and
- /// combinations="0" (the default) in the analyzer used for querying. Given that
- /// the current <see cref="StandardTokenizer"/> immediately removes many intra-word
- /// delimiters, it is recommended that this filter be used after a tokenizer that
- /// does not do this (such as <see cref="WhitespaceTokenizer"/>).
- /// </summary>
- public sealed class WordDelimiterFilter : TokenFilter
+ /// Splits words into subwords and performs optional transformations on subword
+ /// groups. Words are split into subwords with the following rules:
+ /// <list type="bullet">
+ /// <item>split on intra-word delimiters (by default, all non alpha-numeric
+ /// characters): <c>"Wi-Fi"</c> → <c>"Wi", "Fi"</c></item>
+ /// <item>split on case transitions: <c>"PowerShot"</c> →
+ /// <c>"Power", "Shot"</c></item>
+ /// <item>split on letter-number transitions: <c>"SD500"</c> →
+ /// <c>"SD", "500"</c></item>
+ /// <item>leading and trailing intra-word delimiters on each subword are ignored:
+ /// <c>"//hello---there, 'dude'"</c> →
+ /// <c>"hello", "there", "dude"</c></item>
+ /// <item>trailing "'s" are removed for each subword: <c>"O'Neil's"</c>
+ /// → <c>"O", "Neil"</c>
+ /// <ul>
+ /// <item>Note: this step isn't performed in a separate filter because of possible
+ /// subword combinations.</item>
+ /// </ul>
+ /// </item>
+ /// </list>
+ /// <para/>
+ /// The <b>combinations</b> parameter affects how subwords are combined:
+ /// <list type="bullet">
+ /// <item>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
+ /// → <c>0:"Power", 1:"Shot"</c> (0 and 1 are the token positions)</item>
+ /// <item>combinations="1" means that in addition to the subwords, maximum runs of
+ /// non-numeric subwords are catenated and produced at the same position of the
+ /// last subword in the run:
+ /// <ul>
+ /// <item><c>"PowerShot"</c> →
+ /// <c>0:"Power", 1:"Shot" 1:"PowerShot"</c></item>
+ /// <item><c>"A's+B's&C's"</c> -gt; <c>0:"A", 1:"B", 2:"C", 2:"ABC"</c>
+ /// </item>
+ /// <item><c>"Super-Duper-XL500-42-AutoCoder!"</c> →
+ /// <c>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</c>
+ /// </item>
+ /// </ul>
+ /// </item>
+ /// </list>
+ /// <para/>
+ /// One use for <see cref="WordDelimiterFilter"/> is to help match words with different
+ /// subword delimiters. For example, if the source text contained "wi-fi" one may
+ /// want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
+ /// is to specify combinations="1" in the analyzer used for indexing, and
+ /// combinations="0" (the default) in the analyzer used for querying. Given that
+ /// the current <see cref="Standard.StandardTokenizer"/> immediately removes many intra-word
+ /// delimiters, it is recommended that this filter be used after a tokenizer that
+ /// does not do this (such as <see cref="Core.WhitespaceTokenizer"/>).
+ /// </summary>
+ public sealed class WordDelimiterFilter : TokenFilter
{
- private void InitializeInstanceFields()
- {
- concat = new WordDelimiterConcatenation(this);
- concatAll = new WordDelimiterConcatenation(this);
- sorter = new OffsetSorter(this);
- }
-
// LUCENENET: Added as a replacement for null in Java
internal const int NOT_SET = 0x00;
public const int LOWER = 0x01;
@@ -161,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// If not null is the set of tokens to protect from being delimited
///
/// </summary>
- internal readonly CharArraySet protWords;
+ private readonly CharArraySet protWords;
private readonly int flags;
@@ -202,14 +196,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Creates a new WordDelimiterFilter
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="in"> TokenStream to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
- : base(@in)
+ : base(@in)
{
- InitializeInstanceFields();
+ this.termAttribute = AddAttribute<ICharTermAttribute>();
+ this.offsetAttribute = AddAttribute<IOffsetAttribute>();
+ this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
+ this.typeAttribute = AddAttribute<ITypeAttribute>();
+ concat = new WordDelimiterConcatenation(this);
+ concatAll = new WordDelimiterConcatenation(this);
+ sorter = new OffsetSorter(this);
+
if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
{
throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
@@ -217,22 +219,18 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE));
-
- this.termAttribute = AddAttribute<ICharTermAttribute>();
- this.offsetAttribute = AddAttribute<IOffsetAttribute>();
- this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
- this.typeAttribute = AddAttribute<ITypeAttribute>();
}
/// <summary>
- /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+ /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE"/>
/// as its charTypeTable
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="matchVersion"> lucene compatibility version </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, int configurationFlags, CharArraySet protWords)
- : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+ : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
{
}
@@ -295,22 +293,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE)
{
- if (!concat.Empty)
+ if (!concat.IsEmpty)
{
if (FlushConcatenation(concat))
{
- buffer();
+ Buffer();
continue;
}
}
- if (!concatAll.Empty)
+ if (!concatAll.IsEmpty)
{
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount)
{
concatAll.WriteAndClear();
- buffer();
+ Buffer();
continue;
}
concatAll.Clear();
@@ -351,12 +349,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
int wordType = iterator.Type;
// do we already have queued up incompatible concatenations?
- if (!concat.Empty && (concat.type & wordType) == 0)
+ if (!concat.IsEmpty && (concat.type & wordType) == 0)
{
if (FlushConcatenation(concat))
{
hasOutputToken = false;
- buffer();
+ Buffer();
continue;
}
hasOutputToken = false;
@@ -365,7 +363,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// add subwords depending upon options
if (ShouldConcatenate(wordType))
{
- if (concat.Empty)
+ if (concat.IsEmpty)
{
concat.type = wordType;
}
@@ -382,7 +380,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
if (ShouldGenerateParts(wordType))
{
GeneratePart(false);
- buffer();
+ Buffer();
}
iterator.Next();
@@ -409,7 +407,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private int bufferedPos = 0;
private bool first;
- internal class OffsetSorter : InPlaceMergeSorter
+ internal class OffsetSorter : InPlaceMergeSorter // LUCENENET NOTE: Changed from private to internal because exposed by internal member
{
private readonly WordDelimiterFilter outerInstance;
@@ -420,12 +418,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
protected override int Compare(int i, int j)
{
-
- //int cmp = int.Compare(outerInstance.startOff[i], outerInstance.startOff[j]);
int cmp = outerInstance.startOff[i].CompareTo(outerInstance.startOff[j]);
if (cmp == 0)
{
- //cmp = int.compare(outerInstance.posInc[j], outerInstance.posInc[i]);
cmp = outerInstance.posInc[j].CompareTo(outerInstance.posInc[i]);
}
return cmp;
@@ -454,7 +449,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
get { return this.sorter; }
}
- private void buffer()
+ private void Buffer()
{
if (bufferedLen == buffered.Length)
{
@@ -493,10 +488,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+ /// Flushes the given <see cref="WordDelimiterConcatenation"/> by either writing its concat and then clearing, or just clearing.
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
- /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that will be flushed </param>
+ /// <returns> <c>true</c> if the concatenation was written before it was cleared, <c>false</c> otherwise </returns>
private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
{
lastConcatCount = concatenation.subwordCount;
@@ -513,7 +508,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether to concatenate a word or number if the current word is the given type
/// </summary>
/// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
- /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns>
private bool ShouldConcatenate(int wordType)
{
return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType));
@@ -523,19 +518,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether a word/number part should be generated for a word of the given type
/// </summary>
/// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
- /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns>
private bool ShouldGenerateParts(int wordType)
{
return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType));
}
/// <summary>
- /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+ /// Concatenates the saved buffer to the given <see cref="WordDelimiterConcatenation"/>
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> to concatenate the buffer to </param>
private void Concatenate(WordDelimiterConcatenation concatenation)
{
- if (concatenation.Empty)
+ if (concatenation.IsEmpty)
{
concatenation.startOffset = savedStartOffset + iterator.current;
}
@@ -546,7 +541,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Generates a word/number part, updating the appropriate attributes
/// </summary>
- /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+ /// <param name="isSingleWord"> <c>true</c> if the generation is occurring from a single word, <c>false</c> otherwise </param>
private void GeneratePart(bool isSingleWord)
{
ClearAttributes();
@@ -605,40 +600,40 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Checks if the given word type includes <see cref="#ALPHA"/>
+ /// Checks if the given word type includes <see cref="ALPHA"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="ALPHA"/>, <c>false</c> otherwise </returns>
internal static bool IsAlpha(int type)
{
return (type & ALPHA) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#DIGIT"/>
+ /// Checks if the given word type includes <see cref="DIGIT"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="DIGIT"/>, <c>false</c> otherwise </returns>
internal static bool IsDigit(int type)
{
return (type & DIGIT) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#SUBWORD_DELIM"/>
+ /// Checks if the given word type includes <see cref="SUBWORD_DELIM"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="SUBWORD_DELIM"/>, <c>false</c> otherwise </returns>
internal static bool IsSubwordDelim(int type)
{
return (type & SUBWORD_DELIM) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#UPPER"/>
+ /// Checks if the given word type includes <see cref="UPPER"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="UPPER"/>, <c>false</c> otherwise </returns>
internal static bool IsUpper(int type)
{
return (type & UPPER) != 0;
@@ -648,7 +643,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether the given flag is set
/// </summary>
/// <param name="flag"> Flag to see if set </param>
- /// <returns> {@code true} if flag is set </returns>
+ /// <returns> <c>true</c> if flag is set </returns>
private bool Has(int flag)
{
return (flags & flag) != 0;
@@ -668,7 +663,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.outerInstance = outerInstance;
}
- internal readonly StringBuilder buffer = new StringBuilder();
+ private readonly StringBuilder buffer = new StringBuilder();
internal int startOffset;
internal int endOffset;
internal int type;
@@ -698,7 +693,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
char[] termbuffer = outerInstance.termAttribute.Buffer;
- //buffer.GetChars(0, buffer.Length, termbuffer, 0);
buffer.CopyTo(0, termbuffer, 0, buffer.Length);
outerInstance.termAttribute.Length = buffer.Length;
@@ -718,8 +712,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the concatenation is empty
/// </summary>
- /// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
- internal bool Empty
+ /// <returns> <c>true</c> if the concatenation is empty, <c>false</c> otherwise </returns>
+ internal bool IsEmpty
{
get
{
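
For readers skimming the IsAlpha/IsDigit/IsSubwordDelim/IsUpper changes above: WordDelimiterFilter encodes a word type as bits in a single int, so one type value can carry several character classes at once and every check (including Has(flag) against the configuration flags) is a single bitwise AND. A small self-contained sketch of the same pattern; the constant values below are illustrative, not necessarily the ones the filter defines:

    using System;

    internal static class WordTypeDemo
    {
        // Illustrative flag values only; the real filter defines its own constants.
        private const int LOWER = 0x01;
        private const int UPPER = 0x02;
        private const int DIGIT = 0x04;
        private const int SUBWORD_DELIM = 0x08;
        private const int ALPHA = LOWER | UPPER;   // "alpha" means any letter bit

        private static bool IsAlpha(int type) { return (type & ALPHA) != 0; }
        private static bool IsDigit(int type) { return (type & DIGIT) != 0; }

        public static void Main()
        {
            int type = UPPER | DIGIT;              // e.g. the subword "R2"
            Console.WriteLine(IsAlpha(type));      // True
            Console.WriteLine(IsDigit(type));      // True
        }
    }
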
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
index a0cc42d..827172e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -48,11 +48,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly string wordFiles;
private readonly string types;
private readonly int flags;
- internal byte[] typeTable = null;
+ private byte[] typeTable = null;
private CharArraySet protectedWords = null;
/// <summary>
- /// Creates a new WordDelimiterFilterFactory </summary>
+ /// Creates a new <see cref="WordDelimiterFilterFactory"/> </summary>
public WordDelimiterFilterFactory(IDictionary<string, string> args)
: base(args)
{
@@ -138,7 +138,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
// source => type
- //private static Pattern typePattern = Pattern.compile("(.*)\\s*=>\\s*(.*)\\s*$");
private static Regex typePattern = new Regex("(.*)\\s*=>\\s*(.*)\\s*$", RegexOptions.Compiled);
// parses a list of MappingCharFilter style rules into a custom byte[] type table
@@ -147,8 +146,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
IDictionary<char, byte> typeMap = new SortedDictionary<char, byte>();
foreach (string rule in rules)
{
- //Matcher m = typePattern.matcher(rule);
- //if (!m.find())
Match m = typePattern.Match(rule);
if (!m.Success)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
index f507cf2..ee19be7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -20,12 +20,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules.
+ /// A BreakIterator-like API for iterating over subwords in text, according to <see cref="WordDelimiterFilter"/> rules.
/// @lucene.internal
/// </summary>
public sealed class WordDelimiterIterator
{
-
/// <summary>
/// Indicates the end of iteration </summary>
public const int DONE = -1;
@@ -33,14 +32,14 @@ namespace Lucene.Net.Analysis.Miscellaneous
public static readonly byte[] DEFAULT_WORD_DELIM_TABLE;
internal char[] text;
- internal int length;
+ private int length;
/// <summary>
/// start position of text, excluding leading delimiters </summary>
- internal int startBounds;
+ private int startBounds;
/// <summary>
/// end position of text, excluding trailing delimiters </summary>
- internal int endBounds;
+ private int endBounds;
/// <summary>
/// Beginning of subword </summary>
@@ -49,27 +48,27 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// End of subword </summary>
internal int end;
- /* does this string end with a possessive such as 's */
+ /// <summary>does this string end with a possessive such as 's</summary>
private bool hasFinalPossessive = false;
/// <summary>
/// If false, causes case changes to be ignored (subwords will only be generated
/// given SUBWORD_DELIM tokens). (Defaults to true)
/// </summary>
- internal readonly bool splitOnCaseChange;
+ private readonly bool splitOnCaseChange;
/// <summary>
/// If false, causes numeric changes to be ignored (subwords will only be generated
/// given SUBWORD_DELIM tokens). (Defaults to true)
/// </summary>
- internal readonly bool splitOnNumerics;
+ private readonly bool splitOnNumerics;
/// <summary>
/// If true, causes trailing "'s" to be removed for each subword. (Defaults to true)
/// <p/>
/// "O'Neil's" => "O", "Neil"
/// </summary>
- internal readonly bool stemEnglishPossessive;
+ private readonly bool stemEnglishPossessive;
private readonly byte[] charTypeTable;
@@ -107,7 +106,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Create a new WordDelimiterIterator operating with the supplied rules.
+ /// Create a new <see cref="WordDelimiterIterator"/> operating with the supplied rules.
/// </summary>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="splitOnCaseChange"> if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) </param>
@@ -124,7 +123,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Advance to the next subword in the string.
/// </summary>
- /// <returns> index of the next subword, or <see cref="#DONE"/> if all subwords have been returned </returns>
+ /// <returns> index of the next subword, or <see cref="DONE"/> if all subwords have been returned </returns>
internal int Next()
{
current = end;
@@ -175,7 +174,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// This currently uses the type of the first character in the subword.
/// </summary>
/// <returns> type of the current word </returns>
- internal int Type
+ internal int Type // LUCENENET TODO: Change to GetType()
{
get
{
@@ -218,7 +217,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="lastType"> Last subword type </param>
/// <param name="type"> Current subword type </param>
- /// <returns> {@code true} if the transition indicates a break, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the transition indicates a break, <c>false</c> otherwise </returns>
private bool IsBreak(int lastType, int type)
{
if ((type & lastType) != 0)
@@ -248,8 +247,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the current word contains only one subword. Note, it could be potentially surrounded by delimiters
/// </summary>
- /// <returns> {@code true} if the current word contains only one subword, {@code false} otherwise </returns>
- internal bool SingleWord
+ /// <returns> <c>true</c> if the current word contains only one subword, <c>false</c> otherwise </returns>
+ internal bool SingleWord // LUCENENET TODO: Change to IsSingleWord()
{
get
{
@@ -290,10 +289,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines if the text at the given position indicates an English possessive which should be removed
/// </summary>
/// <param name="pos"> Position in the text to check if it indicates an English possessive </param>
- /// <returns> {@code true} if the text at the position indicates an English possessive, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the text at the position indicates an English possessive, <c>false</c> otherwise </returns>
private bool EndsWithPossessive(int pos)
{
- return (stemEnglishPossessive && pos > 2 && text[pos - 2] == '\'' && (text[pos - 1] == 's' || text[pos - 1] == 'S') && WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])) && (pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]))));
+ return (stemEnglishPossessive &&
+ pos > 2 &&
+ text[pos - 2] == '\'' &&
+ (text[pos - 1] == 's' || text[pos - 1] == 'S') &&
+ WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])) &&
+ (pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]))));
}
/// <summary>
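
The splitOnCaseChange, splitOnNumerics and stemEnglishPossessive fields made private above are normally driven through WordDelimiterFilter's configuration flags rather than set directly. A rough usage sketch, assuming the int flag constants referenced earlier in this commit are exposed as in the Java original (the tokenizer choice and sample text are arbitrary):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    TextReader reader = new StringReader("O'Neil's PowerShot");
    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
    ts = new WordDelimiterFilter(LuceneVersion.LUCENE_48, ts,
        WordDelimiterFilter.GENERATE_WORD_PARTS
            | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
            | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE,
        CharArraySet.EMPTY_SET);
    // With these flags, "PowerShot" splits on the case change and the trailing "'s"
    // of "O'Neil's" is stripped, as the comments above describe.
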
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Core/Analysis/TokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
index b082d6a..9b5957e 100644
--- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
@@ -69,7 +69,7 @@ namespace Lucene.Net.Analysis
/// <para/>
/// If you override this method, always call <c>base.Reset()</c>, otherwise
/// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
- /// throw <see cref="InvalidOperationException"/> on further usage).
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
/// <remarks>
/// <b>NOTE:</b>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Core/Analysis/TokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs
index 1e104e9..d2c34c9 100644
--- a/src/Lucene.Net.Core/Analysis/TokenStream.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs
@@ -129,26 +129,26 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// Consumers (i.e., <seealso cref="IndexWriter"/>) use this method to advance the stream to
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
/// the next token. Implementing classes must implement this method and update
- /// the appropriate <seealso cref="AttributeImpl"/>s with the attributes of the next
+ /// the appropriate <see cref="Lucene.Net.Util.IAttribute"/>s with the attributes of the next
/// token.
- /// <P>
+ /// <para/>
/// The producer must make no assumptions about the attributes after the method
/// has been returned: the caller may arbitrarily change it. If the producer
/// needs to preserve the state for subsequent calls, it can use
- /// <seealso cref="#captureState"/> to create a copy of the current attribute state.
- /// <p>
+ /// <see cref="AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
/// this method is called for every token of a document, so an efficient
/// implementation is crucial for good performance. To avoid calls to
- /// <seealso cref="#addAttribute(Class)"/> and <seealso cref="#getAttribute(Class)"/>,
- /// references to all <seealso cref="AttributeImpl"/>s that this stream uses should be
+ /// <see cref="AttributeSource.AddAttribute{T}"/> and <see cref="AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.IAttribute"/>s that this stream uses should be
/// retrieved during instantiation.
- /// <p>
+ /// <para/>
/// To ensure that filters and consumers know which attributes are available,
/// the attributes must be added during instantiation. Filters and consumers
/// are not required to check for availability of attributes in
- /// <seealso cref="#IncrementToken()"/>.
+ /// <see cref="IncrementToken()"/>.
/// </summary>
/// <returns> false for end of stream; true otherwise </returns>
public abstract bool IncrementToken();
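
A consumer-side illustration of the workflow this comment describes: attribute references are fetched once before iteration, then the stream is driven with Reset/IncrementToken/End. This is a minimal sketch assuming the Lucene.Net 4.8-style API used throughout these commits:

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    public static class ConsumeDemo
    {
        public static void Main()
        {
            Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            using (TokenStream ts = analyzer.GetTokenStream("body", "Hello token streams"))
            {
                // Per the contract above: get attribute references once, up front.
                ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    Console.WriteLine(term.ToString());
                }
                ts.End();
            }
        }
    }
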
[04/11] lucenenet git commit: Lucene.Net.Analysis.Lv refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Lv refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e9ed1843
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e9ed1843
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e9ed1843
Branch: refs/heads/api-work
Commit: e9ed18435084c4c7b50088135d62272f75d120a7
Parents: 0b3f976
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 17:51:44 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 17:51:44 2017 +0700
----------------------------------------------------------------------
.../Analysis/Lv/LatvianAnalyzer.cs | 15 +++++-----
.../Analysis/Lv/LatvianStemFilter.cs | 4 +--
.../Analysis/Lv/LatvianStemFilterFactory.cs | 3 +-
.../Analysis/Lv/LatvianStemmer.cs | 29 ++++++++++----------
4 files changed, 25 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e9ed1843/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
index 26b5074..e466f12 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
@@ -49,7 +49,7 @@ namespace Lucene.Net.Analysis.Lv
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.;
/// </summary>
private class DefaultSetHolder
@@ -77,10 +77,11 @@ namespace Lucene.Net.Analysis.Lv
}
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public LatvianAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
@@ -90,7 +91,7 @@ namespace Lucene.Net.Analysis.Lv
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
public LatvianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}
@@ -103,7 +104,7 @@ namespace Lucene.Net.Analysis.Lv
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
public LatvianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
- : base(matchVersion, stopwords)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
@@ -116,8 +117,8 @@ namespace Lucene.Net.Analysis.Lv
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
- /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>
- /// , <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="LatvianStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e9ed1843/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
index 8a373fa..138d2f9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
@@ -25,7 +25,7 @@ namespace Lucene.Net.Analysis.Lv
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Lv
private readonly IKeywordAttribute keywordAttr;
public LatvianStemFilter(TokenStream input)
- : base(input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
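
The note above about protecting terms with SetKeywordMarkerFilter translates into a chain like the following. This is only a rough sketch under the 4.8-style Analyzer API; the tokenizer choice and the protected term are placeholders:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Lv;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    internal sealed class KeywordAwareLatvianAnalyzer : Analyzer
    {
        private readonly CharArraySet keywords =
            new CharArraySet(LuceneVersion.LUCENE_48, new[] { "rīga" }, true);

        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
            Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            TokenStream result = new LowerCaseFilter(LuceneVersion.LUCENE_48, source);
            result = new SetKeywordMarkerFilter(result, keywords);   // marked terms skip the stemmer
            result = new LatvianStemFilter(result);
            return new TokenStreamComponents(source, result);
        }
    }
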
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e9ed1843/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
index 33b3789..f626fcb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilterFactory.cs
@@ -34,9 +34,8 @@ namespace Lucene.Net.Analysis.Lv
/// </summary>
public class LatvianStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new LatvianStemFilterFactory </summary>
+ /// Creates a new <see cref="LatvianStemFilterFactory"/> </summary>
public LatvianStemFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e9ed1843/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
index 3a95d76..cb75bef 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemmer.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.Util;
-using System.IO;
namespace Lucene.Net.Analysis.Lv
{
@@ -24,14 +23,14 @@ namespace Lucene.Net.Analysis.Lv
/// Light stemmer for Latvian.
/// <para>
/// This is a light version of the algorithm in Karlis Kreslin's PhD thesis
- /// <i>A stemming algorithm for Latvian</i> with the following modifications:
- /// <ul>
- /// <li>Only explicitly stems noun and adjective morphology
- /// <li>Stricter length/vowel checks for the resulting stems (verb etc suffix stripping is removed)
- /// <li>Removes only the primary inflectional suffixes: case and number for nouns ;
- /// case, number, gender, and definitiveness for adjectives.
- /// <li>Palatalization is only handled when a declension II,V,VI noun suffix is removed.
- /// </ul>
+ /// <c>A stemming algorithm for Latvian</c> with the following modifications:
+ /// <list type="bullet">
+ /// <item>Only explicitly stems noun and adjective morphology</item>
+ /// <item>Stricter length/vowel checks for the resulting stems (verb etc suffix stripping is removed)</item>
+ /// <item>Removes only the primary inflectional suffixes: case and number for nouns ;
+ /// case, number, gender, and definitiveness for adjectives.</item>
+ /// <item>Palatalization is only handled when a declension II,V,VI noun suffix is removed.</item>
+ /// </list>
/// </para>
/// </summary>
public class LatvianStemmer
@@ -94,12 +93,12 @@ namespace Lucene.Net.Analysis.Lv
/// <summary>
/// Most cases are handled except for the ambiguous ones:
- /// <ul>
- /// <li> s -> š
- /// <li> t -> š
- /// <li> d -> ž
- /// <li> z -> ž
- /// </ul>
+ /// <list type="bullet">
+ /// <item> s -> š</item>
+ /// <item> t -> š</item>
+ /// <item> d -> ž</item>
+ /// <item> z -> ž</item>
+ /// </list>
/// </summary>
private int Unpalatalize(char[] s, int len)
{
[09/11] lucenenet git commit: Lucene.Net.Analysis.Nl refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Nl refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/31585cfd
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/31585cfd
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/31585cfd
Branch: refs/heads/api-work
Commit: 31585cfd562b1a270b2f5d1ea1e4bf7a4c3adb33
Parents: 269da1e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 00:16:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 00:16:07 2017 +0700
----------------------------------------------------------------------
.../Analysis/Nl/DutchAnalyzer.cs | 29 +++++++++-----------
.../Analysis/Nl/DutchStemFilter.cs | 20 ++++++++------
.../Analysis/Nl/DutchStemmer.cs | 14 +++++-----
3 files changed, 31 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31585cfd/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
index d38d922..153de62 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchAnalyzer.cs
@@ -38,19 +38,18 @@ namespace Lucene.Net.Analysis.Nl
/// exclusion list is empty by default.
/// </para>
///
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating DutchAnalyzer:
- /// <ul>
- /// <li> As of 3.6, <see cref="#DutchAnalyzer(Version, CharArraySet)"/> and
- /// <see cref="#DutchAnalyzer(Version, CharArraySet, CharArraySet)"/> also populate
- /// the default entries for the stem override dictionary
- /// <li> As of 3.1, Snowball stemming is done with SnowballFilter,
+ /// compatibility when creating <see cref="DutchAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.6, <see cref="DutchAnalyzer(LuceneVersion, CharArraySet)"/> and
+ /// <see cref="DutchAnalyzer(LuceneVersion, CharArraySet, CharArraySet)"/> also populate
+ /// the default entries for the stem override dictionary</item>
+ /// <item> As of 3.1, Snowball stemming is done with SnowballFilter,
/// LowerCaseFilter is used prior to StopFilter, and Snowball
- /// stopwords are used by default.
- /// <li> As of 2.9, StopFilter preserves position
- /// increments
- /// </ul>
+ /// stopwords are used by default.</item>
+ /// <item> As of 2.9, StopFilter preserves position
+ /// increments</item>
+ /// </list>
///
/// </para>
/// <para><b>NOTE</b>: This class uses the same <see cref="LuceneVersion"/>
@@ -58,7 +57,6 @@ namespace Lucene.Net.Analysis.Nl
/// </summary>
public sealed class DutchAnalyzer : Analyzer
{
-
/// <summary>
/// File containing default Dutch stopwords. </summary>
public const string DEFAULT_STOPWORD_FILE = "dutch_stop.txt";
@@ -107,7 +105,7 @@ namespace Lucene.Net.Analysis.Nl
/// <summary>
- /// Contains the stopwords used with the StopFilter.
+ /// Contains the stopwords used with the <see cref="StopFilter"/>.
/// </summary>
private readonly CharArraySet stoptable;
@@ -123,9 +121,8 @@ namespace Lucene.Net.Analysis.Nl
private readonly LuceneVersion matchVersion;
/// <summary>
- /// Builds an analyzer with the default stop words (<see cref="#getDefaultStopSet()"/>)
+ /// Builds an analyzer with the default stop words (<see cref="DefaultStopSet"/>)
/// and a few default entries for the stem exclusion table.
- ///
/// </summary>
public DutchAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET, CharArraySet.EMPTY_SET, DefaultSetHolder.DEFAULT_STEM_DICT)
@@ -193,7 +190,7 @@ namespace Lucene.Net.Analysis.Nl
/// <summary>
/// Returns a (possibly reused) <see cref="TokenStream"/> which tokenizes all the
- /// text in the provided <see cref="Reader"/>.
+ /// text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A <see cref="TokenStream"/> built from a <see cref="StandardTokenizer"/>
/// filtered with <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>,
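
To make the constructor relationships above concrete, here is a hedged sketch of building the analyzer with a stem-exclusion set, assuming the DefaultStopSet property and the three-argument constructor referenced in these comments:

    using Lucene.Net.Analysis.Nl;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    // Terms in the exclusion set keep their surface form instead of being stemmed.
    CharArraySet exclusions = new CharArraySet(
        LuceneVersion.LUCENE_48, new[] { "fiets" }, true /* ignoreCase */);
    DutchAnalyzer analyzer = new DutchAnalyzer(
        LuceneVersion.LUCENE_48, DutchAnalyzer.DefaultStopSet, exclusions);
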
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31585cfd/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
index ba3d181..9c280bf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
@@ -31,14 +31,15 @@ namespace Lucene.Net.Analysis.Nl
/// </para>
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="KeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.KeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
- /// </para> </summary>
- /// <seealso cref= KeywordMarkerFilter </seealso>
- /// @deprecated (3.1) Use <see cref="SnowballFilter"/> with
- /// <see cref="org.tartarus.snowball.ext.DutchStemmer"/> instead, which has the
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Miscellaneous.KeywordMarkerFilter"/>
+ /// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
+ /// <see cref="Tartarus.Snowball.Ext.DutchStemmer"/> instead, which has the
/// same functionality. This filter will be removed in Lucene 5.0
- [Obsolete("(3.1) Use SnowballFilter with DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
+ [Obsolete("(3.1) Use Snowball.SnowballFilter with Tartarus.Snowball.Ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public sealed class DutchStemFilter : TokenFilter
{
/// <summary>
@@ -49,15 +50,16 @@ namespace Lucene.Net.Analysis.Nl
private readonly ICharTermAttribute termAtt;
private readonly IKeywordAttribute keywordAttr;
- public DutchStemFilter(TokenStream _in)
- : base(_in)
+ public DutchStemFilter(TokenStream @in)
+ : base(@in)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
}
/// <param name="stemdictionary"> Dictionary of word stem pairs, that overrule the algorithm </param>
- public DutchStemFilter(TokenStream _in, IDictionary<string, string> stemdictionary) : this(_in)
+ public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
+ : this(@in)
{
stemmer.StemDictionary = stemdictionary;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31585cfd/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
index 036b761..43ef3fc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
@@ -29,7 +29,7 @@ namespace Lucene.Net.Analysis.Nl
/// the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
/// algorithm in Martin Porter's snowball project.
/// </para> </summary>
- /// @deprecated (3.1) Use <see cref="org.tartarus.snowball.ext.DutchStemmer"/> instead,
+ /// @deprecated (3.1) Use <see cref="Tartarus.Snowball.Ext.DutchStemmer"/> instead,
/// which has the same functionality. This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use Tartarus.Snowball.Ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public class DutchStemmer
@@ -47,12 +47,12 @@ namespace Lucene.Net.Analysis.Nl
private int _R2;
//TODO convert to internal
- /*
- * Stems the given term to an unique <tt>discriminator</tt>.
- *
- * @param term The term that should be stemmed.
- * @return Discriminator for <tt>term</tt>
- */
+
+ /// <summary>
+ /// Stems the given term to an unique <c>discriminator</c>.
+ /// </summary>
+ /// <param name="term">The term that should be stemmed.</param>
+ /// <returns>Discriminator for <paramref name="term"/></returns>
public virtual string Stem(string term)
{
term = locale.TextInfo.ToLower(term);
[07/11] lucenenet git commit: Lucene.Net.Analysis SWEEP: fix for
various broken XML comments
Posted by ni...@apache.org.
Lucene.Net.Analysis SWEEP: fix for various broken XML comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d4b9c00e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d4b9c00e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d4b9c00e
Branch: refs/heads/api-work
Commit: d4b9c00efe9b96c15c683e7f757842dfd250d95f
Parents: 46b02fb
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 22:56:15 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 22:56:15 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ar/ArabicLetterTokenizer.cs | 3 +--
.../Analysis/CharFilter/HTMLStripCharFilter.cs | 6 +++---
.../Analysis/CharFilter/NormalizeCharMap.cs | 2 +-
.../Analysis/Cjk/CJKTokenizer.cs | 14 ++++++-------
.../Analysis/Cn/ChineseTokenizer.cs | 2 +-
.../Analysis/CommonGrams/CommonGramsFilter.cs | 1 +
.../CommonGrams/CommonGramsQueryFilter.cs | 2 +-
.../Compound/Hyphenation/HyphenationTree.cs | 10 ++++-----
.../Analysis/Core/LetterTokenizer.cs | 2 +-
.../Analysis/Core/StopFilter.cs | 4 ++--
.../Analysis/Core/StopFilterFactory.cs | 2 +-
.../Analysis/Core/WhitespaceTokenizer.cs | 2 +-
.../Analysis/El/GreekStemmer.cs | 22 ++++++++++----------
.../Analysis/En/EnglishAnalyzer.cs | 1 +
.../Analysis/En/EnglishPossessiveFilter.cs | 2 +-
.../Analysis/En/PorterStemmer.cs | 6 +++---
.../Analysis/Hu/HungarianAnalyzer.cs | 1 +
.../Analysis/Hunspell/Dictionary.cs | 2 +-
.../Analysis/Lv/LatvianAnalyzer.cs | 2 +-
.../Analysis/Miscellaneous/KeepWordFilter.cs | 2 +-
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 6 +++---
.../Miscellaneous/PatternKeywordMarkerFilter.cs | 2 +-
.../Analysis/Util/CharArrayMap.cs | 6 +++---
.../Analysis/Util/CharArraySet.cs | 20 +++++++++---------
24 files changed, 60 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
index c698d5c..ae875e4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -31,7 +31,6 @@ namespace Lucene.Net.Analysis.Ar
/// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
/// </para>
/// <para>
- /// <paramref name="matchVersion"/>
/// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
/// <see cref="ArabicLetterTokenizer"/>:
/// <list type="bullet">
@@ -62,7 +61,7 @@ namespace Lucene.Net.Analysis.Ar
/// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">
- /// matchVersion Lucene version to match See
+ /// Lucene version to match - See
/// <see cref="LuceneVersion"/>.
/// </param>
/// <param name="factory">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index d60080e..7dba4f6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -30659,13 +30659,13 @@ namespace Lucene.Net.Analysis.CharFilters
private int yycolumn;
/// <summary>
- /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
/// </summary>
private bool zzAtBOL = true;
#pragma warning restore 169, 414
- /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
/// <summary>denotes if the user-EOF-code has already been executed</summary>
@@ -31061,7 +31061,7 @@ namespace Lucene.Net.Analysis.CharFilters
/// <para/>
/// All internal variables are reset, the old input stream
/// <b>cannot</b> be reused (internal buffer is discarded and lost).
- /// Lexical state is set to <see cref="ZZ_INITIAL"/>.
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
/// <para/>
/// Internal scan buffer is resized down to its initial length, if it has grown.
/// </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
index 110790f..6678922 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
@@ -91,7 +91,7 @@ namespace Lucene.Net.Analysis.CharFilters
/// </summary>
/// <param name="match"> input String to be replaced </param>
/// <param name="replacement"> output String </param>
- /// <exception cref="IllegalArgumentException"> if
+ /// <exception cref="ArgumentException"> if
/// <code>match</code> is the empty string, or was
/// already previously added </exception>
public virtual void Add(string match, string replacement)
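
For context, Add is typically called on the builder before the immutable map is constructed. A brief sketch, assuming the Builder API this class exposes:

    using System.IO;
    using Lucene.Net.Analysis.CharFilters;

    var builder = new NormalizeCharMap.Builder();
    builder.Add("ß", "ss");            // an empty or previously added match throws ArgumentException
    NormalizeCharMap map = builder.Build();

    // The map is then handed to a MappingCharFilter in front of a tokenizer.
    var filter = new MappingCharFilter(map, new StringReader("Straße"));
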
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
index 160306d..901320b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -85,13 +85,13 @@ namespace Lucene.Net.Analysis.Cjk
private int dataLen = 0;
/// <summary>
- /// character buffer, store the characters which are used to compose <br>
+ /// character buffer, store the characters which are used to compose
/// the returned Token
/// </summary>
private readonly char[] buffer = new char[MAX_WORD_LEN];
/// <summary>
- /// I/O buffer, used to store the content of the input(one of the <br>
+ /// I/O buffer, used to store the content of the input(one of the
/// members of Tokenizer)
/// </summary>
private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
@@ -151,22 +151,20 @@ namespace Lucene.Net.Analysis.Cjk
public override bool IncrementToken()
{
ClearAttributes();
- /// <summary>
- /// how many character(s) has been stored in buffer </summary>
+
+ // how many character(s) has been stored in buffer
while (true) // loop until we find a non-empty token
{
int length = 0;
- /// <summary>
- /// the position used to create Token </summary>
+ // the position used to create Token
int start = offset;
while (true) // loop until we've found a full token
{
- /// <summary>
- /// current character </summary>
+ // current character
char c;
offset++;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
index eb500bb..cd98aca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Cn
/// </list>
/// </para>
/// <para>
- /// Therefore the index created by <see cref="CJKTokenizer"/> is much larger.
+ /// Therefore the index created by <see cref="Cjk.CJKTokenizer"/> is much larger.
/// </para>
/// <para>
/// The problem is that when searching for C1, C1C2, C1C3,
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
index e7578be..fcd9b7a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -69,6 +69,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// bigrams with position increment 0 type=gram where one or both of the words
/// in a potential bigram are in the set of common words .
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="input"> <see cref="TokenStream"/> input in filter chain </param>
/// <param name="commonWords"> The set of common words. </param>
public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
index 32039ca..07e7b53 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
@@ -65,7 +65,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <para/>
/// If you override this method, always call <c>base.Reset()</c>, otherwise
/// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
- /// throw <see cref="InvalidOperationException"/> on further usage).
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
/// <remarks>
/// <b>NOTE:</b>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
index c4dfe8b..6f2af07 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -117,7 +117,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="f"> the filename </param>
+ /// <param name="filename"> the filename </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(string filename)
{
@@ -127,7 +127,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="f"> the filename </param>
+ /// <param name="filename"> the filename </param>
/// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(string filename, Encoding encoding)
@@ -359,10 +359,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
{
q = m_lo[q];
- /// <summary>
- /// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
- /// java chars are unsigned
- /// </summary>
+ // actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
+ // java chars are unsigned
}
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
index 9ef19a6..1be2e65 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
@@ -25,7 +25,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// A <see cref="LetterTokenizer"/> is a tokenizer that divides text at non-letters. That's to
/// say, it defines tokens as maximal strings of adjacent letters, as defined by
- /// <see cref="char.IsLetter"/> predicate.
+ /// <see cref="char.IsLetter(char)"/> predicate.
/// <para>
/// Note: this does a decent job for most European languages, but does a terrible
/// job for some Asian languages, where words are not separated by spaces.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
index 1e5e2a0..2515426 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
@@ -79,7 +79,7 @@ namespace Lucene.Net.Analysis.Core
/// an <see cref="Analyzer"/> is constructed.
/// </summary>
/// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
- /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param>
+ /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="T:char[]"/> or any other ToString()-able list representing the stopwords </param>
/// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns>
/// <seealso cref="MakeStopSet(LuceneVersion, string[], bool)"/> passing false to ignoreCase
public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords)
@@ -104,7 +104,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Creates a stopword set from the given stopword list. </summary>
/// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
- /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param>
+ /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="T:char[]"/> or any other ToString()-able list representing the stopwords </param>
/// <param name="ignoreCase"> if true, all words are lower cased first </param>
/// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns>
public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords, bool ignoreCase)
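
A quick sketch of the overload family documented here; string arrays behave the same way as the generic list overloads:

    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    // Case-insensitive stop set built from plain strings.
    CharArraySet stops = StopFilter.MakeStopSet(
        LuceneVersion.LUCENE_48, new[] { "de", "het", "een" }, true /* ignoreCase */);

    bool isStop = stops.Contains("Het");   // true, because the set ignores case
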
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
index 9466549..1f0c5db 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
@@ -60,7 +60,7 @@ namespace Lucene.Net.Analysis.Core
/// <item><c>snowball</c> - This format allows for multiple words specified on each
/// line, and trailing comments may be specified using the vertical line ("|").
/// Blank lines are ignored. See
- /// <see cref="WordlistLoader.GetSnowballWordSet"/>
+ /// <see cref="WordlistLoader.GetSnowballWordSet(System.IO.TextReader, Net.Util.LuceneVersion)"/>
/// for details.
/// </item>
/// </list>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
index a60a679..98db5e7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
@@ -62,7 +62,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Collects only characters which do not satisfy
- /// <see cref="char.IsWhitespace(char)"/>.
+ /// <see cref="char.IsWhiteSpace(char)"/>.
/// </summary>
protected override bool IsTokenChar(int c)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
index 1a5e8b3..4934410 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/El/GreekStemmer.cs
@@ -34,11 +34,11 @@ namespace Lucene.Net.Analysis.El
public class GreekStemmer
{
/// <summary>
- /// Stems a word contained in a leading portion of a <see cref="char[]"/> array.
+ /// Stems a word contained in a leading portion of a <see cref="T:char[]"/> array.
/// The word is passed through a number of rules that modify it's length.
/// </summary>
- /// <param name="s"> A <see cref="char[]"/> array that contains the word to be stemmed. </param>
- /// <param name="len"> The length of the <see cref="char[]"/> array. </param>
+ /// <param name="s"> A <see cref="T:char[]"/> array that contains the word to be stemmed. </param>
+ /// <param name="len"> The length of the <see cref="T:char[]"/> array. </param>
/// <returns> The new length of the stemmed word. </returns>
public virtual int Stem(char[] s, int len)
{
@@ -1021,12 +1021,12 @@ namespace Lucene.Net.Analysis.El
}
/// <summary>
- /// Checks if the word contained in the leading portion of <see cref="char[]"/> array ,
+ /// Checks if the word contained in the leading portion of <see cref="T:char[]"/> array ,
/// ends with a Greek vowel.
/// </summary>
- /// <param name="s"> A <see cref="char[]"/> array that represents a word. </param>
- /// <param name="len"> The length of the <see cref="char[]"/> array. </param>
- /// <returns> True if the word contained in the leading portion of <see cref="char[]"/> array ,
+ /// <param name="s"> A <see cref="T:char[]"/> array that represents a word. </param>
+ /// <param name="len"> The length of the <see cref="T:char[]"/> array. </param>
+ /// <returns> True if the word contained in the leading portion of <see cref="T:char[]"/> array ,
/// ends with a vowel , false otherwise. </returns>
private bool EndsWithVowel(char[] s, int len)
{
@@ -1050,12 +1050,12 @@ namespace Lucene.Net.Analysis.El
}
/// <summary>
- /// Checks if the word contained in the leading portion of <see cref="char[]"/> array ,
+ /// Checks if the word contained in the leading portion of <see cref="T:char[]"/> array ,
/// ends with a Greek vowel.
/// </summary>
- /// <param name="s"> A <see cref="char[]"/> array that represents a word. </param>
- /// <param name="len"> The length of the <see cref="char[]"/> array. </param>
- /// <returns> True if the word contained in the leading portion of <see cref="char[]"/> array ,
+ /// <param name="s"> A <see cref="T:char[]"/> array that represents a word. </param>
+ /// <param name="len"> The length of the <see cref="T:char[]"/> array. </param>
+ /// <returns> True if the word contained in the leading portion of <see cref="T:char[]"/> array ,
/// ends with a vowel , false otherwise. </returns>
private bool EndsWithVowelNoY(char[] s, int len)
{
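
The Stem(char[], int) convention documented above (stem in place over the leading portion of the buffer and return the new length) is shared by most light stemmers in this project. A hedged sketch, assuming the input has already been lower-cased as GreekLowerCaseFilter would do earlier in a chain:

    using System;
    using Lucene.Net.Analysis.El;

    var stemmer = new GreekStemmer();
    char[] buffer = "ανθρωποι".ToCharArray();        // already lower-cased Greek
    int newLen = stemmer.Stem(buffer, buffer.Length); // stems in place, returns new length
    Console.WriteLine(new string(buffer, 0, newLen));
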
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
index eec5878..3c6e0ff 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishAnalyzer.cs
@@ -55,6 +55,7 @@ namespace Lucene.Net.Analysis.En
/// <summary>
/// Builds an analyzer with the default stop words: <see cref="DefaultStopSet"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public EnglishAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
index 9e22c3d..888e7a8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/EnglishPossessiveFilter.cs
@@ -37,7 +37,7 @@ namespace Lucene.Net.Analysis.En
private readonly ICharTermAttribute termAtt;
private LuceneVersion matchVersion;
- /// @deprecated Use <see cref="#EnglishPossessiveFilter(Version, TokenStream)"/> instead.
+ /// @deprecated Use <see cref="EnglishPossessiveFilter(LuceneVersion, TokenStream)"/> instead.
[Obsolete(@"Use <see cref=""#EnglishPossessiveFilter(org.apache.lucene.util.Version, org.apache.lucene.analysis.TokenStream)""/> instead.")]
public EnglishPossessiveFilter(TokenStream input)
: this(LuceneVersion.LUCENE_35, input)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
index 2c2c3e9..d1119c4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
@@ -741,7 +741,7 @@ namespace Lucene.Net.Analysis.En
}
/// <summary>
- /// Stem a word contained in a <see cref="char[]"/>. Returns true if the stemming process
+ /// Stem a word contained in a <see cref="T:char[]"/>. Returns true if the stemming process
/// resulted in a word different from the input. You can retrieve the
/// result with <see cref="ResultLength"/>/<see cref="ResultBuffer"/> or <see cref="ToString"/>.
/// </summary>
@@ -751,7 +751,7 @@ namespace Lucene.Net.Analysis.En
}
/// <summary>
- /// Stem a word contained in a portion of a <see cref="char[]"/> array. Returns
+ /// Stem a word contained in a portion of a <see cref="T:char[]"/> array. Returns
/// true if the stemming process resulted in a word different from
/// the input. You can retrieve the result with
/// <see cref="ResultLength"/>/<see cref="ResultBuffer"/> or <see cref="ToString"/>.
@@ -769,7 +769,7 @@ namespace Lucene.Net.Analysis.En
}
/// <summary>
- /// Stem a word contained in a leading portion of a <see cref="char[]"/> array.
+ /// Stem a word contained in a leading portion of a <see cref="T:char[]"/> array.
/// Returns true if the stemming process resulted in a word different
/// from the input. You can retrieve the result with
/// <see cref="ResultLength"/>/<see cref="ResultBuffer"/> or <see cref="ToString"/>.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
index 9d9cdd5..6951d32 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
@@ -81,6 +81,7 @@ namespace Lucene.Net.Analysis.Hu
/// <summary>
/// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public HungarianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index 780fe1c..29938e5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -393,7 +393,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// <summary>
/// Parses a specific affix rule putting the result into the provided affix map
/// </summary>
- /// <param name="affixes"> <see cref="SortedDictionary{string, IList{char?}}"/> where the result of the parsing will be put </param>
+ /// <param name="affixes"> <see cref="SortedDictionary{TKey, TValue}"/> where the result of the parsing will be put </param>
/// <param name="header"> Header line of the affix rule </param>
/// <param name="reader"> <see cref="TextReader"/> to read the content of the rule from </param>
/// <param name="conditionPattern"> <see cref="string.Format(string, object[])"/> pattern to be used to generate the condition regex
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
index e466f12..7b1828f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
@@ -112,7 +112,7 @@ namespace Lucene.Net.Analysis.Lv
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index 3aa0978..a7972e0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// A <see cref="TokenFilter"/> that only keeps tokens with text contained in the
- /// required words. This filter behaves like the inverse of <see cref="StopFilter"/>.
+ /// required words. This filter behaves like the inverse of <see cref="Core.StopFilter"/>.
///
/// @since solr 1.3
/// </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index a01625b..f943762 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <para>
/// If you are unsure how exactly a regular expression should look like, consider
/// prototyping by simply trying various expressions on some test texts via
- /// <see cref="string.Split(string)"/>. Once you are satisfied, give that regex to
+ /// <see cref="string.Split(char[])"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
@@ -48,7 +48,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// It can also serve as a building block in a compound Lucene
/// <see cref="TokenFilter"/> chain. For example as in this
/// stemming example:
- /// <pre>
+ /// <code>
/// PatternAnalyzer pat = ...
/// TokenStream tokenStream = new SnowballFilter(
/// pat.GetTokenStream("content", "James is running round in the woods"),
@@ -151,7 +151,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// if non-null, ignores all tokens that are contained in the
/// given stop set (after previously having applied toLowerCase()
/// if applicable). For example, created via
- /// <see cref="StopFilter#makeStopSet(Version, String[])"/>and/or
+ /// <see cref="StopFilter.MakeStopSet(LuceneVersion, string[])"/>and/or
/// <see cref="WordlistLoader"/>as in
/// <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
/// or <a href="http://www.unine.ch/info/clef/">other stop words
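A minimal C# sketch of the stemming chain the summary above describes (illustrative only; it assumes the Lucene.NET port of PatternAnalyzer takes a System.Text.RegularExpressions.Regex plus LuceneVersion.LUCENE_48, which this diff does not confirm):

    using System.Text.RegularExpressions;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Snowball;
    using Lucene.Net.Util;

    // Tokenize on runs of non-word characters, lowercase, no stop words (assumed ctor shape).
    var pat = new PatternAnalyzer(LuceneVersion.LUCENE_48, new Regex(@"\W+"), true, null);

    // Chain a stemmer over the analyzer's output, as in the doc comment's example.
    TokenStream stream = new SnowballFilter(
        pat.GetTokenStream("content", "James is running round in the woods"),
        "English");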
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index f9c8898..10ea4a4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -23,7 +23,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Marks terms as keywords via the <see cref="KeywordAttribute"/>. Each token
/// that matches the provided pattern is marked as a keyword by setting
- /// <see cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+ /// <see cref="KeywordAttribute.IsKeyword"/> to <c>true</c>.
/// </summary>
public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
{
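A short usage sketch for PatternKeywordMarkerFilter under the same assumptions (LuceneVersion.LUCENE_48; a constructor taking a Regex): tokens matching the pattern keep KeywordAttribute.IsKeyword set to true, so a following stemmer leaves them alone.

    using System.IO;
    using System.Text.RegularExpressions;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.En;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("running OpenNLP"));
    // "OpenNLP" matches the pattern, is marked as a keyword, and is not stemmed below.
    ts = new PatternKeywordMarkerFilter(ts, new Regex("^Open"));
    ts = new PorterStemFilter(ts);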
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index 5cade2d..b6e2e36 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -29,12 +29,12 @@ namespace Lucene.Net.Analysis.Util
*/
/// <summary>
- /// A simple class that stores key <see cref="string"/>s as <see cref="char[]"/>'s in a
+ /// A simple class that stores key <see cref="string"/>s as <see cref="T:char[]"/>'s in a
/// hash table. Note that this is not a general purpose
/// class. For example, it cannot remove items from the
/// map, nor does it resize its hash table to be smaller,
/// etc. It is designed to be quick to retrieve items
- /// by <see cref="char[]"/> keys without the necessity of converting
+ /// by <see cref="T:char[]"/> keys without the necessity of converting
/// to a <see cref="string"/> first.
///
/// <a name="version"></a>
@@ -249,7 +249,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// <c>true</c> if the entire <see cref="KeySet"/> is the same as the
- /// <paramref name="text"/> <see cref="char[]"/> being passed in;
+ /// <paramref name="text"/> <see cref="T:char[]"/> being passed in;
/// otherwise <c>false</c>.
/// </summary>
public virtual bool ContainsKey(char[] text)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d4b9c00e/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index b715768..9ac42ba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -28,11 +28,11 @@ namespace Lucene.Net.Analysis.Util
*/
/// <summary>
- /// A simple class that stores <see cref="string"/>s as <see cref="char[]"/>'s in a
+ /// A simple class that stores <see cref="string"/>s as <see cref="T:char[]"/>'s in a
/// hash table. Note that this is not a general purpose
/// class. For example, it cannot remove items from the
/// set, nor does it resize its hash table to be smaller,
- /// etc. It is designed to be quick to test if a <see cref="char[]"/>
+ /// etc. It is designed to be quick to test if a <see cref="T:char[]"/>
/// is in the set without the necessity of converting it
/// to a <see cref="string"/> first.
///
@@ -54,9 +54,9 @@ namespace Lucene.Net.Analysis.Util
/// does not behave like it should in all cases. The generic type is
/// <see cref="string"/>, because you can add any object to it,
/// that has a string representation (which is converted to a string). The add methods will use
- /// <see cref="object.ToString()"/> and store the result using a <see cref="char[]"/>
+ /// <see cref="object.ToString()"/> and store the result using a <see cref="T:char[]"/>
/// buffer. The same behavior have the <see cref="Contains(string)"/> methods.
- /// The <see cref="GetEnumerator()"/> returns an <see cref="IEnumerator{Char[]}"/>
+ /// The <see cref="GetEnumerator()"/> returns an <see cref="T:IEnumerator{char[]}"/>
/// </para>
/// </summary>
public class CharArraySet : ISet<string>
@@ -125,7 +125,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// <c>true</c> if the <see cref="char[]"/>s
+ /// <c>true</c> if the <see cref="T:char[]"/>s
/// are in the set
/// </summary>
public virtual bool Contains(char[] text)
@@ -189,8 +189,8 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Add this <see cref="char[]"/> directly to the set.
- /// If <see cref="ignoreCase"/> is true for this <see cref="CharArraySet"/>, the text array will be directly modified.
+ /// Add this <see cref="T:char[]"/> directly to the set.
+ /// If <c>ignoreCase</c> is true for this <see cref="CharArraySet"/>, the text array will be directly modified.
/// The user should never modify this text array after calling this method.
/// </summary>
/// <returns><c>true</c> if <paramref name="o"/> was added to the set; <c>false</c> if it already existed prior to this call</returns>
@@ -293,7 +293,7 @@ namespace Lucene.Net.Analysis.Util
}
/// <summary>
- /// Returns an <see cref="IEnumerator"/> for <see cref="char[]"/> instances in this set.
+ /// Returns an <see cref="IEnumerator"/> for <see cref="T:char[]"/> instances in this set.
/// </summary>
public virtual IEnumerator GetEnumerator()
{
@@ -327,10 +327,10 @@ namespace Lucene.Net.Analysis.Util
#region LUCENENET specific members
/// <summary>
- /// Copies the entire <see cref="CharArraySet"/> to a one-dimensional <see cref="string[]"/> array,
+ /// Copies the entire <see cref="CharArraySet"/> to a one-dimensional <see cref="T:string[]"/> array,
/// starting at the specified index of the target array.
/// </summary>
- /// <param name="array">The one-dimensional <see cref="string[]"/> Array that is the destination of the
+ /// <param name="array">The one-dimensional <see cref="T:string[]"/> Array that is the destination of the
/// elements copied from <see cref="CharArraySet"/>. The Array must have zero-based indexing.</param>
/// <param name="arrayIndex">The zero-based index in array at which copying begins.</param>
public void CopyTo(string[] array, int arrayIndex)
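A minimal sketch of the CharArraySet usage the summary above describes (assumes LuceneVersion.LUCENE_48); the char[] overloads avoid converting to string on the lookup path:

    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    // ignoreCase: true makes lookups case-insensitive.
    var set = new CharArraySet(LuceneVersion.LUCENE_48, 8, true);
    set.Add("jakarta");
    set.Add("apache".ToCharArray());            // the char[] is stored directly
    bool hit1 = set.Contains("Jakarta");        // true
    char[] buffer = "apache".ToCharArray();
    bool hit2 = set.Contains(buffer, 0, buffer.Length);   // true, no string conversion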
[02/11] lucenenet git commit: Lucene.Net.Analysis.In refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.In refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6d272fe7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6d272fe7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6d272fe7
Branch: refs/heads/api-work
Commit: 6d272fe7d28f666b6b9700906b113872dab3e197
Parents: 3eada00
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 17:39:26 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 17:39:26 2017 +0700
----------------------------------------------------------------------
.../Analysis/In/IndicNormalizationFilter.cs | 2 +-
.../Analysis/In/IndicNormalizationFilterFactory.cs | 4 +---
src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs | 1 -
src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs | 7 +++----
4 files changed, 5 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d272fe7/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
index 412714a..5d8abf9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.In
private readonly IndicNormalizer normalizer = new IndicNormalizer();
public IndicNormalizationFilter(TokenStream input)
- : base(input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d272fe7/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
index 9026c7c..d5a24e8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
@@ -1,6 +1,5 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-using System.IO;
namespace Lucene.Net.Analysis.In
{
@@ -33,9 +32,8 @@ namespace Lucene.Net.Analysis.In
/// </summary>
public class IndicNormalizationFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new IndicNormalizationFilterFactory </summary>
+ /// Creates a new <see cref="IndicNormalizationFilterFactory"/> </summary>
public IndicNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d272fe7/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
index 4b26e12..c01e150 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
@@ -2,7 +2,6 @@
using System;
using System.Collections;
using System.Collections.Generic;
-using System.Linq;
using System.Text.RegularExpressions;
namespace Lucene.Net.Analysis.In
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6d272fe7/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
index d492ff6..b0b8b59 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
@@ -26,18 +26,17 @@ namespace Lucene.Net.Analysis.In
/// <summary>
/// Simple Tokenizer for text in Indian Languages. </summary>
- /// @deprecated (3.6) Use <see cref="StandardTokenizer"/> instead.
+ /// @deprecated (3.6) Use <see cref="Standard.StandardTokenizer"/> instead.
[Obsolete("(3.6) Use StandardTokenizer instead.")]
public sealed class IndicTokenizer : CharTokenizer
{
-
public IndicTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
- : base(matchVersion, factory, input)
+ : base(matchVersion, factory, input)
{
}
public IndicTokenizer(LuceneVersion matchVersion, TextReader input)
- : base(matchVersion, input)
+ : base(matchVersion, input)
{
}
[06/11] lucenenet git commit: Lucene.Net.Analysis.Miscellaneous
refactor: member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Miscellaneous refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/46b02fbc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/46b02fbc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/46b02fbc
Branch: refs/heads/api-work
Commit: 46b02fbc101d21e58fe19a8b3f7446c05537d5ba
Parents: e9ed184
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 22:29:58 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 22:29:58 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/ASCIIFoldingFilter.cs | 40 ++--
.../Miscellaneous/ASCIIFoldingFilterFactory.cs | 2 +-
.../Miscellaneous/CapitalizationFilter.cs | 15 +-
.../CapitalizationFilterFactory.cs | 20 +-
.../Miscellaneous/CodepointCountFilter.cs | 6 +-
.../CodepointCountFilterFactory.cs | 13 +-
.../Analysis/Miscellaneous/EmptyTokenStream.cs | 24 ++-
.../Miscellaneous/HyphenatedWordsFilter.cs | 48 ++++-
.../HyphenatedWordsFilterFactory.cs | 6 +-
.../Analysis/Miscellaneous/KeepWordFilter.cs | 6 +-
.../Miscellaneous/KeepWordFilterFactory.cs | 2 +-
.../Miscellaneous/KeywordMarkerFilter.cs | 14 +-
.../Miscellaneous/KeywordMarkerFilterFactory.cs | 2 +-
.../Miscellaneous/KeywordRepeatFilter.cs | 6 +-
.../Miscellaneous/KeywordRepeatFilterFactory.cs | 3 +-
.../Analysis/Miscellaneous/LengthFilter.cs | 5 +-
.../Miscellaneous/LengthFilterFactory.cs | 12 +-
.../Miscellaneous/LimitTokenCountAnalyzer.cs | 9 +-
.../Miscellaneous/LimitTokenCountFilter.cs | 25 +--
.../LimitTokenCountFilterFactory.cs | 13 +-
.../Miscellaneous/LimitTokenPositionFilter.cs | 19 +-
.../LimitTokenPositionFilterFactory.cs | 13 +-
.../Lucene47WordDelimiterFilter.cs | 87 ++++----
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 32 ++-
.../Miscellaneous/PatternKeywordMarkerFilter.cs | 7 +-
.../Miscellaneous/PerFieldAnalyzerWrapper.cs | 22 +--
.../PrefixAndSuffixAwareTokenFilter.cs | 18 +-
.../Miscellaneous/PrefixAwareTokenFilter.cs | 8 +-
.../RemoveDuplicatesTokenFilter.cs | 35 +++-
.../RemoveDuplicatesTokenFilterFactory.cs | 3 +-
.../Miscellaneous/ScandinavianFoldingFilter.cs | 20 +-
.../ScandinavianFoldingFilterFactory.cs | 6 +-
.../ScandinavianNormalizationFilter.cs | 8 +-
.../ScandinavianNormalizationFilterFactory.cs | 5 +-
.../Miscellaneous/SetKeywordMarkerFilter.cs | 8 +-
.../Miscellaneous/SingleTokenTokenStream.cs | 8 +-
.../Miscellaneous/StemmerOverrideFilter.cs | 44 ++---
.../StemmerOverrideFilterFactory.cs | 2 +-
.../Analysis/Miscellaneous/TrimFilter.cs | 4 +-
.../Analysis/Miscellaneous/TrimFilterFactory.cs | 10 +-
.../Miscellaneous/TruncateTokenFilter.cs | 5 +-
.../Miscellaneous/TruncateTokenFilterFactory.cs | 8 +-
.../Miscellaneous/WordDelimiterFilter.cs | 198 +++++++++----------
.../Miscellaneous/WordDelimiterFilterFactory.cs | 7 +-
.../Miscellaneous/WordDelimiterIterator.cs | 38 ++--
src/Lucene.Net.Core/Analysis/TokenFilter.cs | 2 +-
src/Lucene.Net.Core/Analysis/TokenStream.cs | 18 +-
47 files changed, 477 insertions(+), 429 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
index 76bb80a..582a461 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
@@ -25,32 +25,32 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// This class converts alphabetic, numeric, and symbolic Unicode characters
/// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
/// block) into their ASCII equivalents, if one exists.
- ///
+ /// <para/>
/// Characters from the following Unicode blocks are converted; however, only
/// those characters with reasonable ASCII alternatives are converted:
///
/// <ul>
- /// <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
- /// <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
- /// <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
- /// <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
- /// <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
- /// <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
- /// <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
- /// <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
- /// <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
- /// <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
- /// <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
- /// <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
- /// <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
- /// <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
- /// <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
- /// <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
+ /// <item>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a></item>
+ /// <item>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a></item>
+ /// <item>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a></item>
+ /// <item>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a></item>
+ /// <item>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a></item>
+ /// <item>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a></item>
+ /// <item>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a></item>
+ /// <item>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a></item>
+ /// <item>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a></item>
+ /// <item>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a></item>
+ /// <item>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a></item>
+ /// <item>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a></item>
+ /// <item>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a></item>
+ /// <item>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a></item>
+ /// <item>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a></item>
+ /// <item>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a></item>
/// </ul>
- ///
+ /// <para/>
/// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
- ///
- /// For example, 'à' will be replaced by 'a'.
+ /// <para/>
+ /// For example, '&agrave;' will be replaced by 'a'.
/// </summary>
public sealed class ASCIIFoldingFilter : TokenFilter
{
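A minimal usage sketch of ASCIIFoldingFilter (illustrative; assumes LuceneVersion.LUCENE_48 and the two-argument constructor with a preserveOriginal flag):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("déjà vu"));
    // Passing true also emits the unfolded token at the same position.
    ts = new ASCIIFoldingFilter(ts, true);
    // Folded output: "deja" (plus "déjà" as the preserved original), "vu".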
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
index 74a0d33..c1ef517 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -35,7 +35,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly bool preserveOriginal;
/// <summary>
- /// Creates a new ASCIIFoldingFilterFactory </summary>
+ /// Creates a new <see cref="ASCIIFoldingFilterFactory"/> </summary>
public ASCIIFoldingFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index a4b99bc..1148aca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -26,7 +26,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// A filter to apply normal capitalization rules to Tokens. It will make the first letter
/// capital and the rest lower case.
- /// <p/>
+ /// <para/>
/// This filter is particularly useful to build nice looking facet parameters. This filter
/// is not appropriate if you intend to use a prefix query.
/// </summary>
@@ -47,10 +47,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Creates a CapitalizationFilter with the default parameters.
+ /// Creates a <see cref="CapitalizationFilter"/> with the default parameters.
/// <para>
- /// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
- /// CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
+ /// Calls <see cref="CapitalizationFilter.CapitalizationFilter(TokenStream, bool, CharArraySet, bool, ICollection{char[]}, int, int, int)">
+ /// CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
+ /// </see>
/// </para>
/// </summary>
public CapitalizationFilter(TokenStream @in)
@@ -59,21 +60,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Creates a CapitalizationFilter with the specified parameters. </summary>
+ /// Creates a <see cref="CapitalizationFilter"/> with the specified parameters. </summary>
/// <param name="in"> input tokenstream </param>
/// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
/// <param name="keep"> a keep word list. Each word that should be kept separated by whitespace. </param>
/// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
/// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
/// <param name="minWordLength"> how long the word needs to be to get capitalization applied. If the
- /// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
+ /// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
/// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
/// assumed to be correct. </param>
/// <param name="maxTokenLength"> ??? </param>
public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
: base(@in)
{
- // LUCENENET: The guard clauses were copied here from the version of Lucene.
+ // LUCENENET NOTE: The guard clauses were copied here from a later version of Lucene.
// Apparently, the tests were not ported from 4.8.0 because they expected this and the
// original tests did not. Adding them anyway because there is no downside to this.
if (minWordLength < 0)
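For orientation, a minimal sketch of the default-parameter constructor described above (assumes LuceneVersion.LUCENE_48 and KeywordTokenizer from Lucene.Net.Analysis.Core):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;

    TokenStream ts = new KeywordTokenizer(new StringReader("bob's shoe store"));
    // Default parameters: capitalize the first letter, lowercase the rest.
    ts = new CapitalizationFilter(ts);   // -> "Bob's shoe store"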
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
index 236d6da..88f6bb0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Factory for <see cref="CapitalizationFilter"/>.
- /// <p/>
- /// The factory takes parameters:<br/>
- /// "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
- /// "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
- /// "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.<br/>
- /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
+ /// <para/>
+ /// The factory takes parameters:<para/>
+ /// "onlyFirstWord" - should each word be capitalized or all of the words?<para/>
+ /// "keep" - a keep word list. Each word that should be kept separated by whitespace.<para/>
+ /// "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.<para/>
+ /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<para/>
/// "okPrefix" - do not change word capitalization if a word begins with something in this list.
/// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
- /// "Mckinley"<br/>
+ /// "Mckinley"<para/>
/// "minWordLength" - how long the word needs to be to get capitalization applied. If the
- /// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
+ /// minWordLength is 3, "and" > "And" but "or" stays "or"<para/>
/// "maxWordCount" - if the token contains more then maxWordCount words, the capitalization is
- /// assumed to be correct.<br/>
+ /// assumed to be correct.<para/>
///
/// <code>
/// <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
/// <summary>
- /// Creates a new CapitalizationFilterFactory </summary>
+ /// Creates a new <see cref="CapitalizationFilterFactory"/> </summary>
public CapitalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
index 1c12925..a5f2085 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -38,8 +38,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="CodepointCountFilter"/>. This will filter out tokens whose
- /// <see cref="CharTermAttribute"/> is either too short (<see cref="Character#CodePointCount(char[], int, int)"/>
- /// < min) or too long (<see cref="Character#codePointCount(char[], int, int)"/> > max). </summary>
+ /// <see cref="CharTermAttribute"/> is either too short (<see cref="Character.CodePointCount(char[], int, int)"/>
+ /// < min) or too long (<see cref="Character.CodePointCount(char[], int, int)"/> > max). </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <see cref="TokenStream"/> to consume </param>
/// <param name="min"> the minimum length </param>
@@ -47,7 +47,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
public CodepointCountFilter(LuceneVersion version, TokenStream @in, int min, int max)
: base(version, @in)
{
- // LUCENENET: The guard clauses were copied here from the version of Lucene.
+ // LUCENENET: The guard clauses were copied here from a later version of Lucene.
// Apparently, the tests were not ported from 4.8.0 because they expected this and the
// original tests did not. Adding them anyway because there is no downside to this.
if (min < 0)
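A minimal sketch of the min/max behavior described above (assumes LuceneVersion.LUCENE_48):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("a ab abc abcd"));
    // Keep tokens whose Unicode code point count is between 2 and 3 inclusive.
    ts = new CodepointCountFilter(LuceneVersion.LUCENE_48, ts, 2, 3);   // -> "ab", "abc"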
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
index 4163aec..149cc13 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -32,14 +32,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class CodepointCountFilterFactory : TokenFilterFactory
{
- internal readonly int min;
- internal readonly int max;
+ private readonly int min;
+ private readonly int max;
public const string MIN_KEY = "min";
public const string MAX_KEY = "max";
/// <summary>
- /// Creates a new CodepointCountFilterFactory </summary>
- public CodepointCountFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="CodepointCountFilterFactory"/> </summary>
+ public CodepointCountFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
min = RequireInt(args, MIN_KEY);
max = RequireInt(args, MAX_KEY);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
index ef84806..baa5f80 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -1,7 +1,6 @@
\ufeffnamespace Lucene.Net.Analysis.Miscellaneous
{
-
- /*
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,15 +17,14 @@
* limitations under the License.
*/
- /// <summary>
- /// An always exhausted token stream.
- /// </summary>
- public sealed class EmptyTokenStream : TokenStream
- {
-
- public override bool IncrementToken()
- {
- return false;
- }
- }
+ /// <summary>
+ /// An always exhausted token stream.
+ /// </summary>
+ public sealed class EmptyTokenStream : TokenStream
+ {
+ public override bool IncrementToken()
+ {
+ return false;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
index c5da204..e12e123 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -1,10 +1,8 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using System.Text;
-using Lucene.Net.Analysis.TokenAttributes;
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,6 +19,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
/// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
@@ -48,11 +47,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldtype>
/// </code>
- ///
/// </summary>
public sealed class HyphenatedWordsFilter : TokenFilter
{
-
private readonly ICharTermAttribute termAttribute;
private readonly IOffsetAttribute offsetAttribute;
@@ -62,9 +59,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private int lastEndOffset = 0;
/// <summary>
- /// Creates a new HyphenatedWordsFilter
+ /// Creates a new <see cref="HyphenatedWordsFilter"/>
/// </summary>
- /// <param name="in"> TokenStream that will be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> that will be filtered </param>
public HyphenatedWordsFilter(TokenStream @in)
: base(@in)
{
@@ -73,8 +70,28 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate <see cref="Lucene.Net.Util.Attribute"/>s with the attributes of the next
+ /// token.
+ /// <para/>
+ /// The producer must make no assumptions about the attributes after the method
+ /// has been returned: the caller may arbitrarily change it. If the producer
+ /// needs to preserve the state for subsequent calls, it can use
+ /// <see cref="Lucene.Net.Util.AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
+ /// this method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// <see cref="Lucene.Net.Util.AttributeSource.AddAttribute{T}"/> and <see cref="Lucene.Net.Util.AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.Attribute"/>s that this stream uses should be
+ /// retrieved during instantiation.
+ /// <para/>
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// <see cref="IncrementToken()"/>.
/// </summary>
+ /// <returns> false for end of stream; true otherwise </returns>
public override bool IncrementToken()
{
while (!exhausted && m_input.IncrementToken())
@@ -122,8 +139,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
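A minimal sketch of the hyphen-rejoining behavior the summary describes (assumes LuceneVersion.LUCENE_48; the sample text is hypothetical):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    // "ecologi-" and "cal" came from a line break in the extracted plain text.
    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("ecologi- cal developments"));
    ts = new HyphenatedWordsFilter(ts);   // -> "ecological", "developments"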
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
index 6c4d375..19707ad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -32,10 +32,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class HyphenatedWordsFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new HyphenatedWordsFilterFactory </summary>
- public HyphenatedWordsFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="HyphenatedWordsFilterFactory"/> </summary>
+ public HyphenatedWordsFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index 0ff278c..3aa0978 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -23,8 +23,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A TokenFilter that only keeps tokens with text contained in the
- /// required words. This filter behaves like the inverse of StopFilter.
+ /// A <see cref="TokenFilter"/> that only keeps tokens with text contained in the
+ /// required words. This filter behaves like the inverse of <see cref="StopFilter"/>.
///
/// @since solr 1.3
/// </summary>
@@ -44,7 +44,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="KeepWordFilter"/>.
- /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
+ /// <para><c>NOTE</c>: The words set passed to this constructor will be directly
/// used by this filter and should not be modified.
/// </para>
/// </summary>
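A minimal sketch of the "inverse StopFilter" behavior (assumes LuceneVersion.LUCENE_48 and the three-argument constructor); note the words set is used directly by the filter, as the summary warns:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    var keep = new CharArraySet(LuceneVersion.LUCENE_48, 2, true);
    keep.Add("lucene");
    keep.Add("net");
    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("the lucene net port"));
    // Only tokens present in 'keep' survive; do not modify 'keep' afterwards.
    ts = new KeepWordFilter(LuceneVersion.LUCENE_48, ts, keep);   // -> "lucene", "net"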
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
index 39f61bf..4806fbd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private CharArraySet words;
/// <summary>
- /// Creates a new KeepWordFilterFactory </summary>
+ /// Creates a new <see cref="KeepWordFilterFactory"/> </summary>
public KeepWordFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
index ab8a884..83adbda 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,29 +18,29 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// Marks terms as keywords via the <see cref="KeywordAttribute"/>.
/// </summary>
- /// <seealso cref= KeywordAttribute </seealso>
+ /// <seealso cref="KeywordAttribute"/>
public abstract class KeywordMarkerFilter : TokenFilter
{
-
private readonly IKeywordAttribute keywordAttr;
/// <summary>
/// Creates a new <see cref="KeywordMarkerFilter"/> </summary>
/// <param name="in"> the input stream </param>
- protected internal KeywordMarkerFilter(TokenStream @in)
+ protected KeywordMarkerFilter(TokenStream @in)
: base(@in)
{
keywordAttr = AddAttribute<IKeywordAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
- if (Keyword)
+ if (IsKeyword)
{
keywordAttr.IsKeyword = true;
}
@@ -53,7 +52,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
}
- protected internal abstract bool Keyword { get; }
-
+ protected abstract bool IsKeyword { get; } // LUCENENET TODO: Change to IsKeyword() ?
}
}
\ No newline at end of file
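A hypothetical subclass sketch showing how the refactored API above is meant to be used: IncrementToken is sealed in the base class, and implementations only supply the IsKeyword property (the class name and the length rule are made up for illustration):

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.TokenAttributes;

    // Hypothetical filter: marks long tokens as keywords so downstream stemmers skip them.
    public sealed class LongTokenMarkerFilter : KeywordMarkerFilter
    {
        private readonly ICharTermAttribute termAtt;
        private readonly int minLength;

        public LongTokenMarkerFilter(TokenStream @in, int minLength)
            : base(@in)
        {
            this.termAtt = AddAttribute<ICharTermAttribute>();
            this.minLength = minLength;
        }

        // Called by the sealed IncrementToken in the base class for every token.
        protected override bool IsKeyword
        {
            get { return termAtt.Length >= minLength; }
        }
    }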
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
index 0070b74..d5054e1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
@@ -42,7 +42,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private CharArraySet protectedWords;
/// <summary>
- /// Creates a new KeywordMarkerFilterFactory </summary>
+ /// Creates a new <see cref="KeywordMarkerFilterFactory"/> </summary>
public KeywordMarkerFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
index 4cfe3e8..13aa99b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,15 +18,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
 /// This TokenFilter emits each incoming token twice once as keyword and once non-keyword, in other words once with
- /// <see cref="KeywordAttribute#setKeyword(boolean)"/> set to <code>true</code> and once set to <code>false</code>.
+ /// <see cref="KeywordAttribute.IsKeyword"/> set to <c>true</c> and once set to <c>false</c>.
/// This is useful if used with a stem filter that respects the <see cref="KeywordAttribute"/> to index the stemmed and the
/// un-stemmed version of a term into the same field.
/// </summary>
public sealed class KeywordRepeatFilter : TokenFilter
{
-
private readonly IKeywordAttribute keywordAttribute;
private readonly IPositionIncrementAttribute posIncAttr;
private State state;
@@ -67,5 +66,4 @@ namespace Lucene.Net.Analysis.Miscellaneous
state = null;
}
}
-
}
\ No newline at end of file
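A minimal sketch of the stemmed-plus-original indexing pattern the summary describes (assumes LuceneVersion.LUCENE_48; PorterStemFilter respects the KeywordAttribute):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.En;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    TokenStream ts = new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
        new StringReader("running"));
    ts = new KeywordRepeatFilter(ts);           // emits "running" twice: keyword + non-keyword
    ts = new PorterStemFilter(ts);              // stems only the non-keyword copy -> "run"
    ts = new RemoveDuplicatesTokenFilter(ts);   // drops the duplicate when stemming changed nothing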
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
index c34561a..e88fe02 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -29,9 +29,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new KeywordRepeatFilterFactory </summary>
+ /// Creates a new <see cref="KeywordRepeatFilterFactory"/> </summary>
public KeywordRepeatFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index ab19c3a..aeb9434 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -30,7 +30,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class LengthFilter : FilteringTokenFilter
{
-
private readonly int min;
private readonly int max;
@@ -55,8 +54,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="LengthFilter"/>. This will filter out tokens whose
- /// <see cref="CharTermAttribute"/> is either too short (<see cref="CharTermAttribute#length()"/>
- /// < min) or too long (<see cref="CharTermAttribute#length()"/> > max). </summary>
+ /// <see cref="CharTermAttribute"/> is either too short (<see cref="ICharTermAttribute.Length"/>
+ /// < min) or too long (<see cref="ICharTermAttribute.Length"/> > max). </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <see cref="TokenStream"/> to consume </param>
/// <param name="min"> the minimum length </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
index f206b4b..aaed57d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -32,14 +32,14 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class LengthFilterFactory : TokenFilterFactory
{
- internal readonly int min;
- internal readonly int max;
- internal readonly bool enablePositionIncrements;
+ private readonly int min;
+ private readonly int max;
+ private readonly bool enablePositionIncrements;
public const string MIN_KEY = "min";
public const string MAX_KEY = "max";
/// <summary>
- /// Creates a new LengthFilterFactory </summary>
+ /// Creates a new <see cref="LengthFilterFactory"/> </summary>
public LengthFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
index 3bafb19..51361b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
@@ -18,9 +18,9 @@
*/
/// <summary>
- /// This Analyzer limits the number of tokens while indexing. It is
- /// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>. </summary>
- /// <seealso cref= LimitTokenCountFilter </seealso>
+ /// This <see cref="Analyzer"/> limits the number of tokens while indexing. It is
+ /// a replacement for the maximum field length setting inside <see cref="Index.IndexWriter"/>. </summary>
+ /// <seealso cref="LimitTokenCountFilter"/>
public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
{
private readonly Analyzer @delegate;
@@ -31,11 +31,12 @@
/// Build an analyzer that limits the maximum number of tokens per field.
/// This analyzer will not consume any tokens beyond the maxTokenCount limit
/// </summary>
- /// <seealso cref= #LimitTokenCountAnalyzer(Analyzer,int,boolean) </seealso>
+ /// <seealso cref="LimitTokenCountAnalyzer(Analyzer,int,bool)"/>
public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount)
: this(@delegate, maxTokenCount, false)
{
}
+
/// <summary>
/// Build an analyzer that limits the maximum number of tokens per field. </summary>
/// <param name="delegate"> the analyzer to wrap </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
index 9e4a37c..d7ec875 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
@@ -18,23 +18,22 @@
*/
/// <summary>
- /// This TokenFilter limits the number of tokens while indexing. It is
- /// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>.
+ /// This <see cref="TokenFilter"/> limits the number of tokens while indexing. It is
+ /// a replacement for the maximum field length setting inside <see cref="Index.IndexWriter"/>.
/// <para>
- /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
- /// once the limit has been reached, which can result in {@code reset()} being
- /// called prior to {@code incrementToken()} returning {@code false}. For most
- /// {@code TokenStream} implementations this should be acceptable, and faster
- /// then consuming the full stream. If you are wrapping a {@code TokenStream}
+ /// By default, this filter ignores any tokens in the wrapped <see cref="TokenStream"/>
+ /// once the limit has been reached, which can result in <see cref="Reset"/> being
+ /// called prior to <see cref="IncrementToken"/> returning <c>false</c>. For most
+ /// <see cref="TokenStream"/> implementations this should be acceptable, and faster
+ /// then consuming the full stream. If you are wrapping a <see cref="TokenStream"/>
/// which requires that the full stream of tokens be exhausted in order to
/// function properly, use the
- /// <see cref="#LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens"/>
+ /// <see cref="LimitTokenCountFilter.LimitTokenCountFilter(TokenStream,int,bool)"/> consumeAllTokens
/// option.
/// </para>
/// </summary>
public sealed class LimitTokenCountFilter : TokenFilter
{
-
private readonly int maxTokenCount;
private readonly bool consumeAllTokens;
private int tokenCount = 0;
@@ -42,9 +41,11 @@
/// <summary>
/// Build a filter that only accepts tokens up to a maximum number.
- /// This filter will not consume any tokens beyond the maxTokenCount limit
+ /// This filter will not consume any tokens beyond the <paramref name="maxTokenCount"/> limit
/// </summary>
- /// <seealso cref= #LimitTokenCountFilter(TokenStream,int,boolean) </seealso>
+ /// <param name="in"> the stream to wrap </param>
+ /// <param name="maxTokenCount"> max number of tokens to produce </param>
+ /// <seealso cref="LimitTokenCountFilter(TokenStream,int,bool)"/>
public LimitTokenCountFilter(TokenStream @in, int maxTokenCount)
: this(@in, maxTokenCount, false)
{
@@ -54,7 +55,7 @@
/// Build an filter that limits the maximum number of tokens per field. </summary>
/// <param name="in"> the stream to wrap </param>
/// <param name="maxTokenCount"> max number of tokens to produce </param>
- /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
+ /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if <paramref name="maxTokenCount"/> is reached. </param>
public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens)
: base(@in)
{
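
A minimal sketch (not part of this commit) of the two-argument constructor documented above; the WhitespaceTokenizer source and sample text are illustrative assumptions:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;
    using System.IO;

    // Keep only the first 100 tokens of the wrapped stream; pass true as the
    // third argument instead if the wrapped stream must be fully exhausted.
    TokenStream source = new WhitespaceTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("some field text"));
    TokenStream limited = new LimitTokenCountFilter(source, 100);
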
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
index 25b980d..f64981e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,20 +30,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// <para>
- /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.
+ /// The <see cref="consumeAllTokens"/> property is optional and defaults to <c>false</c>.
/// See <see cref="LimitTokenCountFilter"/> for an explanation of its use.
/// </para>
/// </summary>
public class LimitTokenCountFilterFactory : TokenFilterFactory
{
-
public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
- internal readonly int maxTokenCount;
- internal readonly bool consumeAllTokens;
+ private readonly int maxTokenCount;
+ private readonly bool consumeAllTokens;
/// <summary>
- /// Creates a new LimitTokenCountFilterFactory </summary>
+ /// Creates a new <see cref="LimitTokenCountFilterFactory"/> </summary>
public LimitTokenCountFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
index 008ff97..40c5a34 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
@@ -20,23 +20,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// This TokenFilter limits its emitted tokens to those with positions that
+ /// This <see cref="TokenFilter"/> limits its emitted tokens to those with positions that
/// are not greater than the configured limit.
/// <para>
- /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
- /// once the limit has been exceeded, which can result in {@code reset()} being
- /// called prior to {@code incrementToken()} returning {@code false}. For most
- /// {@code TokenStream} implementations this should be acceptable, and faster
- /// then consuming the full stream. If you are wrapping a {@code TokenStream}
+ /// By default, this filter ignores any tokens in the wrapped <see cref="TokenStream"/>
+ /// once the limit has been exceeded, which can result in <see cref="Reset"/> being
+ /// called prior to <see cref="IncrementToken"/> returning <c>false</c>. For most
+ /// <see cref="TokenStream"/> implementations this should be acceptable, and faster
+ /// than consuming the full stream. If you are wrapping a <see cref="TokenStream"/>
/// which requires that the full stream of tokens be exhausted in order to
/// function properly, use the
- /// <see cref="#LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens"/>
+ /// <see cref="LimitTokenPositionFilter(TokenStream,int,bool)"/> consumeAllTokens
/// option.
/// </para>
/// </summary>
public sealed class LimitTokenPositionFilter : TokenFilter
{
-
private readonly int maxTokenPosition;
private readonly bool consumeAllTokens;
private int tokenPosition = 0;
@@ -45,12 +44,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Build a filter that only accepts tokens up to and including the given maximum position.
- /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
+ /// This filter will not consume any tokens with position greater than the <paramref name="maxTokenPosition"/> limit.
/// </summary>
/// <param name="in"> the stream to wrap </param>
/// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
/// </param>
- /// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
+ /// <seealso cref="LimitTokenPositionFilter(TokenStream,int,bool)"/>
public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition)
: this(@in, maxTokenPosition, false)
{
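
A minimal sketch of the constructor above (same using directives as the LimitTokenCountFilter sketch earlier; the tokenizer and position limit are illustrative assumptions): tokens whose position exceeds the limit are dropped.

    // Emit only tokens whose position is <= 5; the first token has position 1.
    TokenStream source = new WhitespaceTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("one two three four five six"));
    TokenStream limited = new LimitTokenPositionFilter(source, 5);
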
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
index e2d7692..6bc1fcd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,20 +30,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// <para>
- /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.
+ /// The <see cref="consumeAllTokens"/> property is optional and defaults to <c>false</c>.
/// See <see cref="LimitTokenPositionFilter"/> for an explanation of its use.
/// </para>
/// </summary>
public class LimitTokenPositionFilterFactory : TokenFilterFactory
{
-
public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
- internal readonly int maxTokenPosition;
- internal readonly bool consumeAllTokens;
+ private readonly int maxTokenPosition;
+ private readonly bool consumeAllTokens;
/// <summary>
- /// Creates a new LimitTokenPositionFilterFactory </summary>
+ /// Creates a new <see cref="LimitTokenPositionFilterFactory"/> </summary>
public LimitTokenPositionFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 15bf32a..070ad85 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -29,13 +29,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
[Obsolete]
public sealed class Lucene47WordDelimiterFilter : TokenFilter
{
- private void InitializeInstanceFields()
- {
- concat = new WordDelimiterConcatenation(this);
- concatAll = new WordDelimiterConcatenation(this);
- }
-
-
public const int LOWER = 0x01;
public const int UPPER = 0x02;
public const int DIGIT = 0x04;
@@ -110,7 +103,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// If not null is the set of tokens to protect from being delimited
///
/// </summary>
- internal readonly CharArraySet protWords;
+ private readonly CharArraySet protWords;
private readonly int flags;
@@ -149,9 +142,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private bool hasOutputFollowingOriginal = false;
/// <summary>
- /// Creates a new WordDelimiterFilter
+ /// Creates a new <see cref="Lucene47WordDelimiterFilter"/>
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
@@ -162,18 +155,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
offsetAttribute = AddAttribute<IOffsetAttribute>();
posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
typeAttribute = AddAttribute<ITypeAttribute>();
+ concat = new WordDelimiterConcatenation(this);
+ concatAll = new WordDelimiterConcatenation(this);
- InitializeInstanceFields();
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE));
}
/// <summary>
- /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+ /// Creates a new <see cref="Lucene47WordDelimiterFilter"/> using <see cref="WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE"/>
/// as its charTypeTable
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords)
@@ -237,7 +231,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE)
{
- if (!concat.Empty)
+ if (!concat.IsEmpty)
{
if (FlushConcatenation(concat))
{
@@ -245,7 +239,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
}
- if (!concatAll.Empty)
+ if (!concatAll.IsEmpty)
{
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount)
@@ -272,7 +266,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
int wordType = iterator.Type;
// do we already have queued up incompatible concatenations?
- if (!concat.Empty && (concat.type & wordType) == 0)
+ if (!concat.IsEmpty && (concat.type & wordType) == 0)
{
if (FlushConcatenation(concat))
{
@@ -285,7 +279,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// add subwords depending upon options
if (ShouldConcatenate(wordType))
{
- if (concat.Empty)
+ if (concat.IsEmpty)
{
concat.type = wordType;
}
@@ -311,8 +305,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -348,10 +355,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+ /// Flushes the given <see cref="WordDelimiterConcatenation"/> by either writing its concat and then clearing, or just clearing.
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
- /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that will be flushed </param>
+ /// <returns> <c>true</c> if the concatenation was written before it was cleared, <c>false</c> otherwise </returns>
private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
{
lastConcatCount = concatenation.subwordCount;
@@ -368,7 +375,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether to concatenate a word or number if the current word is the given type
/// </summary>
/// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
- /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns>
private bool ShouldConcatenate(int wordType)
{
return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType));
@@ -378,7 +385,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether a word/number part should be generated for a word of the given type
/// </summary>
/// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
- /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns>
private bool ShouldGenerateParts(int wordType)
{
return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType));
@@ -390,7 +397,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
private void Concatenate(WordDelimiterConcatenation concatenation)
{
- if (concatenation.Empty)
+ if (concatenation.IsEmpty)
{
concatenation.startOffset = savedStartOffset + iterator.current;
}
@@ -401,7 +408,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Generates a word/number part, updating the appropriate attributes
/// </summary>
- /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+ /// <param name="isSingleWord"> <c>true</c> if the generation is occurring from a single word, <c>false</c> otherwise </param>
private void GeneratePart(bool isSingleWord)
{
ClearAttributes();
@@ -460,41 +467,41 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Checks if the given word type includes <see cref="#ALPHA"/>
+ /// Checks if the given word type includes <see cref="ALPHA"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
- internal static bool IsAlpha(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="ALPHA"/>, <c>false</c> otherwise </returns>
+ private static bool IsAlpha(int type)
{
return (type & ALPHA) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#DIGIT"/>
+ /// Checks if the given word type includes <see cref="DIGIT"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
- internal static bool IsDigit(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="DIGIT"/>, <c>false</c> otherwise </returns>
+ private static bool IsDigit(int type)
{
return (type & DIGIT) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#SUBWORD_DELIM"/>
+ /// Checks if the given word type includes <see cref="SUBWORD_DELIM"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
- internal static bool IsSubwordDelim(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="SUBWORD_DELIM"/>, <c>false</c> otherwise </returns>
+ private static bool IsSubwordDelim(int type)
{
return (type & SUBWORD_DELIM) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#UPPER"/>
+ /// Checks if the given word type includes <see cref="UPPER"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
- internal static bool IsUpper(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="UPPER"/>, <c>false</c> otherwise </returns>
+ private static bool IsUpper(int type)
{
return (type & UPPER) != 0;
}
@@ -503,7 +510,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether the given flag is set
/// </summary>
/// <param name="flag"> Flag to see if set </param>
- /// <returns> {@code true} if flag is set </returns>
+ /// <returns> <c>true</c> if flag is set </returns>
private bool Has(int flag)
{
return (flags & flag) != 0;
@@ -573,8 +580,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the concatenation is empty
/// </summary>
- /// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
- internal bool Empty
+ /// <returns> <c>true</c> if the concatenation is empty, <c>false</c> otherwise </returns>
+ internal bool IsEmpty
{
get
{
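
The private Has(int flag) helper above relies on the usual bit-mask convention for configurationFlags. A generic illustration (the constant values below are placeholders for this sketch, not the values defined by the filter):

    // Each option occupies one bit; options are combined with | and tested with &.
    const int GenerateWordParts = 0x01;   // placeholder value for illustration
    const int CatenateWords     = 0x02;   // placeholder value for illustration
    const int CatenateNumbers   = 0x04;   // placeholder value for illustration

    int flags = GenerateWordParts | CatenateWords;
    bool hasCatenateWords   = (flags & CatenateWords) != 0;   // true
    bool hasCatenateNumbers = (flags & CatenateNumbers) != 0; // false
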
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index e33b446..a01625b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -27,9 +27,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
- /// <see cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <see cref="Pattern"/>
- /// (with behaviour identical to <see cref="String#split(String)"/>),
+ /// Efficient Lucene analyzer/tokenizer that preferably operates on a <see cref="string"/> rather than a
+ /// <see cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <see cref="Regex"/>
+ /// (with behaviour similar to <see cref="string.Split(string)"/>),
/// and that combines the functionality of
/// <see cref="LetterTokenizer"/>,
/// <see cref="LowerCaseTokenizer"/>,
@@ -39,9 +39,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <para>
/// If you are unsure how exactly a regular expression should look, consider
/// prototyping by simply trying various expressions on some test texts via
- /// <see cref="String#split(String)"/>. Once you are satisfied, give that regex to
- /// PatternAnalyzer. Also see <a target="_blank"
- /// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+ /// <see cref="string.Split(string)"/>. Once you are satisfied, give that regex to
+ /// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
+ /// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
/// <para>
/// This class can be considerably faster than the "normal" Lucene tokenizers.
@@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <pre>
/// PatternAnalyzer pat = ...
/// TokenStream tokenStream = new SnowballFilter(
- /// pat.tokenStream("content", "James is running round in the woods"),
+ /// pat.GetTokenStream("content", "James is running round in the woods"),
/// "English"));
/// </code>
/// </para>
@@ -60,13 +60,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
public sealed class PatternAnalyzer : Analyzer
{
-
/// <summary>
- /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+ /// <c>"\\W+"</c>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
public static readonly Regex NON_WORD_PATTERN = new Regex("\\W+", RegexOptions.Compiled);
/// <summary>
- /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+ /// <c>"\\s+"</c>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
public static readonly Regex WHITESPACE_PATTERN = new Regex("\\s+", RegexOptions.Compiled);
private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
@@ -124,7 +123,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
/// <summary>
- /// A lower-casing word analyzer with <b>extended </b> English stop words
+ /// A lower-casing word analyzer with <b>extended</b> English stop words
/// (can be shared freely across threads without harm); global per class
/// loader. The stop words are borrowed from
/// http://thomas.loc.gov/home/stopwords.html, see
@@ -220,8 +219,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Creates a token stream that tokenizes all the text in the given TextReader;
- /// This implementation forwards to <code>tokenStream(String, TextReader, String)</code> and is
- /// less efficient than <code>tokenStream(String, TextReader, String)</code>.
+ /// This implementation forwards to <see cref="Analyzer.TokenStream(string, TextReader)"/> and is
+ /// less efficient than <see cref="Analyzer.TokenStream(string, TextReader)"/>.
/// </summary>
/// <param name="fieldName">
/// the name of the field to tokenize (currently ignored). </param>
@@ -586,13 +585,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
/// <summary>
- /// A StringReader that exposes it's contained string for fast direct access.
- /// Might make sense to generalize this to CharSequence and make it public?
+ /// A <see cref="StringReader"/> that exposes its contained string for fast direct access.
+ /// Might make sense to generalize this to ICharSequence and make it public?
/// </summary>
internal sealed class FastStringReader : StringReader
{
-
- internal readonly string s;
+ private readonly string s;
internal FastStringReader(string s)
: base(s)
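
A hedged sketch of the constructor the predefined instances above are built with (assuming the (LuceneVersion, Regex, bool, CharArraySet) overload implied by the DEFAULT_ANALYZER initialization); the class is marked [Obsolete], so this is illustration only:

    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;

    // Lower-case terms, split on whitespace, and drop English stop words.
    #pragma warning disable 612, 618 // PatternAnalyzer is [Obsolete]
    var analyzer = new PatternAnalyzer(
        LuceneVersion.LUCENE_48,
        PatternAnalyzer.WHITESPACE_PATTERN,
        true,                                   // toLowerCase
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    #pragma warning restore 612, 618
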
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 200a934..f9c8898 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -34,10 +34,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="PatternKeywordMarkerFilter"/>, that marks the current
/// token as a keyword if the tokens term buffer matches the provided
- /// <see cref="Pattern"/> via the <see cref="KeywordAttribute"/>.
+ /// <see cref="Regex"/> via the <see cref="KeywordAttribute"/>.
/// </summary>
/// <param name="in">
- /// TokenStream to filter </param>
+ /// <see cref="TokenStream"/> to filter </param>
/// <param name="pattern">
/// the pattern to apply to the incoming term buffer
/// </param>
@@ -50,7 +50,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.pattern = pattern;
}
- protected internal override bool Keyword
+ protected override bool IsKeyword
{
get
{
@@ -58,6 +58,5 @@ namespace Lucene.Net.Analysis.Miscellaneous
return matcher.Success;
}
}
-
}
}
\ No newline at end of file
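
A minimal sketch of the filter above, assuming the (TokenStream, Regex) constructor implied by the parameter docs; the acronym pattern and sample text are illustrative assumptions:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;
    using System.IO;
    using System.Text.RegularExpressions;

    // Terms matching the pattern (here: all-caps acronyms) get KeywordAttribute
    // set, so downstream stemmers leave them alone.
    TokenStream source = new WhitespaceTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("NASA launches rockets"));
    TokenStream marked = new PatternKeywordMarkerFilter(
        source, new Regex("^[A-Z]{2,}$", RegexOptions.Compiled));
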
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
index 32e9fa0..862f2a2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,31 +19,26 @@ namespace Lucene.Net.Analysis.Miscellaneous
* limitations under the License.
*/
-
-
/// <summary>
/// This analyzer is used to facilitate scenarios where different
/// fields require different analysis techniques. Use the Map
- /// argument in <see cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
+ /// argument in <see cref="PerFieldAnalyzerWrapper(Analyzer, IDictionary{string, Analyzer})"/>
/// to add non-default analyzers for fields.
///
/// <para>Example usage:
///
/// <code>
- /// {@code
- /// Map<String,Analyzer> analyzerPerField = new HashMap<>();
- /// analyzerPerField.put("firstname", new KeywordAnalyzer());
- /// analyzerPerField.put("lastname", new KeywordAnalyzer());
+ /// IDictionary<string, Analyzer> analyzerPerField = new Dictionary<string, Analyzer>();
+ /// analyzerPerField["firstname"] = new KeywordAnalyzer();
+ /// analyzerPerField["lastname"] = new KeywordAnalyzer();
///
/// PerFieldAnalyzerWrapper aWrapper =
/// new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
- /// }
/// </code>
- ///
/// </para>
- /// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
- /// and "lastname", for which KeywordAnalyzer will be used.
- ///
+ /// <para>
+ /// In this example, <see cref="Standard.StandardAnalyzer"/> will be used for all fields except "firstname"
+ /// and "lastname", for which <see cref="Core.KeywordAnalyzer"/> will be used.
/// </para>
/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
/// and query parsing.
@@ -71,7 +65,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="defaultAnalyzer"> Any fields not specifically
/// defined to use a different analyzer will use the one provided here. </param>
- /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be
+ /// <param name="fieldAnalyzers"> a <see cref="IDictionary{TKey, TValue}"/> (String field name to the Analyzer) to be
/// used for those fields </param>
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers)
: base(PER_FIELD_REUSE_STRATEGY)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
index f968659..2479b33 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -19,16 +19,17 @@
/// <summary>
/// Links two <see cref="PrefixAwareTokenFilter"/>s.
- /// <p/>
- /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- /// the ones located in org.apache.lucene.analysis.tokenattributes.
+ /// <para/>
+ /// <b>NOTE:</b> This filter might not behave correctly if used with custom
+ /// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
+ /// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
public class PrefixAndSuffixAwareTokenFilter : TokenStream
{
-
private readonly PrefixAwareTokenFilter suffix;
- public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+ public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
+ : base(suffix)
{
prefix = new PrefixAwareTokenFilterAnonymousInnerClassHelper(this, prefix, input);
this.suffix = new PrefixAwareTokenFilterAnonymousInnerClassHelper2(this, prefix, suffix);
@@ -38,7 +39,8 @@
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
- public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input) : base(prefix, input)
+ public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input)
+ : base(prefix, input)
{
this.outerInstance = outerInstance;
}
@@ -53,7 +55,8 @@
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
- public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+ public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix)
+ : base(prefix, suffix)
{
this.outerInstance = outerInstance;
}
@@ -86,7 +89,6 @@
suffix.Reset();
}
-
public override void Dispose()
{
suffix.Dispose();
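
A minimal sketch of the three-stream constructor shown above; the marker tokens and the whitespace-tokenized body are illustrative assumptions:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Util;
    using System.IO;

    // Surround the main stream with one prefix token and one suffix token.
    TokenStream prefix = new KeywordTokenizer(new StringReader("_START_"));
    TokenStream body = new WhitespaceTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("some field text"));
    TokenStream suffix = new KeywordTokenizer(new StringReader("_END_"));
    TokenStream combined = new PrefixAndSuffixAwareTokenFilter(prefix, body, suffix);
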
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
index 652aec0..cef54d0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -25,13 +25,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// to be used when updating the token values in the second stream based on that token.
///
/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
- /// <p/>
- /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- /// the ones located in org.apache.lucene.analysis.tokenattributes.
+ /// <para/>
+ /// <b>NOTE:</b> This filter might not behave correctly if used with custom
+ /// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
+ /// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
public class PrefixAwareTokenFilter : TokenStream
{
-
private TokenStream prefix;
private TokenStream suffix;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
index 8f3fb20..d93b7a8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
@@ -23,11 +23,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
+ /// A <see cref="TokenFilter"/> which filters out <see cref="Token"/>s at the same position and Term text as the previous token in the stream.
/// </summary>
public sealed class RemoveDuplicatesTokenFilter : TokenFilter
{
-
private readonly ICharTermAttribute termAttribute;
private readonly IPositionIncrementAttribute posIncAttribute;
@@ -49,8 +48,28 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate <see cref="Lucene.Net.Util.IAttribute"/>s with the attributes of the next
+ /// token.
+ /// <para/>
+ /// The producer must make no assumptions about the attributes after the method
+ /// has been returned: the caller may arbitrarily change it. If the producer
+ /// needs to preserve the state for subsequent calls, it can use
+ /// <see cref="AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
+ /// This method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// <see cref="AttributeSource.AddAttribute{T}"/> and <see cref="AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.IAttribute"/>s that this stream uses should be
+ /// retrieved during instantiation.
+ /// <para/>
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// <see cref="IncrementToken()"/>.
/// </summary>
+ /// <returns> false for end of stream; true otherwise </returns>
public override sealed bool IncrementToken()
{
while (m_input.IncrementToken())
@@ -80,7 +99,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
public override void Reset()
{
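
A hedged sketch of the consumer contract the rewritten IncrementToken/Reset documentation above describes (assuming the single-argument RemoveDuplicatesTokenFilter(TokenStream) constructor; the input text is illustrative): attributes are obtained once, Reset() precedes the first IncrementToken(), and the stream is ended and disposed afterwards.

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Miscellaneous;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;
    using System;
    using System.IO;

    TokenStream ts = new RemoveDuplicatesTokenFilter(
        new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("a a b")));
    ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>(); // retrieve attributes once
    ts.Reset();                                                      // required before consumption
    while (ts.IncrementToken())
    {
        // Duplicates are removed only when tokens share a position (e.g. stacked
        // synonyms), so this plain whitespace-tokenized stream passes through unchanged.
        Console.WriteLine(term.ToString());
    }
    ts.End();
    ts.Dispose();
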
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
index 1554866..1aedae4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
@@ -32,9 +32,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class RemoveDuplicatesTokenFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new RemoveDuplicatesTokenFilterFactory </summary>
+ /// Creates a new <see cref="RemoveDuplicatesTokenFilterFactory"/> </summary>
public RemoveDuplicatesTokenFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
[03/11] lucenenet git commit: Lucene.Net.Analysis.It refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.It refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0b3f976b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0b3f976b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0b3f976b
Branch: refs/heads/api-work
Commit: 0b3f976bcd72cd302c4f3586dfb23c29a50a3496
Parents: 6d272fe
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 17:45:31 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 17:45:31 2017 +0700
----------------------------------------------------------------------
.../Analysis/It/ItalianAnalyzer.cs | 32 +++++++++-----------
.../Analysis/It/ItalianLightStemFilter.cs | 4 +--
.../It/ItalianLightStemFilterFactory.cs | 6 ++--
.../Analysis/It/ItalianLightStemmer.cs | 3 +-
4 files changed, 20 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0b3f976b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
index be81d75..e9ccce6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
@@ -31,16 +31,13 @@ namespace Lucene.Net.Analysis.It
/// <summary>
/// <see cref="Analyzer"/> for Italian.
- /// <para>
- /// <a name="version"/>
- /// </para>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating ItalianAnalyzer:
- /// <ul>
- /// <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
- /// <li> As of 3.2, ElisionFilter with a set of Italian
- /// contractions is used by default.
- /// </ul>
+ /// compatibility when creating <see cref="ItalianAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.6, <see cref="ItalianLightStemFilter"/> is used for less aggressive stemming.</item>
+ /// <item> As of 3.2, <see cref="ElisionFilter"/> with a set of Italian
+ /// contractions is used by default.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class ItalianAnalyzer : StopwordAnalyzerBase
@@ -70,7 +67,7 @@ namespace Lucene.Net.Analysis.It
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static set the first time.
/// </summary>
private class DefaultSetHolder
@@ -98,10 +95,11 @@ namespace Lucene.Net.Analysis.It
}
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
public ItalianAnalyzer(LuceneVersion matchVersion)
- : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}
@@ -111,7 +109,7 @@ namespace Lucene.Net.Analysis.It
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
public ItalianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}
@@ -124,7 +122,7 @@ namespace Lucene.Net.Analysis.It
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
public ItalianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
- : base(matchVersion, stopwords)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
@@ -132,13 +130,13 @@ namespace Lucene.Net.Analysis.It
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
- /// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>
- /// , <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="ItalianLightStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
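
A minimal sketch of the constructors documented above; the exclusion word and the empty stop set are illustrative assumptions:

    using Lucene.Net.Analysis.It;
    using Lucene.Net.Analysis.Util;
    using Lucene.Net.Util;

    // Default stop words:
    var analyzer = new ItalianAnalyzer(LuceneVersion.LUCENE_48);

    // No stop words, but keep "tecnologia" from being stemmed:
    var exclusions = new CharArraySet(LuceneVersion.LUCENE_48, 1, false);
    exclusions.Add("tecnologia");
    var custom = new ItalianAnalyzer(LuceneVersion.LUCENE_48, CharArraySet.EMPTY_SET, exclusions);
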
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0b3f976b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
index f86d45d..ca692e1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
@@ -25,7 +25,7 @@ namespace Lucene.Net.Analysis.It
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.It
private readonly IKeywordAttribute keywordAttr;
public ItalianLightStemFilter(TokenStream input)
- : base(input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0b3f976b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
index 1ea8ee5..5dd8080 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
@@ -1,6 +1,5 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-using System.IO;
namespace Lucene.Net.Analysis.It
{
@@ -34,11 +33,10 @@ namespace Lucene.Net.Analysis.It
/// </summary>
public class ItalianLightStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new ItalianLightStemFilterFactory </summary>
+ /// Creates a new <see cref="ItalianLightStemFilterFactory"/> </summary>
public ItalianLightStemFilterFactory(IDictionary<string, string> args)
- : base(args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0b3f976b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
index 84c9a83..9eb799d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
@@ -58,13 +58,12 @@ namespace Lucene.Net.Analysis.It
/// Light Stemmer for Italian.
/// <para>
/// This stemmer implements the algorithm described in:
- /// <i>Report on CLEF-2001 Experiments</i>
+ /// <c>Report on CLEF-2001 Experiments</c>
/// Jacques Savoy
/// </para>
/// </summary>
public class ItalianLightStemmer
{
-
public virtual int Stem(char[] s, int len)
{
if (len < 6)
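
The Stem method above works in place on a char buffer and returns the new length; a minimal sketch of a call (the sample word is an illustrative assumption):

    var stemmer = new ItalianLightStemmer();
    char[] buffer = "tecnologie".ToCharArray();
    int newLength = stemmer.Stem(buffer, buffer.Length);   // buffer is modified in place
    string stemmed = new string(buffer, 0, newLength);
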
[08/11] lucenenet git commit: Lucene.Net.Analysis.Ngram refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Ngram refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/269da1ef
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/269da1ef
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/269da1ef
Branch: refs/heads/api-work
Commit: 269da1ef4ecb679c0e13c914fab3f60c175d9466
Parents: d4b9c00
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 00:01:15 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 00:01:15 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ngram/EdgeNGramFilterFactory.cs | 4 +-
.../Analysis/Ngram/EdgeNGramTokenFilter.cs | 29 ++---
.../Analysis/Ngram/EdgeNGramTokenizer.cs | 31 +++---
.../Analysis/Ngram/EdgeNGramTokenizerFactory.cs | 4 +-
.../Ngram/Lucene43EdgeNGramTokenizer.cs | 47 ++++----
.../Analysis/Ngram/Lucene43NGramTokenizer.cs | 12 +--
.../Analysis/Ngram/NGramFilterFactory.cs | 4 +-
.../Analysis/Ngram/NGramTokenFilter.cs | 33 +++---
.../Analysis/Ngram/NGramTokenizer.cs | 107 +++++++++++++------
.../Analysis/Ngram/NGramTokenizerFactory.cs | 4 +-
.../Analysis/Ngram/NGramTokenizerTest.cs | 2 +-
11 files changed, 161 insertions(+), 116 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
index 2efb5fc..2e3e0ed 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,7 +37,7 @@ namespace Lucene.Net.Analysis.Ngram
private readonly string side;
/// <summary>
- /// Creates a new EdgeNGramFilterFactory </summary>
+ /// Creates a new <see cref="EdgeNGramFilterFactory"/> </summary>
public EdgeNGramFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
index 01677cf..4c1fff1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
@@ -1,9 +1,9 @@
-\ufeffusing System;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,8 +27,8 @@ namespace Lucene.Net.Analysis.Ngram
/// <para>
/// This <see cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
/// </para>
- /// <para><a name="version"/>As of Lucene 4.4, this filter does not support
- /// <see cref="Side#BACK"/> (you can use <see cref="ReverseStringFilter"/> up-front and
+ /// <para>As of Lucene 4.4, this filter does not support
+ /// <see cref="Side.BACK"/> (you can use <see cref="Reverse.ReverseStringFilter"/> up-front and
/// afterward to get the same behavior), handles supplementary characters
/// correctly and does not update offsets anymore.
/// </para>
@@ -43,7 +43,6 @@ namespace Lucene.Net.Analysis.Ngram
/// Specifies which side of the input the n-gram should be generated from </summary>
public enum Side
{
-
/// <summary>
/// Get the n-gram from the front of the input </summary>
FRONT,
@@ -54,7 +53,9 @@ namespace Lucene.Net.Analysis.Ngram
BACK,
}
- // Get the appropriate Side from a string
+ /// <summary>
+ /// Get the appropriate <see cref="Side"/> from a string
+ /// </summary>
public static Side GetSide(string sideName)
{
Side result;
@@ -86,9 +87,9 @@ namespace Lucene.Net.Analysis.Ngram
private readonly IPositionLengthAttribute posLenAtt;
/// <summary>
- /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
@@ -136,9 +137,9 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
@@ -150,9 +151,9 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -163,7 +164,7 @@ namespace Lucene.Net.Analysis.Ngram
{
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
while (true)
{
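
A minimal sketch of the (version, input, side, minGram, maxGram) constructor documented above; the whitespace-tokenized input is an illustrative assumption:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Ngram;
    using Lucene.Net.Util;
    using System.IO;

    // For the token "lucene" with Side.FRONT, minGram = 1 and maxGram = 3,
    // this emits the front edge n-grams "l", "lu" and "luc".
    TokenStream source = new WhitespaceTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("lucene"));
    TokenStream edges = new EdgeNGramTokenFilter(
        LuceneVersion.LUCENE_48, source, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
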
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
index 09ad7f8..9eba29f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Util;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,14 +25,15 @@ namespace Lucene.Net.Analysis.Ngram
/// <para>
/// This <see cref="Tokenizer"/> create n-grams from the beginning edge or ending edge of a input token.
/// </para>
- /// <para><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
- /// <li>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage
- /// <li>doesn't trim the input,
- /// <li>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones
- /// <li>doesn't support backward n-grams anymore.
- /// <li>supports <see cref="#isTokenChar(int) pre-tokenization"/>,
- /// <li>correctly handles supplementary characters.
- /// </ul>
+ /// <para>As of Lucene 4.4, this tokenizer
+ /// <list type="bullet">
+ /// <item>can handle <code>maxGram</code> larger than 1024 chars, but beware that this will result in increased memory usage</item>
+ /// <item>doesn't trim the input,</item>
+ /// <item>sets position increments equal to 1 instead of 1 for the first token and 0 for all other ones</item>
+ /// <item>doesn't support backward n-grams anymore.</item>
+ /// <item>supports <see cref="Util.CharTokenizer.IsTokenChar(int)"/> pre-tokenization,</item>
+ /// <item>correctly handles supplementary characters.</item>
+ /// </list>
/// </para>
/// <para>Although <b style="color:red">highly</b> discouraged, it is still possible
/// to use the old behavior through <see cref="Lucene43EdgeNGramTokenizer"/>.
@@ -44,10 +45,10 @@ namespace Lucene.Net.Analysis.Ngram
public const int DEFAULT_MIN_GRAM_SIZE = 1;
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
@@ -58,9 +59,9 @@ namespace Lucene.Net.Analysis.Ngram
/// <summary>
/// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)
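
Unlike the filter, the tokenizer above reads directly from a TextReader; a minimal sketch of the (version, input, minGram, maxGram) constructor, with the sample input as an illustrative assumption:

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Ngram;
    using Lucene.Net.Util;
    using System.IO;

    // Produces edge n-grams of length 2..4 from the beginning of the input.
    Tokenizer tokenizer = new EdgeNGramTokenizer(
        LuceneVersion.LUCENE_48, new StringReader("lucene"), 2, 4);
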
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
index 5273ae4..d3f2bb6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
@@ -4,7 +4,7 @@ using System;
using System.Collections.Generic;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Ngram
private readonly string side;
/// <summary>
- /// Creates a new EdgeNGramTokenizerFactory </summary>
+ /// Creates a new <see cref="EdgeNGramTokenizerFactory"/> </summary>
public EdgeNGramTokenizerFactory(IDictionary<string, string> args) : base(args)
{
minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);
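
As a sketch of how this factory is typically driven, the args dictionary below stands in for attributes from a config file; the "4.8" version string and its lenient parsing are assumptions, not shown in this diff:

    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis.Ngram;

    public static class EdgeNGramFactoryDemo
    {
        public static void Main()
        {
            var args = new Dictionary<string, string>
            {
                { "luceneMatchVersion", "4.8" }, // assumed version string; the factory requires one
                { "minGramSize", "1" },
                { "maxGramSize", "3" }
            };
            var factory = new EdgeNGramTokenizerFactory(args);
            var tokenizer = factory.Create(new StringReader("apache"));
        }
    }
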
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
index 3ed7187..eb09a94 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,7 +41,6 @@ namespace Lucene.Net.Analysis.Ngram
/// Specifies which side of the input the n-gram should be generated from </summary>
public enum Side
{
-
/// <summary>
/// Get the n-gram from the front of the input </summary>
FRONT,
@@ -52,7 +51,7 @@ namespace Lucene.Net.Analysis.Ngram
}
// Get the appropriate Side from a string
- internal static Side GetSide(string sideName)
+ public static Side GetSide(string sideName)
{
Side result;
if (!Enum.TryParse(sideName, true, out result))
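
With GetSide now public, callers can map a configuration string onto the enum directly; a small illustrative sketch:

    using System;
    using Lucene.Net.Analysis.Ngram;

    public static class SideDemo
    {
        public static void Main()
        {
            // Enum.TryParse above is case-insensitive, so "front" resolves to Side.FRONT;
            // unrecognized names are rejected by the method.
            Lucene43EdgeNGramTokenizer.Side side = Lucene43EdgeNGramTokenizer.GetSide("front");
            Console.WriteLine(side); // FRONT
        }
    }
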
@@ -73,10 +72,10 @@ namespace Lucene.Net.Analysis.Ngram
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -88,11 +87,11 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="side"> the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -104,10 +103,10 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -118,11 +117,11 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="sideLabel"> the name of the <see cref="Side"/> from which to chop off an n-gram </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -133,10 +132,10 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
@@ -145,11 +144,11 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
+ /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams in the sizes of the given range
/// </summary>
- /// <param name="version"> the <a href="#version">Lucene match version</a> </param>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/> </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
index a0f210a..a79ffba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
@@ -2,7 +2,7 @@
using System;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -42,7 +42,7 @@ namespace Lucene.Net.Analysis.Ngram
private IOffsetAttribute offsetAtt;
/// <summary>
- /// Creates NGramTokenizer with given min and max n-grams. </summary>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
/// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -53,9 +53,9 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenizer with given min and max n-grams. </summary>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="factory"> <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public Lucene43NGramTokenizer(AttributeFactory factory, TextReader input, int minGram, int maxGram)
@@ -65,7 +65,7 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenizer with default min and max n-grams. </summary>
+ /// Creates <see cref="Lucene43NGramTokenizer"/> with default min and max n-grams. </summary>
/// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
public Lucene43NGramTokenizer(TextReader input)
: this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
index 3c9f738..8b9b726 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
@@ -1,7 +1,7 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Ngram
private readonly int minGramSize;
/// <summary>
- /// Creates a new NGramFilterFactory </summary>
+ /// Creates a new <see cref="NGramFilterFactory"/> </summary>
public NGramFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 561e575..26cc8d5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,25 +24,26 @@ namespace Lucene.Net.Analysis.Ngram
/// <summary>
/// Tokenizes the input into n-grams of the given size(s).
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/> compatibility when
- /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filters:<ul>
- /// <li>handles supplementary characters correctly,</li>
- /// <li>emits all n-grams for the same token at the same position,</li>
- /// <li>does not modify offsets,</li>
- /// <li>sorts n-grams by their offset in the original token first, then
- /// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
- /// "c").</li></ul>
+ /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filter:
+ /// <list type="bullet">
+ /// <item>handles supplementary characters correctly,</item>
+ /// <item>emits all n-grams for the same token at the same position,</item>
+ /// <item>does not modify offsets,</item>
+ /// <item>sorts n-grams by their offset in the original token first, then
+ /// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
+ /// "c").</item>
+ /// </list>
/// </para>
/// <para>You can make this filter use the old behavior by providing a version <
- /// <see cref="Version#LUCENE_44"/> in the constructor but this is not recommended as
+ /// <see cref="LuceneVersion.LUCENE_44"/> in the constructor but this is not recommended as
/// it will lead to broken <see cref="TokenStream"/>s that will cause highlighting
/// bugs.
/// </para>
/// <para>If you were using this <see cref="TokenFilter"/> to perform partial highlighting,
/// this won't work anymore since this filter doesn't update offsets. You should
/// modify your analysis chain to use <see cref="NGramTokenizer"/>, and potentially
- /// override <see cref="NGramTokenizer#isTokenChar(int)"/> to perform pre-tokenization.
+ /// override <see cref="NGramTokenizer.IsTokenChar(int)"/> to perform pre-tokenization.
/// </para>
/// </summary>
public sealed class NGramTokenFilter : TokenFilter
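
A sketch of the sort order described above, assuming the 4.8 API shapes seen in this diff (WhitespaceTokenizer and ICharTermAttribute are drawn from elsewhere in the codebase):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.Ngram;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    public static class NGramFilterDemo
    {
        public static void Main()
        {
            var source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("abc"));
            using (var filter = new NGramTokenFilter(LuceneVersion.LUCENE_48, source, 1, 3))
            {
                var termAtt = filter.AddAttribute<ICharTermAttribute>();
                filter.Reset();
                while (filter.IncrementToken())
                {
                    // offset first, then increasing length: a, ab, abc, b, bc, c
                    Console.WriteLine(termAtt.ToString());
                }
                filter.End();
            }
        }
    }
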
@@ -70,9 +71,9 @@ namespace Lucene.Net.Analysis.Ngram
private readonly IOffsetAttribute offsetAtt;
/// <summary>
- /// Creates NGramTokenFilter with given min and max n-grams. </summary>
+ /// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams. </summary>
/// <param name="version"> Lucene version to enable correct position increments.
- /// See <a href="#version">above</a> for details. </param>
+ /// See <see cref="NGramTokenFilter"/> for details. </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -154,9 +155,9 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenFilter with default min and max n-grams. </summary>
+ /// Creates <see cref="NGramTokenFilter"/> with default min and max n-grams. </summary>
/// <param name="version"> Lucene version to enable correct position increments.
- /// See <a href="#version">above</a> for details. </param>
+ /// See <see cref="NGramTokenFilter"/> for details. </param>
/// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
public NGramTokenFilter(LuceneVersion version, TokenStream input)
: this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
@@ -166,7 +167,7 @@ namespace Lucene.Net.Analysis.Ngram
/// <summary>
/// Returns the next token in the stream, or null at EOS.
/// </summary>
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
while (true)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
index acc42c3..a6ce01d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -6,7 +6,7 @@ using System;
using System.Diagnostics;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -32,21 +32,66 @@ namespace Lucene.Net.Analysis.Ngram
/// the same as the term chars.
/// </para>
/// <para>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
- /// <table>
- /// <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
- /// <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
- /// <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
- /// <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
- /// </table>
- /// <a name="version"/>
+ /// <list type="table">
+ /// <listheader>
+ /// <term>Term</term>
+ /// <term>Position increment</term>
+ /// <term>Position length</term>
+ /// <term>Offsets</term>
+ /// </listheader>
+ /// <item>
+ /// <term>ab</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[0,2[</term>
+ /// </item>
+ /// <item>
+ /// <term>abc</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[0,3[</term>
+ /// </item>
+ /// <item>
+ /// <term>bc</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[1,3[</term>
+ /// </item>
+ /// <item>
+ /// <term>bcd</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[1,4[</term>
+ /// </item>
+ /// <item>
+ /// <term>cd</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[2,4[</term>
+ /// </item>
+ /// <item>
+ /// <term>cde</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[2,5[</term>
+ /// </item>
+ /// <item>
+ /// <term>de</term>
+ /// <term>1</term>
+ /// <term>1</term>
+ /// <term>[3,5[</term>
+ /// </item>
+ /// </list>
/// </para>
- /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
- /// <li>tokenize in a streaming fashion to support streams which are larger
- /// than 1024 chars (limit of the previous version),
- /// <li>count grams based on unicode code points instead of java chars (and
- /// never split in the middle of surrogate pairs),
- /// <li>give the ability to <see cref="#isTokenChar(int) pre-tokenize"/> the stream
- /// before computing n-grams.</ul>
+ /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:
+ /// <list type="bullet">
+ /// <item>tokenize in a streaming fashion to support streams which are larger
+ /// than 1024 chars (limit of the previous version),</item>
+ /// <item>count grams based on Unicode code points instead of Java chars (and
+ /// never split in the middle of surrogate pairs),</item>
+ /// <item>give the ability to pre-tokenize the stream (<see cref="IsTokenChar(int)"/>)
+ /// before computing n-grams.</item>
+ /// </list>
/// </para>
/// <para>Additionally, this class doesn't trim trailing whitespaces and emits
/// tokens in a different order, tokens are now emitted by increasing start
@@ -57,7 +102,7 @@ namespace Lucene.Net.Analysis.Ngram
/// to use the old behavior through <see cref="Lucene43NGramTokenizer"/>.
/// </para>
/// </summary>
- // non-final to allow for overriding isTokenChar, but all other methods should be final
+ // non-sealed to allow for overriding IsTokenChar, but all other methods should be sealed
public class NGramTokenizer : Tokenizer
{
public const int DEFAULT_MIN_NGRAM_SIZE = 1;
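
The "abcde" table above can be reproduced with a few lines; this sketch assumes IOffsetAttribute exposes StartOffset/EndOffset properties, which this hunk does not show:

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Ngram;
    using Lucene.Net.Analysis.TokenAttributes;
    using Lucene.Net.Util;

    public static class NGramTableDemo
    {
        public static void Main()
        {
            using (var tokenizer = new NGramTokenizer(
                LuceneVersion.LUCENE_48, new StringReader("abcde"), 2, 3))
            {
                var termAtt = tokenizer.AddAttribute<ICharTermAttribute>();
                var offsetAtt = tokenizer.AddAttribute<IOffsetAttribute>();
                tokenizer.Reset();
                while (tokenizer.IncrementToken())
                {
                    // prints: ab [0,2[  abc [0,3[  bc [1,3[  bcd [1,4[  cd [2,4[  cde [2,5[  de [3,5[
                    Console.WriteLine("{0} [{1},{2}[", termAtt, offsetAtt.StartOffset, offsetAtt.EndOffset);
                }
                tokenizer.End();
            }
        }
    }
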
@@ -87,8 +132,8 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenizer with given min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+ /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
/// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
@@ -104,10 +149,10 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenizer with given min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
- /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
- /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+ /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
+ /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+ /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
@@ -116,8 +161,8 @@ namespace Lucene.Net.Analysis.Ngram
}
/// <summary>
- /// Creates NGramTokenizer with default min and max n-grams. </summary>
- /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+ /// Creates <see cref="NGramTokenizer"/> with default min and max n-grams. </summary>
+ /// <param name="version"> the lucene compatibility version </param>
/// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
public NGramTokenizer(LuceneVersion version, TextReader input)
: this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
@@ -154,8 +199,6 @@ namespace Lucene.Net.Analysis.Ngram
charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
buffer = new int[charBuffer.Buffer.Length];
-
-
// Make the term att large enough
termAtt.ResizeBuffer(2 * maxGram);
}
@@ -191,7 +234,7 @@ namespace Lucene.Net.Analysis.Ngram
Debug.Assert(exhausted);
return false;
}
- consume();
+ Consume();
gramSize = minGram;
}
@@ -202,7 +245,7 @@ namespace Lucene.Net.Analysis.Ngram
bool isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar)
{
- consume();
+ Consume();
gramSize = minGram;
continue;
}
@@ -236,19 +279,19 @@ namespace Lucene.Net.Analysis.Ngram
/// <summary>
/// Consume one code point. </summary>
- private void consume()
+ private void Consume()
{
offset += Character.CharCount(buffer[bufferStart++]);
}
/// <summary>
/// Only collect characters which satisfy this condition. </summary>
- protected internal virtual bool IsTokenChar(int chr)
+ protected virtual bool IsTokenChar(int chr)
{
return true;
}
- public override void End()
+ public override sealed void End()
{
base.End();
Debug.Assert(bufferStart <= bufferEnd);
@@ -262,7 +305,7 @@ namespace Lucene.Net.Analysis.Ngram
offsetAtt.SetOffset(endOffset, endOffset);
}
- public override void Reset()
+ public override sealed void Reset()
{
base.Reset();
bufferStart = bufferEnd = buffer.Length;
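
Since IsTokenChar is the designed extension point (the class comment above notes the type is deliberately non-sealed), a pre-tokenization subclass might look like the following sketch; LetterNGramTokenizer is a hypothetical name:

    using System.IO;
    using Lucene.Net.Analysis.Ngram;
    using Lucene.Net.Util;

    // Hypothetical subclass: n-grams are computed only over letter runs,
    // so grams never span whitespace or punctuation.
    public class LetterNGramTokenizer : NGramTokenizer
    {
        public LetterNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
            : base(version, input, minGram, maxGram)
        {
        }

        protected override bool IsTokenChar(int chr)
        {
            // simplistic check; a full implementation would also handle supplementary code points
            return char.IsLetter((char)chr);
        }
    }
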
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
index 73865fb..33a81b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
using System.Collections.Generic;
using System.IO;
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,7 +37,7 @@ namespace Lucene.Net.Analysis.Ngram
private readonly int minGramSize;
/// <summary>
- /// Creates a new NGramTokenizerFactory </summary>
+ /// Creates a new <see cref="NGramTokenizerFactory"/> </summary>
public NGramTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
index b4aac99..d72f4c5 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
@@ -233,7 +233,7 @@ namespace Lucene.Net.Analysis.Ngram
this.nonTokenChars = nonTokenChars;
}
- protected internal override bool IsTokenChar(int chr)
+ protected override bool IsTokenChar(int chr)
{
return nonTokenChars.IndexOf((char)chr) < 0;
}
[10/11] lucenenet git commit: Lucene.Net.Analysis.No refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.No refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/100a8c51
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/100a8c51
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/100a8c51
Branch: refs/heads/api-work
Commit: 100a8c5128e25f213e66ecb05dba6cd9e970995e
Parents: 31585cf
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 00:31:09 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 00:45:32 2017 +0700
----------------------------------------------------------------------
.../Analysis/No/NorwegianAnalyzer.cs | 10 +++++-----
.../Analysis/No/NorwegianLightStemFilter.cs | 16 ++++++++--------
.../Analysis/No/NorwegianLightStemFilterFactory.cs | 4 +---
.../Analysis/No/NorwegianLightStemmer.cs | 13 +++++++------
.../Analysis/No/NorwegianMinimalStemFilter.cs | 17 ++++++++---------
.../No/NorwegianMinimalStemFilterFactory.cs | 7 +++----
.../Analysis/No/NorwegianMinimalStemmer.cs | 11 +++++------
7 files changed, 37 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
index 6d3b1dd..70f9fc5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
@@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.No
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.
/// </summary>
private class DefaultSetHolder
@@ -79,7 +79,7 @@ namespace Lucene.Net.Analysis.No
}
/// <summary>
- /// Builds an analyzer with the default stop words: <see cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public NorwegianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -113,13 +113,13 @@ namespace Lucene.Net.Analysis.No
/// <summary>
/// Creates a
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <see cref="Reader"/>.
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
- /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>
- /// , <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="SnowballFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
index 6d0e0c1..6fb788a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -25,7 +24,7 @@ namespace Lucene.Net.Analysis.No
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
@@ -36,8 +35,8 @@ namespace Lucene.Net.Analysis.No
private readonly IKeywordAttribute keywordAttr;
/// <summary>
- /// Calls {@link #NorwegianLightStemFilter(TokenStream, int)
- /// NorwegianLightStemFilter(input, BOKMAAL)}
+ /// Calls <see cref="NorwegianLightStemFilter.NorwegianLightStemFilter(TokenStream, int)"/>
+ /// - NorwegianLightStemFilter(input, BOKMAAL)
/// </summary>
public NorwegianLightStemFilter(TokenStream input)
: this(input, NorwegianLightStemmer.BOKMAAL)
@@ -45,10 +44,11 @@ namespace Lucene.Net.Analysis.No
}
/// <summary>
- /// Creates a new NorwegianLightStemFilter </summary>
- /// <param name="flags"> set to <see cref="NorwegianLightStemmer#BOKMAAL"/>,
- /// <see cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
- public NorwegianLightStemFilter(TokenStream input, int flags) : base(input)
+ /// Creates a new <see cref="NorwegianLightStemFilter"/> </summary>
+ /// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
+ /// <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
+ public NorwegianLightStemFilter(TokenStream input, int flags)
+ : base(input)
{
stemmer = new NorwegianLightStemmer(flags);
termAtt = AddAttribute<ICharTermAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
index cc28b03..405d2b7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
@@ -1,6 +1,5 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -34,11 +33,10 @@ namespace Lucene.Net.Analysis.No
/// </summary>
public class NorwegianLightStemFilterFactory : TokenFilterFactory
{
-
private readonly int flags;
/// <summary>
- /// Creates a new NorwegianLightStemFilterFactory </summary>
+ /// Creates a new <see cref="NorwegianLightStemFilterFactory"/> </summary>
public NorwegianLightStemFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
index 3a8a66e..a53d5f5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.Util;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -58,13 +57,15 @@ namespace Lucene.Net.Analysis.No
/// <summary>
/// Light Stemmer for Norwegian.
/// <para>
- /// Parts of this stemmer is adapted from SwedishLightStemFilter, except
+ /// Parts of this stemmer are adapted from <see cref="Sv.SwedishLightStemFilter"/>, except
/// that while the Swedish one has a pre-defined rule set and a corresponding
/// corpus to validate against, the Norwegian one is hand crafted.
/// </para>
/// </summary>
public class NorwegianLightStemmer
{
+ // LUCENENET TODO: Convert the following into a [Flags] enum
+
/// <summary>
/// Constant to remove Bokmål-specific endings </summary>
public const int BOKMAAL = 1;
@@ -72,12 +73,12 @@ namespace Lucene.Net.Analysis.No
/// Constant to remove Nynorsk-specific endings </summary>
public const int NYNORSK = 2;
- internal readonly bool useBokmaal;
- internal readonly bool useNynorsk;
+ private readonly bool useBokmaal;
+ private readonly bool useNynorsk;
/// <summary>
- /// Creates a new NorwegianLightStemmer </summary>
- /// <param name="flags"> set to <see cref="#BOKMAAL"/>, <see cref="#NYNORSK"/>, or both. </param>
+ /// Creates a new <see cref="NorwegianLightStemmer"/> </summary>
+ /// <param name="flags"> set to <see cref="BOKMAAL"/>, <see cref="NYNORSK"/>, or both. </param>
public NorwegianLightStemmer(int flags)
{
if (flags <= 0 || flags > BOKMAAL + NYNORSK)
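
A sketch of how the flag constants combine: BOKMAAL is 1 and NYNORSK is 2, so OR-ing them yields 3, the maximum the guard above accepts. WhitespaceTokenizer here is an assumption for brevity:

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.No;
    using Lucene.Net.Util;

    public static class NorwegianStemDemo
    {
        public static TokenStream Build(TextReader reader)
        {
            var tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            // enable removal of both Bokmål- and Nynorsk-specific endings
            return new NorwegianLightStemFilter(tokenizer,
                NorwegianLightStemmer.BOKMAAL | NorwegianLightStemmer.NYNORSK);
        }
    }
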
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
index 877fb59..520425d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.TokenAttributes;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -25,7 +24,7 @@ namespace Lucene.Net.Analysis.No
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
@@ -36,20 +35,20 @@ namespace Lucene.Net.Analysis.No
private readonly IKeywordAttribute keywordAttr;
/// <summary>
- /// Calls {@link #NorwegianMinimalStemFilter(TokenStream, int)
- /// NorwegianMinimalStemFilter(input, BOKMAAL)}
+ /// Calls <see cref="NorwegianLightStemFilter.NorwegianLightStemFilter(TokenStream, int)"/> -
+ /// NorwegianMinimalStemFilter(input, BOKMAAL)
/// </summary>
public NorwegianMinimalStemFilter(TokenStream input)
- : this(input, NorwegianLightStemmer.BOKMAAL)
+ : this(input, NorwegianLightStemmer.BOKMAAL)
{
}
/// <summary>
- /// Creates a new NorwegianLightStemFilter </summary>
- /// <param name="flags"> set to <see cref="NorwegianLightStemmer#BOKMAAL"/>,
- /// <see cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+ /// Creates a new <see cref="NorwegianLightStemFilter"/> </summary>
+ /// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
+ /// <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
public NorwegianMinimalStemFilter(TokenStream input, int flags)
- : base(input)
+ : base(input)
{
this.stemmer = new NorwegianMinimalStemmer(flags);
termAtt = AddAttribute<ICharTermAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
index ee99e3d..18f1c56 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
@@ -1,6 +1,5 @@
\ufeffusing Lucene.Net.Analysis.Util;
using System.Collections.Generic;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -34,12 +33,12 @@ namespace Lucene.Net.Analysis.No
/// </summary>
public class NorwegianMinimalStemFilterFactory : TokenFilterFactory
{
-
private readonly int flags;
/// <summary>
- /// Creates a new NorwegianMinimalStemFilterFactory </summary>
- public NorwegianMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="NorwegianMinimalStemFilterFactory"/> </summary>
+ public NorwegianMinimalStemFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
string variant = Get(args, "variant");
if (variant == null || "nb".Equals(variant))
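
Per the hunk above, omitting "variant" or passing "nb" selects the Bokmål rules; a small factory sketch (the "nn" value for Nynorsk is inferred from the surrounding code, not shown in this hunk):

    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis.Core;
    using Lucene.Net.Analysis.No;
    using Lucene.Net.Util;

    public static class NorwegianFactoryDemo
    {
        public static void Main()
        {
            var args = new Dictionary<string, string> { { "variant", "nb" } }; // Bokmål
            var factory = new NorwegianMinimalStemFilterFactory(args);
            var stream = factory.Create(
                new WhitespaceTokenizer(LuceneVersion.LUCENE_48, new StringReader("havnedistriktene")));
        }
    }
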
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/100a8c51/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
index 5724ef2..0bab21b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
@@ -1,5 +1,4 @@
\ufeffusing Lucene.Net.Analysis.Util;
-using System.IO;
namespace Lucene.Net.Analysis.No
{
@@ -63,13 +62,13 @@ namespace Lucene.Net.Analysis.No
/// </summary>
public class NorwegianMinimalStemmer
{
- internal readonly bool useBokmaal;
- internal readonly bool useNynorsk;
+ private readonly bool useBokmaal;
+ private readonly bool useNynorsk;
/// <summary>
- /// Creates a new NorwegianMinimalStemmer </summary>
- /// <param name="flags"> set to <see cref="NorwegianLightStemmer#BOKMAAL"/>,
- /// <see cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+ /// Creates a new <see cref="NorwegianMinimalStemmer"/> </summary>
+ /// <param name="flags"> set to <see cref="NorwegianLightStemmer.BOKMAAL"/>,
+ /// <see cref="NorwegianLightStemmer.NYNORSK"/>, or both. </param>
public NorwegianMinimalStemmer(int flags)
{
if (flags <= 0 || flags > NorwegianLightStemmer.BOKMAAL + NorwegianLightStemmer.NYNORSK)