You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 14:54:56 UTC
[1/6] lucenenet git commit: Lucene.Net.Analysis.Ar refactor:
accessibility and documentation comments
Repository: lucenenet
Updated Branches:
refs/heads/api-work bc485b4c4 -> 917b4fdf5
Lucene.Net.Analysis.Ar refactor: accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/2878664e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/2878664e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/2878664e
Branch: refs/heads/api-work
Commit: 2878664e260be46bd20a3996dce0bb770aae3ab2
Parents: bc485b4
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:09:32 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:09:32 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ar/ArabicAnalyzer.cs | 44 +++++------
.../Analysis/Ar/ArabicLetterTokenizer.cs | 82 ++++++++++----------
.../Analysis/Ar/ArabicLetterTokenizerFactory.cs | 28 +++----
.../Analysis/Ar/ArabicNormalizationFilter.cs | 4 +-
.../Ar/ArabicNormalizationFilterFactory.cs | 9 ++-
.../Analysis/Ar/ArabicNormalizer.cs | 27 +++----
.../Analysis/Ar/ArabicStemFilter.cs | 13 ++--
.../Analysis/Ar/ArabicStemFilterFactory.cs | 9 ++-
.../Analysis/Ar/ArabicStemmer.cs | 21 +++--
9 files changed, 114 insertions(+), 123 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
index 9db2bdf..e484850 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
@@ -27,23 +27,20 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Arabic.
- /// <para>
+ /// <see cref="Analyzer"/> for Arabic.
+ /// <para/>
/// This analyzer implements light-stemming as specified by:
- /// <i>
+ /// <c>
/// Light Stemming for Arabic Information Retrieval
- /// </i>
+ /// </c>
/// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf
- /// </para>
- /// <para>
+ /// <para/>
/// The analysis package contains three primary components:
- /// <ul>
- /// <li><seealso cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.
- /// <li><seealso cref="ArabicStemFilter"/>: Arabic light stemming
- /// <li>Arabic stop words file: a set of default Arabic stop words.
- /// </ul>
- ///
- /// </para>
+ /// <list type="bullet">
+ /// <item><see cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.</item>
+ /// <item><see cref="ArabicStemFilter"/>: Arabic light stemming</item>
+ /// <item>Arabic stop words file: a set of default Arabic stop words.</item>
+ /// </list>
/// </summary>
public sealed class ArabicAnalyzer : StopwordAnalyzerBase
{
@@ -94,7 +91,7 @@ namespace Lucene.Net.Analysis.Ar
private readonly CharArraySet stemExclusionSet;
/// <summary>
- /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public ArabicAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -115,8 +112,8 @@ namespace Lucene.Net.Analysis.Ar
/// <summary>
/// Builds an analyzer with the given stop word. If a none-empty stem exclusion set is
- /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
- /// <seealso cref="ArabicStemFilter"/>.
+ /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
+ /// <see cref="ArabicStemFilter"/>.
/// </summary>
/// <param name="matchVersion">
/// lucene compatibility version </param>
@@ -131,15 +128,14 @@ namespace Lucene.Net.Analysis.Ar
}
/// <summary>
- /// Creates
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+ /// Creates <see cref="Analyzer.TokenStreamComponents"/>
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
- /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from an <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
- /// <seealso cref="ArabicNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
- /// if a stem exclusion set is provided and <seealso cref="ArabicStemFilter"/>. </returns>
+ /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from an <see cref="StandardTokenizer"/> filtered with
+ /// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="ArabicNormalizationFilter"/>, <see cref="SetKeywordMarkerFilter"/>
+ /// if a stem exclusion set is provided and <see cref="ArabicStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
#pragma warning disable 612, 618
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
index 5fa5827..0e4e28c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -31,52 +31,54 @@ namespace Lucene.Net.Analysis.Ar
/// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
/// </para>
/// <para>
- /// <a name="version"/>
- /// You must specify the required <seealso cref="Version"/> compatibility when creating
- /// <seealso cref="ArabicLetterTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token characters. See <seealso cref="#isTokenChar(int)"/> and
- /// <seealso cref="#normalize(int)"/> for details.</li>
- /// </ul>
+ /// <paramref name="matchVersion"/>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="ArabicLetterTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="Util.CharTokenizer"/> uses an int based API to normalize and
+ /// detect token characters. See <see cref="IsTokenChar(int)"/> and
+ /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
- /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead.
+ /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead.
[Obsolete("(3.1) Use StandardTokenizer instead.")]
- public class ArabicLetterTokenizer : LetterTokenizer
- {
- /// <summary>
- /// Construct a new ArabicLetterTokenizer. </summary>
- /// <param name="matchVersion"> Lucene version
- /// to match See <seealso cref="<a href="#version">above</a>"/>
- /// </param>
- /// <param name="in">
- /// the input to split up into tokens </param>
- public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in)
- : base(matchVersion, @in)
- {
- }
+ public class ArabicLetterTokenizer : LetterTokenizer
+ {
+ /// <summary>
+ /// Construct a new ArabicLetterTokenizer. </summary>
+ /// <param name="matchVersion"> Lucene version
+ /// to match See <seealso cref="<a href="#version">above</a>"/>
+ /// </param>
+ /// <param name="in">
+ /// the input to split up into tokens </param>
+ public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in)
+ : base(matchVersion, @in)
+ {
+ }
- /// <summary>
- /// Construct a new ArabicLetterTokenizer using a given
- /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param
- /// matchVersion Lucene version to match See
- /// <seealso cref="<a href="#version">above</a>"/>
- /// </summary>
- /// <param name="factory">
- /// the attribute factory to use for this Tokenizer </param>
- /// <param name="in">
- /// the input to split up into tokens </param>
- public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in)
+ /// <summary>
+ /// Construct a new <see cref="ArabicLetterTokenizer"/> using a given
+ /// <see cref="AttributeSource.AttributeFactory"/>.
+ /// </summary>
+ /// <param name="matchVersion">
+ /// matchVersion Lucene version to match See
+ /// <see cref="LuceneVersion"/>.
+ /// </param>
+ /// <param name="factory">
+ /// the attribute factory to use for this Tokenizer </param>
+ /// <param name="in">
+ /// the input to split up into tokens </param>
+ public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in)
: base(matchVersion, factory, @in)
- {
- }
+ {
+ }
- /// <summary>
- /// Allows for Letter category or NonspacingMark category </summary>
- /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso>
- protected override bool IsTokenChar(int c)
- {
+ /// <summary>
+ /// Allows for Letter category or NonspacingMark category </summary>
+ /// <seealso cref="LetterTokenizer.IsTokenChar(int)"/>
+ protected override bool IsTokenChar(int c)
+ {
return base.IsTokenChar(c) || Character.GetType(c) == UnicodeCategory.NonSpacingMark;
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
index 43b08d7..366d85c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
@@ -24,24 +24,24 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// Factory for <seealso cref="ArabicLetterTokenizer"/> </summary>
+ /// Factory for <see cref="ArabicLetterTokenizer"/> </summary>
/// @deprecated (3.1) Use StandardTokenizerFactory instead.
///
[Obsolete("(3.1) Use StandardTokenizerFactory instead.")]
- public class ArabicLetterTokenizerFactory : TokenizerFactory
- {
+ public class ArabicLetterTokenizerFactory : TokenizerFactory
+ {
- /// <summary>
- /// Creates a new ArabicNormalizationFilterFactory </summary>
- public ArabicLetterTokenizerFactory(IDictionary<string, string> args)
- : base(args)
- {
- AssureMatchVersion();
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
+ /// <summary>
+ /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary>
+ public ArabicLetterTokenizerFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ AssureMatchVersion();
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
index 7d2fa2a..7f87ecf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
@@ -20,10 +20,8 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicNormalizer"/> to normalize the orthography.
- ///
+ /// A <see cref="TokenFilter"/> that applies <see cref="ArabicNormalizer"/> to normalize the orthography.
/// </summary>
-
public sealed class ArabicNormalizationFilter : TokenFilter
{
private readonly ArabicNormalizer normalizer = new ArabicNormalizer();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
index 840522c..0b92b5c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
@@ -21,20 +21,21 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// Factory for <seealso cref="ArabicNormalizationFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="ArabicNormalizationFilter"/>.
+ /// <code>
/// <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.ArabicNormalizationFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class ArabicNormalizationFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
/// <summary>
- /// Creates a new ArabicNormalizationFilterFactory </summary>
+ /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary>
public ArabicNormalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
index 47ebe76..9733198 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
@@ -20,21 +20,18 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// Normalizer for Arabic.
- /// <para>
- /// Normalization is done in-place for efficiency, operating on a termbuffer.
- /// </para>
- /// <para>
- /// Normalization is defined as:
- /// <ul>
- /// <li> Normalization of hamza with alef seat to a bare alef.
- /// <li> Normalization of teh marbuta to heh
- /// <li> Normalization of dotless yeh (alef maksura) to yeh.
- /// <li> Removal of Arabic diacritics (the harakat)
- /// <li> Removal of tatweel (stretching character).
- /// </ul>
- ///
- /// </para>
+ /// Normalizer for Arabic.
+ /// <para/>
+ /// Normalization is done in-place for efficiency, operating on a termbuffer.
+ /// <para/>
+ /// Normalization is defined as:
+ /// <list type="bullet">
+ /// <item> Normalization of hamza with alef seat to a bare alef.</item>
+ /// <item> Normalization of teh marbuta to heh</item>
+ /// <item> Normalization of dotless yeh (alef maksura) to yeh.</item>
+ /// <item> Removal of Arabic diacritics (the harakat)</item>
+ /// <item> Removal of tatweel (stretching character).</item>
+ /// </list>
/// </summary>
public class ArabicNormalizer
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
index 54027ed..a8d2745 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
@@ -20,14 +20,13 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicStemmer"/> to stem Arabic words..
- /// <para>
+ /// A <see cref="TokenFilter"/> that applies <see cref="ArabicStemmer"/> to stem Arabic words..
+ /// <para/>
/// To prevent terms from being stemmed use an instance of
- /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
- /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
- /// </para> </summary>
- /// <seealso cref= SetKeywordMarkerFilter </seealso>
-
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// the <see cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+ /// </summary>
+ /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
public sealed class ArabicStemFilter : TokenFilter
{
private readonly ArabicStemmer stemmer = new ArabicStemmer();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
index 08d0d19..0238b5b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// Factory for <seealso cref="ArabicStemFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="ArabicStemFilter"/>.
+ /// <code>
/// <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.ArabicNormalizationFilterFactory"/>
/// <filter class="solr.ArabicStemFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class ArabicStemFilterFactory : TokenFilterFactory
{
/// <summary>
- /// Creates a new ArabicStemFilterFactory </summary>
+ /// Creates a new <see cref="ArabicStemFilterFactory"/> </summary>
public ArabicStemFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
index 8ba6ca7..444b5d3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
@@ -20,18 +20,15 @@ namespace Lucene.Net.Analysis.Ar
*/
/// <summary>
- /// Stemmer for Arabic.
- /// <para>
- /// Stemming is done in-place for efficiency, operating on a termbuffer.
- /// </para>
- /// <para>
- /// Stemming is defined as:
- /// <ul>
- /// <li> Removal of attached definite article, conjunction, and prepositions.
- /// <li> Stemming of common suffixes.
- /// </ul>
- ///
- /// </para>
+ /// Stemmer for Arabic.
+ /// <para/>
+ /// Stemming is done in-place for efficiency, operating on a termbuffer.
+ /// <para/>
+ /// Stemming is defined as:
+ /// <list type="bullet">
+ /// <item> Removal of attached definite article, conjunction, and prepositions.</item>
+ /// <item> Stemming of common suffixes.</item>
+ /// </list>
/// </summary>
public class ArabicStemmer
{
[3/6] lucenenet git commit: Lucene.Net.Analysis.Br refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Br refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/83902e97
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/83902e97
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/83902e97
Branch: refs/heads/api-work
Commit: 83902e979cec023948dcef9dba21b0c02924d171
Parents: 1b0bca6
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:30:21 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:30:21 2017 +0700
----------------------------------------------------------------------
.../Analysis/Br/BrazilianAnalyzer.cs | 20 +++++++++----------
.../Analysis/Br/BrazilianStemFilter.cs | 18 ++++++++---------
.../Analysis/Br/BrazilianStemFilterFactory.cs | 9 +++++----
.../Analysis/Br/BrazilianStemmer.cs | 21 +++++++++-----------
4 files changed, 33 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
index 12109b6..b6bd791 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
@@ -27,15 +27,15 @@ namespace Lucene.Net.Analysis.Br
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Brazilian Portuguese language.
+ /// <see cref="Analyzer"/> for Brazilian Portuguese language.
/// <para>
/// Supports an external list of stopwords (words that
/// will not be indexed at all) and an external list of exclusions (words that will
/// not be stemmed, but indexed).
/// </para>
///
- /// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
- /// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+ /// <para><b>NOTE</b>: This class uses the same <see cref="LuceneVersion"/>
+ /// dependent settings as <see cref="StandardAnalyzer"/>.</para>
/// </summary>
public sealed class BrazilianAnalyzer : StopwordAnalyzerBase
{
@@ -86,7 +86,7 @@ namespace Lucene.Net.Analysis.Br
private CharArraySet excltable = CharArraySet.EMPTY_SET;
/// <summary>
- /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+ /// Builds an analyzer with the default stop words (<see cref="DefaultStopSet"/>).
/// </summary>
public BrazilianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -120,13 +120,13 @@ namespace Lucene.Net.Analysis.Br
/// <summary>
/// Creates
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
- /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from a <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="LowerCaseFilter"/>, <seealso cref="StandardFilter"/>, <seealso cref="StopFilter"/>
- /// , and <seealso cref="BrazilianStemFilter"/>. </returns>
+ /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from a <see cref="StandardTokenizer"/> filtered with
+ /// <see cref="LowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/>,
+ /// and <see cref="BrazilianStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
index 69580e4..0b94e4c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
@@ -21,19 +21,19 @@ namespace Lucene.Net.Analysis.Br
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="BrazilianStemmer"/>.
+ /// A <see cref="TokenFilter"/> that applies <see cref="BrazilianStemmer"/>.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
- /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
- /// </para> </summary>
- /// <seealso cref= SetKeywordMarkerFilter
- /// </seealso>
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
public sealed class BrazilianStemFilter : TokenFilter
{
/// <summary>
- /// <seealso cref="BrazilianStemmer"/> in use by this filter.
+ /// <see cref="BrazilianStemmer"/> in use by this filter.
/// </summary>
private BrazilianStemmer stemmer = new BrazilianStemmer();
private HashSet<string> exclusions = null; // LUCENENET TODO: This is odd. No way to set it at all, so it cannot possibly have any values.
@@ -41,9 +41,9 @@ namespace Lucene.Net.Analysis.Br
private readonly IKeywordAttribute keywordAttr;
/// <summary>
- /// Creates a new BrazilianStemFilter
+ /// Creates a new <see cref="BrazilianStemFilter"/>
/// </summary>
- /// <param name="in"> the source <seealso cref="TokenStream"/> </param>
+ /// <param name="in"> the source <see cref="TokenStream"/> </param>
public BrazilianStemFilter(TokenStream @in)
: base(@in)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
index 332ea42..0be7f71 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Br
*/
/// <summary>
- /// Factory for <seealso cref="BrazilianStemFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="BrazilianStemFilter"/>.
+ /// <code>
/// <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.LowerCaseFilterFactory"/>
/// <filter class="solr.BrazilianStemFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class BrazilianStemFilterFactory : TokenFilterFactory
{
/// <summary>
- /// Creates a new BrazilianStemFilterFactory </summary>
+ /// Creates a new <see cref="BrazilianStemFilterFactory"/> </summary>
public BrazilianStemFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
index b08f0dd..3d35ee5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
@@ -41,10 +41,10 @@ namespace Lucene.Net.Analysis.Br
}
/// <summary>
- /// Stems the given term to an unique <tt>discriminator</tt>.
+ /// Stems the given term to a unique <c>discriminator</c>.
/// </summary>
/// <param name="term"> The term that should be stemmed. </param>
- /// <returns> Discriminator for <tt>term</tt> </returns>
+ /// <returns>Discriminator for <paramref name="term"/></returns>
protected internal virtual string Stem(string term)
{
bool altered = false; // altered the term
@@ -106,7 +106,7 @@ namespace Lucene.Net.Analysis.Br
/// <summary>
/// Checks a term if it can be processed indexed.
/// </summary>
- /// <returns> true if it can be indexed </returns>
+ /// <returns> true if it can be indexed </returns>
private bool IsIndexable(string term)
{
return (term.Length < 30) && (term.Length > 2);
@@ -335,9 +335,9 @@ namespace Lucene.Net.Analysis.Br
}
/// <summary>
- /// Replace a string suffix by another
+ /// Replace a <see cref="string"/> suffix by another
/// </summary>
- /// <returns> the replaced String </returns>
+ /// <returns> the replaced <see cref="string"/> </returns>
private string ReplaceSuffix(string value, string toReplace, string changeTo)
{
string vvalue;
@@ -361,9 +361,9 @@ namespace Lucene.Net.Analysis.Br
}
/// <summary>
- /// Remove a string suffix
+ /// Remove a <see cref="string"/> suffix
/// </summary>
- /// <returns> the String without the suffix </returns>
+ /// <returns> the <see cref="string"/> without the suffix </returns>
private string RemoveSuffix(string value, string toRemove)
{
// be-safe !!!
@@ -376,7 +376,7 @@ namespace Lucene.Net.Analysis.Br
}
/// <summary>
- /// See if a suffix is preceded by a String
+ /// See if a suffix is preceded by a <see cref="string"/>
/// </summary>
/// <returns> true if the suffix is preceded </returns>
private bool SuffixPreceded(string value, string suffix, string preceded)
@@ -1292,7 +1292,6 @@ namespace Lucene.Net.Analysis.Br
/// <summary>
/// Delete suffix 'i' if in RV and preceded by 'c'
- ///
/// </summary>
private void Step3()
{
@@ -1313,7 +1312,6 @@ namespace Lucene.Net.Analysis.Br
///
/// If the word ends with one of the suffixes (os a i o á í ó)
/// in RV, delete it
- ///
/// </summary>
private void Step4()
{
@@ -1351,7 +1349,6 @@ namespace Lucene.Net.Analysis.Br
/// delete the 'u' (or 'i')
///
/// Or if the word ends ç remove the cedilha
- ///
/// </summary>
private void Step5()
{
@@ -1384,7 +1381,7 @@ namespace Lucene.Net.Analysis.Br
/// <summary>
/// For log and debug purpose
/// </summary>
- /// <returns> TERM, CT, RV, R1 and R2 </returns>
+ /// <returns> TERM, CT, RV, R1 and R2 </returns>
public virtual string Log()
{
return " (TERM = " + TERM + ")" + " (CT = " + CT + ")" + " (RV = " + RV + ")" + " (R1 = " + R1 + ")" + " (R2 = " + R2 + ")";
[6/6] lucenenet git commit: Lucene.Net.Analysis.Cjk refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Cjk refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/917b4fdf
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/917b4fdf
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/917b4fdf
Branch: refs/heads/api-work
Commit: 917b4fdf53f978f32219cef6edf31f3c30b84dea
Parents: 7fdbd66
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 21:53:51 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 21:53:51 2017 +0700
----------------------------------------------------------------------
.../Analysis/Cjk/CJKAnalyzer.cs | 12 ++---
.../Analysis/Cjk/CJKBigramFilter.cs | 32 ++++++-------
.../Analysis/Cjk/CJKBigramFilterFactory.cs | 13 +++---
.../Analysis/Cjk/CJKTokenizer.cs | 14 +++---
.../Analysis/Cjk/CJKTokenizerFactory.cs | 13 +++---
.../Analysis/Cjk/CJKWidthFilter.cs | 49 ++++++++++++++------
.../Analysis/Cjk/CJKWidthFilterFactory.cs | 10 ++--
7 files changed, 82 insertions(+), 61 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
index 0fcc42c..28c7a52 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
@@ -25,16 +25,16 @@ namespace Lucene.Net.Analysis.Cjk
*/
/// <summary>
- /// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="StandardTokenizer"/>,
- /// normalizes content with <seealso cref="CJKWidthFilter"/>, folds case with
- /// <seealso cref="LowerCaseFilter"/>, forms bigrams of CJK with <seealso cref="CJKBigramFilter"/>,
- /// and filters stopwords with <seealso cref="StopFilter"/>
+ /// An <see cref="Analyzer"/> that tokenizes text with <see cref="StandardTokenizer"/>,
+ /// normalizes content with <see cref="CJKWidthFilter"/>, folds case with
+ /// <see cref="LowerCaseFilter"/>, forms bigrams of CJK with <see cref="CJKBigramFilter"/>,
+ /// and filters stopwords with <see cref="StopFilter"/>
/// </summary>
public sealed class CJKAnalyzer : StopwordAnalyzerBase
{
/// <summary>
/// File containing default CJK stopwords.
- /// <p/>
+ /// <para/>
/// Currently it contains some common English words that are not usually
/// useful for searching and some double-byte interpunctions.
/// </summary>
@@ -72,7 +72,7 @@ namespace Lucene.Net.Analysis.Cjk
}
/// <summary>
- /// Builds an analyzer which removes words in <seealso cref="#getDefaultStopSet()"/>.
+ /// Builds an analyzer which removes words in <see cref="DefaultStopSet"/>.
/// </summary>
public CJKAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
index 4b8cb17..443ea04 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
@@ -23,18 +23,18 @@ namespace Lucene.Net.Analysis.Cjk
*/
/// <summary>
- /// Forms bigrams of CJK terms that are generated from StandardTokenizer
+ /// Forms bigrams of CJK terms that are generated from <see cref="StandardTokenizer"/>
/// or ICUTokenizer.
/// <para>
/// CJK types are set by these tokenizers, but you can also use
- /// <seealso cref="#CJKBigramFilter(TokenStream, int)"/> to explicitly control which
+ /// <see cref="CJKBigramFilter(TokenStream, int)"/> to explicitly control which
/// of the CJK scripts are turned into bigrams.
/// </para>
/// <para>
/// By default, when a CJK character has no adjacent characters to form
/// a bigram, it is output in unigram form. If you want to always output
/// both unigrams and bigrams, set the <code>outputUnigrams</code>
- /// flag in <seealso cref="CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)"/>.
+ /// flag in <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int, bool)"/>.
/// This can be used for a combined unigram+bigram approach.
/// </para>
/// <para>
@@ -90,22 +90,22 @@ namespace Lucene.Net.Analysis.Cjk
private readonly IPositionLengthAttribute posLengthAtt;
// buffers containing codepoint and offsets in parallel
- internal int[] buffer = new int[8];
- internal int[] startOffset = new int[8];
- internal int[] endOffset = new int[8];
+ private int[] buffer = new int[8];
+ private int[] startOffset = new int[8];
+ private int[] endOffset = new int[8];
// length of valid buffer
- internal int bufferLen;
+ private int bufferLen;
// current buffer index
- internal int index;
+ private int index;
// the last end offset, to determine if we should bigram across tokens
- internal int lastEndOffset;
+ private int lastEndOffset;
private bool exhausted;
/// <summary>
- /// Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
- /// CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
+ /// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int)">
+ /// CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)</see>
/// </summary>
public CJKBigramFilter(TokenStream @in)
: this(@in, HAN | HIRAGANA | KATAKANA | HANGUL)
@@ -113,8 +113,8 @@ namespace Lucene.Net.Analysis.Cjk
}
/// <summary>
- /// Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
- /// CJKBigramFilter(in, flags, false)}
+ /// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int, bool)">
+ /// CJKBigramFilter(in, flags, false)</see>
/// </summary>
public CJKBigramFilter(TokenStream @in, int flags)
: this(@in, flags, false)
@@ -122,10 +122,10 @@ namespace Lucene.Net.Analysis.Cjk
}
/// <summary>
- /// Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+ /// Create a new <see cref="CJKBigramFilter"/>, specifying which writing systems should be bigrammed,
/// and whether or not unigrams should also be output. </summary>
- /// <param name="flags"> OR'ed set from <seealso cref="CJKBigramFilter#HAN"/>, <seealso cref="CJKBigramFilter#HIRAGANA"/>,
- /// <seealso cref="CJKBigramFilter#KATAKANA"/>, <seealso cref="CJKBigramFilter#HANGUL"/> </param>
+ /// <param name="flags"> OR'ed set from <see cref="CJKBigramFilter.HAN"/>, <see cref="CJKBigramFilter.HIRAGANA"/>,
+ /// <see cref="CJKBigramFilter.KATAKANA"/>, <see cref="CJKBigramFilter.HANGUL"/> </param>
/// <param name="outputUnigrams"> true if unigrams for the selected writing systems should also be output.
/// when this is false, this is only done when there are no adjacent characters to form
/// a bigram. </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
index 8fd34fd..b9e4d97 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
@@ -21,8 +21,8 @@ namespace Lucene.Net.Analysis.Cjk
*/
/// <summary>
- /// Factory for <seealso cref="CJKBigramFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="CJKBigramFilter"/>.
+ /// <code>
/// <fieldType name="text_cjk" class="solr.TextField">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -32,15 +32,16 @@ namespace Lucene.Net.Analysis.Cjk
/// han="true" hiragana="true"
/// katakana="true" hangul="true" outputUnigrams="false" />
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CJKBigramFilterFactory : TokenFilterFactory
{
- internal readonly int flags;
- internal readonly bool outputUnigrams;
+ private readonly int flags;
+ private readonly bool outputUnigrams;
/// <summary>
- /// Creates a new CJKBigramFilterFactory </summary>
+ /// Creates a new <see cref="CJKBigramFilterFactory"/> </summary>
public CJKBigramFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
index 1ff4f07..160306d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -32,11 +32,11 @@ namespace Lucene.Net.Analysis.Cjk
/// Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
/// </para>
/// Additionally, the following is applied to Latin text (such as English):
- /// <ul>
- /// <li>Text is converted to lowercase.
- /// <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
- /// <li>Full-width forms are converted to half-width forms.
- /// </ul>
+ /// <list type="bullet">
+ /// <item>Text is converted to lowercase.</item>
+ /// <item>Numeric digits, '+', '#', and '_' are tokenized as letters.</item>
+ /// <item>Full-width forms are converted to half-width forms.</item>
+ /// </list>
/// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
/// please search <a
/// href="http://www.google.com/search?q=word+chinese+segment">google</a>
@@ -145,7 +145,7 @@ namespace Lucene.Net.Analysis.Cjk
/// </summary>
/// <returns> false for end of stream, true otherwise
/// </returns>
- /// <exception cref="java.io.IOException"> - throw IOException when read error <br>
+ /// <exception cref="IOException"> when read error
/// happened in the InputStream
/// </exception>
public override bool IncrementToken()
@@ -347,7 +347,7 @@ namespace Lucene.Net.Analysis.Cjk
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
index 220a7d6..c33f3a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
@@ -25,20 +25,21 @@ namespace Lucene.Net.Analysis.Cjk
/// <summary>
- /// Factory for <seealso cref="CJKTokenizer"/>.
- /// <pre class="prettyprint" >
+ /// Factory for <see cref="CJKTokenizer"/>.
+ /// <code>
/// <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.CJKTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre> </summary>
- /// @deprecated Use <seealso cref="CJKBigramFilterFactory"/> instead.
+ /// </fieldType>
+ /// </code>
+ /// </summary>
+ /// @deprecated Use <see cref="CJKBigramFilterFactory"/> instead.
[Obsolete("Use CJKBigramFilterFactory instead.")]
public class CJKTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new CJKTokenizerFactory </summary>
+ /// Creates a new <see cref="CJKTokenizerFactory"/> </summary>
public CJKTokenizerFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
index 331de6b..64018e2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
@@ -22,11 +22,11 @@ namespace Lucene.Net.Analysis.Cjk
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that normalizes CJK width differences:
- /// <ul>
- /// <li>Folds fullwidth ASCII variants into the equivalent basic latin
- /// <li>Folds halfwidth Katakana variants into the equivalent kana
- /// </ul>
+ /// A <see cref="TokenFilter"/> that normalizes CJK width differences:
+ /// <list type="bullet">
+ /// <item>Folds fullwidth ASCII variants into the equivalent basic latin</item>
+ /// <item>Folds halfwidth Katakana variants into the equivalent kana</item>
+ /// </list>
/// <para>
/// NOTE: this filter can be viewed as a (practical) subset of NFKC/NFKD
/// Unicode normalization. See the normalization support in the ICU package
@@ -37,13 +37,22 @@ namespace Lucene.Net.Analysis.Cjk
{
private ICharTermAttribute termAtt;
- /* halfwidth kana mappings: 0xFF65-0xFF9D
- *
- * note: 0xFF9C and 0xFF9D are only mapped to 0x3099 and 0x309A
- * as a fallback when they cannot properly combine with a preceding
- * character into a composed form.
- */
- private static readonly char[] KANA_NORM = new char[] { (char)0x30fb, (char)0x30f2, (char)0x30a1, (char)0x30a3, (char)0x30a5, (char)0x30a7, (char)0x30a9, (char)0x30e3, (char)0x30e5, (char)0x30e7, (char)0x30c3, (char)0x30fc, (char)0x30a2, (char)0x30a4, (char)0x30a6, (char)0x30a8, (char)0x30aa, (char)0x30ab, (char)0x30ad, (char)0x30af, (char)0x30b1, (char)0x30b3, (char)0x30b5, (char)0x30b7, (char)0x30b9, (char)0x30bb, (char)0x30bd, (char)0x30bf, (char)0x30c1, (char)0x30c4, (char)0x30c6, (char)0x30c8, (char)0x30ca, (char)0x30cb, (char)0x30cc, (char)0x30cd, (char)0x30ce, (char)0x30cf, (char)0x30d2, (char)0x30d5, (char)0x30d8, (char)0x30db, (char)0x30de, (char)0x30df, (char)0x30e0, (char)0x30e1, (char)0x30e2, (char)0x30e4, (char)0x30e6, (char)0x30e8, (char)0x30e9, (char)0x30ea, (char)0x30eb, (char)0x30ec, (char)0x30ed, (char)0x30ef, (char)0x30f3, (char)0x3099, (char)0x309A };
+ /// <summary>
+ /// halfwidth kana mappings: 0xFF65-0xFF9D
+ /// <para/>
+ /// note: 0xFF9C and 0xFF9D are only mapped to 0x3099 and 0x309A
+ /// as a fallback when they cannot properly combine with a preceding
+ /// character into a composed form.
+ /// </summary>
+ private static readonly char[] KANA_NORM = new char[] {
+ (char)0x30fb, (char)0x30f2, (char)0x30a1, (char)0x30a3, (char)0x30a5, (char)0x30a7, (char)0x30a9, (char)0x30e3, (char)0x30e5,
+ (char)0x30e7, (char)0x30c3, (char)0x30fc, (char)0x30a2, (char)0x30a4, (char)0x30a6, (char)0x30a8, (char)0x30aa, (char)0x30ab,
+ (char)0x30ad, (char)0x30af, (char)0x30b1, (char)0x30b3, (char)0x30b5, (char)0x30b7, (char)0x30b9, (char)0x30bb, (char)0x30bd,
+ (char)0x30bf, (char)0x30c1, (char)0x30c4, (char)0x30c6, (char)0x30c8, (char)0x30ca, (char)0x30cb, (char)0x30cc, (char)0x30cd,
+ (char)0x30ce, (char)0x30cf, (char)0x30d2, (char)0x30d5, (char)0x30d8, (char)0x30db, (char)0x30de, (char)0x30df, (char)0x30e0,
+ (char)0x30e1, (char)0x30e2, (char)0x30e4, (char)0x30e6, (char)0x30e8, (char)0x30e9, (char)0x30ea, (char)0x30eb, (char)0x30ec,
+ (char)0x30ed, (char)0x30ef, (char)0x30f3, (char)0x3099, (char)0x309A
+ };
public CJKWidthFilter(TokenStream input)
: base(input)
@@ -87,10 +96,20 @@ namespace Lucene.Net.Analysis.Cjk
}
}
- /* kana combining diffs: 0x30A6-0x30FD */
- private static readonly sbyte[] KANA_COMBINE_VOICED = new sbyte[] { 78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ /// <summary>kana combining diffs: 0x30A6-0x30FD </summary>
+ private static readonly sbyte[] KANA_COMBINE_VOICED = new sbyte[] {
+ 78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+ 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
+ 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+ };
- private static readonly sbyte[] KANA_COMBINE_HALF_VOICED = new sbyte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ private static readonly sbyte[] KANA_COMBINE_HALF_VOICED = new sbyte[] {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2,
+ 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
/// <summary>
/// returns true if we successfully combined the voice mark </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
index dfe8f2e..9c956e6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
@@ -21,8 +21,8 @@ namespace Lucene.Net.Analysis.Cjk
*/
/// <summary>
- /// Factory for <seealso cref="CJKWidthFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="CJKWidthFilter"/>.
+ /// <code>
/// <fieldType name="text_cjk" class="solr.TextField">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -30,13 +30,13 @@ namespace Lucene.Net.Analysis.Cjk
/// <filter class="solr.LowerCaseFilterFactory"/>
/// <filter class="solr.CJKBigramFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CJKWidthFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new CJKWidthFilterFactory </summary>
+ /// Creates a new <see cref="CJKWidthFilterFactory"/> </summary>
public CJKWidthFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
[4/6] lucenenet git commit: Lucene.Net.Analysis.Ca refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Ca refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5d0d43f4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5d0d43f4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5d0d43f4
Branch: refs/heads/api-work
Commit: 5d0d43f4034186c2f893d2e60475c1bbcee8998e
Parents: 83902e9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:39:49 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:39:49 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ca/CatalanAnalyzer.cs | 33 +++++++++-----------
1 file changed, 15 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d0d43f4/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
index b65b920..704f543 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
@@ -29,16 +29,13 @@ namespace Lucene.Net.Analysis.Ca
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Catalan.
- /// <para>
- /// <a name="version"/>
- /// </para>
- /// <para>You must specify the required <seealso cref="Version"/>
+ /// <see cref="Analyzer"/> for Catalan.
+ /// <para>You must specify the required <see cref="LuceneVersion"/>
/// compatibility when creating CatalanAnalyzer:
- /// <ul>
- /// <li> As of 3.6, ElisionFilter with a set of Catalan
- /// contractions is used by default.
- /// </ul>
+ /// <list>
+ /// <item> As of 3.6, <see cref="ElisionFilter"/> with a set of Catalan
+ /// contractions is used by default.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class CatalanAnalyzer : StopwordAnalyzerBase
@@ -66,7 +63,7 @@ namespace Lucene.Net.Analysis.Ca
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.;
/// </summary>
private class DefaultSetHolder
@@ -90,7 +87,7 @@ namespace Lucene.Net.Analysis.Ca
}
/// <summary>
- /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public CatalanAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -109,7 +106,7 @@ namespace Lucene.Net.Analysis.Ca
/// <summary>
/// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+ /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
/// stemming.
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
@@ -123,15 +120,15 @@ namespace Lucene.Net.Analysis.Ca
/// <summary>
/// Creates a
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+ /// <see cref="Analyzer.TokenStreamComponents"/>
/// built from an <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>, <seealso cref="LowerCaseFilter"/>,
- /// <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
- /// provided and <seealso cref="SnowballFilter"/>. </returns>
+ /// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>,
+ /// <see cref="StopFilter"/>, <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// provided and <see cref="SnowballFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
[5/6] lucenenet git commit: Lucene.Net.Analysis.CharFilter refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.CharFilter refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7fdbd66d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7fdbd66d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7fdbd66d
Branch: refs/heads/api-work
Commit: 7fdbd66dc5f2b590dd27e543a2644966cba2a40b
Parents: 5d0d43f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 21:25:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 21:25:07 2017 +0700
----------------------------------------------------------------------
.../Analysis/CharFilter/BaseCharFilter.cs | 10 +-
.../Analysis/CharFilter/HTMLStripCharFilter.cs | 329 +++++++++----------
.../CharFilter/HTMLStripCharFilterFactory.cs | 13 +-
.../Analysis/CharFilter/MappingCharFilter.cs | 20 +-
.../CharFilter/MappingCharFilterFactory.cs | 17 +-
.../Analysis/CharFilter/NormalizeCharMap.cs | 9 +-
6 files changed, 194 insertions(+), 204 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
index 4ae7af5..ed3d61b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.CharFilters
*/
/// <summary>
- /// Base utility class for implementing a <seealso cref="CharFilter"/>.
+ /// Base utility class for implementing a <see cref="CharFilter"/>.
/// You subclass this, and then record mappings by calling
- /// <seealso cref="#addOffCorrectMap"/>, and then invoke the correct
+ /// <see cref="AddOffCorrectMap"/>, and then invoke the correct
/// method to correct an offset.
/// </summary>
public abstract class BaseCharFilter : CharFilter
@@ -35,7 +35,7 @@ namespace Lucene.Net.Analysis.CharFilters
private int[] diffs;
private int size = 0;
- protected BaseCharFilter(TextReader @in)
+ public BaseCharFilter(TextReader @in)
: base(@in)
{
}
@@ -85,7 +85,7 @@ namespace Lucene.Net.Analysis.CharFilters
}
}
- protected internal virtual int LastCumulativeDiff
+ protected virtual int LastCumulativeDiff
{
get
{
@@ -105,7 +105,7 @@ namespace Lucene.Net.Analysis.CharFilters
/// <param name="off"> The output stream offset at which to apply the correction </param>
/// <param name="cumulativeDiff"> The input offset is given by adding this
/// to the output offset </param>
- protected internal virtual void AddOffCorrectMap(int off, int cumulativeDiff)
+ protected virtual void AddOffCorrectMap(int off, int cumulativeDiff)
{
if (offsets == null)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index 6fd8bad..d60080e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -27,17 +27,17 @@ namespace Lucene.Net.Analysis.CharFilters
*/
/// <summary>
- /// A CharFilter that wraps another TextReader and attempts to strip out HTML constructs.
+ /// A <see cref="CharFilter"/> that wraps another <see cref="TextReader"/> and attempts to strip out HTML constructs.
/// </summary>
- public class HTMLStripCharFilter : BaseCharFilter
+ public sealed class HTMLStripCharFilter : BaseCharFilter
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
private const int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private const int ZZ_BUFFERSIZE = 16384;
- /** lexical states */
+ // lexical states
private const int YYINITIAL = 0;
private const int AMPERSAND = 2;
private const int NUMERIC_CHARACTER = 4;
@@ -62,21 +62,21 @@ namespace Lucene.Net.Analysis.CharFilters
private const int STYLE = 42;
private const int STYLE_COMMENT = 44;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = {
0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22
};
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0009\x0000\x0005\x0021\x0012\x0000\x0001\x0021\x0001\x001D\x0001\x001B\x0001\x001F\x0002\x0000\x0001\x00B6\x0001\x0019" +
"\x0005\x0000\x0001\x001E\x0001\x0002\x0001\x00BE\x0001\x00B8\x0001\x003C\x0001\x003D\x0001\x003F\x0001\x003E\x0001\x00BA" +
@@ -2230,9 +2230,9 @@ namespace Lucene.Net.Analysis.CharFilters
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -30492,21 +30492,21 @@ namespace Lucene.Net.Analysis.CharFilters
}
- /* error codes */
+ /// <summary>error codes</summary>
private static readonly int ZZ_UNKNOWN_ERROR = 0;
private static readonly int ZZ_NO_MATCH = 1;
private static readonly int ZZ_PUSHBACK_2BIG = 2;
- /* error messages for the codes above */
+ /// <summary>error messages for the codes above</summary>
private static readonly string[] ZZ_ERROR_MSG = {
"Unkown internal scanner error",
"Error: could not match input",
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -30618,59 +30618,62 @@ namespace Lucene.Net.Analysis.CharFilters
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private BufferedCharFilter zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is the source of the YyText() string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText() string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
#pragma warning restore 169, 414
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
- /* user code: */
+ /// <summary>
+ /// user code:
+ /// </summary>
private static readonly IDictionary<string, string> upperCaseVariantsAccepted
= new Dictionary<string, string>()
{
@@ -30798,10 +30801,10 @@ namespace Lucene.Net.Analysis.CharFilters
private TextSegment outputSegment;
private TextSegment entitySegment = new TextSegment(2);
- /**
- * Creates a new HTMLStripCharFilter over the provided TextReader.
- * @param source SetReader to strip html tags from.
- */
+ /// <summary>
+ /// Creates a new HTMLStripCharFilter over the provided TextReader.
+ /// </summary>
+ /// <param name="source"><see cref="TextReader"/> to strip html tags from.</param>
public HTMLStripCharFilter(TextReader source)
: base(source)
{
@@ -30809,13 +30812,12 @@ namespace Lucene.Net.Analysis.CharFilters
this.zzReader = GetBufferedReader(source);
}
- /**
- * Creates a new HTMLStripCharFilter over the provided TextReader
- * with the specified start and end tags.
- * @param source SetReader to strip html tags from.
- * @param escapedTags Tags in this set (both start and end tags)
- * will not be filtered out.
- */
+ /// <summary>
+ /// Creates a new <see cref="HTMLStripCharFilter"/> over the provided <see cref="TextReader"/>
+ /// with the specified start and end tags.
+ /// </summary>
+ /// <param name="source"><see cref="TextReader"/> to strip html tags from.</param>
+ /// <param name="escapedTags">Tags in this set (both start and end tags) will not be filtered out.</param>
public HTMLStripCharFilter(TextReader source, ICollection<string> escapedTags)
: base(source)
{
@@ -30910,54 +30912,63 @@ namespace Lucene.Net.Analysis.CharFilters
private class TextSegment : OpenStringBuilder
{
- /** The position from which the next char will be read. */
+ /// <summary>
+ /// The position from which the next char will be read.
+ /// </summary>
int pos = 0;
- /** Wraps the given buffer and sets this.len to the given length. */
+ /// <summary>
+ /// Wraps the given <paramref name="buffer"/> and sets <c>m_len</c> to the given <paramref name="length"/>.
+ /// </summary>
internal TextSegment(char[] buffer, int length) : base(buffer, length)
{ }
- /** Allocates an internal buffer of the given size. */
+ /// <summary>
+ /// Allocates an internal buffer of the given size.
+ /// </summary>
internal TextSegment(int size) : base(size)
{ }
- /** Sets len = 0 and pos = 0. */
+ /// <summary>
+ /// Sets len = 0 and pos = 0.
+ /// </summary>
internal void Clear()
{
Reset();
Restart();
}
- /** Sets pos = 0 */
+ /// <summary>
+ /// Sets pos = 0
+ /// </summary>
internal void Restart()
{
pos = 0;
}
- /** Returns the next char in the segment. */
+ /// <summary>
+ /// Returns the next char in the segment.
+ /// </summary>
internal int NextChar()
{
Debug.Assert(!IsRead, "Attempting to read past the end of a segment.");
return m_buf[pos++];
}
- /** Returns true when all characters in the text segment have been read */
+ /// <summary>
+ /// Returns true when all characters in the text segment have been read
+ /// </summary>
internal bool IsRead
{
get { return pos >= m_len; }
}
}
-
-
-
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -30972,14 +30983,11 @@ namespace Lucene.Net.Analysis.CharFilters
return map;
}
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -31035,9 +31043,9 @@ namespace Lucene.Net.Analysis.CharFilters
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
private void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -31047,19 +31055,17 @@ namespace Lucene.Net.Analysis.CharFilters
zzReader.Dispose();
}
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="ZZ_INITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream</param>
private void YyReset(BufferedCharFilter reader)
{
zzReader = reader;
@@ -31075,75 +31081,66 @@ namespace Lucene.Net.Analysis.CharFilters
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
private int YyState
{
get { return zzLexicalState; }
}
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
private void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
- private string YyText
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
+ /// <returns>Returns the text matched by the current regular expression.</returns>
+ private string YyText()
{
- get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
+ return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText[pos], but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text. It is equivalent to YyText()[pos], but faster
+ /// </summary>
+ /// <param name="pos">the position of the character to fetch. A value from 0 to YyLength-1.</param>
+ /// <returns>the character at position pos</returns>
private char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
private int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a well-formed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -31159,15 +31156,13 @@ namespace Lucene.Net.Analysis.CharFilters
throw new Exception(message);
}
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ ///
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">the number of characters to be read again.
+ /// This number must not be greater than YyLength!</param>
private void YyPushBack(int number)
{
if (number > YyLength)
@@ -31177,10 +31172,10 @@ namespace Lucene.Net.Analysis.CharFilters
}
- /**
- * Contains user EOF-code, which will be executed exactly once,
- * when the end of file is reached
- */
+ /// <summary>
+ /// Contains user EOF-code, which will be executed exactly once,
+ /// when the end of file is reached
+ /// </summary>
private void ZzDoEOF()
{
if (!zzEOFDone)
@@ -31243,14 +31238,12 @@ namespace Lucene.Net.Analysis.CharFilters
}
}
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private int NextChar()
{
int zzInput;
@@ -31384,7 +31377,7 @@ namespace Lucene.Net.Analysis.CharFilters
inputSegment.Write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7)
{ // 0x10FFFF = 1114111: max 7 decimal chars
- string decimalCharRef = YyText;
+ string decimalCharRef = YyText();
int codePoint = 0;
try
{
@@ -31689,7 +31682,7 @@ namespace Lucene.Net.Analysis.CharFilters
{
if (inputSegment.Length > 2)
{ // Chars between "<!" and "--" - this is not a comment
- inputSegment.Append(YyText);
+ inputSegment.Append(YyText());
}
else
{
@@ -31835,7 +31828,7 @@ namespace Lucene.Net.Analysis.CharFilters
{
if (inputSegment.Length > 2)
{ // Chars between "<!" and "[CDATA[" - this is not a CDATA section
- inputSegment.Append(YyText);
+ inputSegment.Append(YyText());
}
else
{
@@ -31907,7 +31900,7 @@ namespace Lucene.Net.Analysis.CharFilters
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.Clear();
- string surrogatePair = YyText;
+ string surrogatePair = YyText();
char highSurrogate = '\u0000';
try
{
@@ -31937,7 +31930,7 @@ namespace Lucene.Net.Analysis.CharFilters
case 103: break;
case 51:
{ // Handle paired UTF-16 surrogates.
- string surrogatePair = YyText;
+ string surrogatePair = YyText();
char highSurrogate = '\u0000';
char lowSurrogate = '\u0000';
try
@@ -31979,7 +31972,7 @@ namespace Lucene.Net.Analysis.CharFilters
case 104: break;
case 52:
{ // Handle paired UTF-16 surrogates.
- string surrogatePair = YyText;
+ string surrogatePair = YyText();
char highSurrogate = '\u0000';
try
{ // High surrogates are in decimal range [55296, 56319]
@@ -32019,7 +32012,7 @@ namespace Lucene.Net.Analysis.CharFilters
case 105: break;
case 53:
{ // Handle paired UTF-16 surrogates.
- string surrogatePair = YyText;
+ string surrogatePair = YyText();
char highSurrogate = '\u0000';
try
{ // High surrogates are in decimal range [55296, 56319]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
index 7a9ce24..53a01a9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
@@ -23,22 +23,23 @@ namespace Lucene.Net.Analysis.CharFilters
*/
/// <summary>
- /// Factory for <seealso cref="HTMLStripCharFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="HTMLStripCharFilter"/>.
+ /// <code>
/// <fieldType name="text_html" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" />
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class HTMLStripCharFilterFactory : CharFilterFactory
{
- internal readonly ICollection<string> escapedTags;
- internal static readonly Regex TAG_NAME_PATTERN = new Regex(@"[^\\s,]+", RegexOptions.Compiled);
+ private readonly ICollection<string> escapedTags;
+ private static readonly Regex TAG_NAME_PATTERN = new Regex(@"[^\\s,]+", RegexOptions.Compiled);
/// <summary>
- /// Creates a new HTMLStripCharFilterFactory </summary>
+ /// Creates a new <see cref="HTMLStripCharFilterFactory"/> </summary>
public HTMLStripCharFilterFactory(IDictionary<string, string> args) : base(args)
{
escapedTags = GetSet(args, "escapedTags");
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
index 05b7469..08ac354 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
@@ -26,8 +26,8 @@ namespace Lucene.Net.Analysis.CharFilters
*/
/// <summary>
- /// Simplistic <seealso cref="CharFilter"/> that applies the mappings
- /// contained in a <seealso cref="NormalizeCharMap"/> to the character
+ /// Simplistic <see cref="CharFilter"/> that applies the mappings
+ /// contained in a <see cref="NormalizeCharMap"/> to the character
/// stream, and correcting the resulting changes to the
/// offsets. Matching is greedy (longest pattern matching at
/// a given point wins). Replacement is allowed to be the
@@ -35,7 +35,6 @@ namespace Lucene.Net.Analysis.CharFilters
/// </summary>
public class MappingCharFilter : BaseCharFilter
{
-
private readonly Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
private readonly FST<CharsRef> map;
private readonly FST.BytesReader fstReader;
@@ -48,13 +47,14 @@ namespace Lucene.Net.Analysis.CharFilters
private int inputOff;
/// <summary>
- /// LUCENENET support to buffer the reader.
+ /// LUCENENET specific support to buffer the reader.
/// </summary>
- private BufferedCharFilter _input;
+ private readonly BufferedCharFilter _input;
/// <summary>
- /// Default constructor that takes a <seealso cref="TextReader"/>. </summary>
- public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
+ /// Default constructor that takes a <see cref="TextReader"/>. </summary>
+ public MappingCharFilter(NormalizeCharMap normMap, TextReader @in)
+ : base(@in)
{
//LUCENENET support to reset the reader.
_input = GetBufferedReader(@in);
@@ -76,10 +76,10 @@ namespace Lucene.Net.Analysis.CharFilters
}
/// <summary>
- /// LUCENENET: Copied this method from the WordlistLoader class - this class requires readers
- /// with a Reset() method (which .NET readers don't support). So, we use the BufferedCharFilter
+ /// LUCENENET: Copied this method from the <see cref="WordlistLoader"/> class - this class requires readers
+ /// with a Reset() method (which .NET readers don't support). So, we use the <see cref="BufferedCharFilter"/>
/// (which is similar to Java BufferedReader) as a wrapper for whatever reader the user passes
- /// (unless it is already a BufferedCharFilter).
+ /// (unless it is already a <see cref="BufferedCharFilter"/>).
/// </summary>
/// <param name="reader"></param>
/// <returns></returns>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
index 84878a5..dd12acf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
@@ -24,25 +24,25 @@ namespace Lucene.Net.Analysis.CharFilters
*/
/// <summary>
- /// Factory for <seealso cref="MappingCharFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="MappingCharFilter"/>.
+ /// <code>
/// <fieldType name="text_map" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
///
/// @since Solr 1.4
/// </summary>
public class MappingCharFilterFactory : CharFilterFactory, IResourceLoaderAware, IMultiTermAwareComponent
{
-
protected internal NormalizeCharMap m_normMap;
private readonly string mapping;
/// <summary>
- /// Creates a new MappingCharFilterFactory </summary>
+ /// Creates a new <see cref="MappingCharFilterFactory"/> </summary>
public MappingCharFilterFactory(IDictionary<string, string> args) : base(args)
{
mapping = Get(args, "mapping");
@@ -92,10 +92,9 @@ namespace Lucene.Net.Analysis.CharFilters
}
// "source" => "target"
- //internal static Pattern p = Pattern.compile("\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$");
- internal static Regex p = new Regex(@"\""(.*)\""\\s*=>\\s*\""(.*)\""\\s*$", RegexOptions.Compiled);
+ private static Regex p = new Regex(@"\""(.*)\""\\s*=>\\s*\""(.*)\""\\s*$", RegexOptions.Compiled);
- protected internal virtual void ParseRules(IList<string> rules, NormalizeCharMap.Builder builder)
+ protected virtual void ParseRules(IList<string> rules, NormalizeCharMap.Builder builder)
{
foreach (string rule in rules)
{
@@ -108,7 +107,7 @@ namespace Lucene.Net.Analysis.CharFilters
}
}
- internal char[] @out = new char[256];
+ private char[] @out = new char[256];
protected internal virtual string ParseString(string s)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
index de0c0d0..bcb031a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
@@ -27,13 +27,12 @@ namespace Lucene.Net.Analysis.CharFilters
// TODO: save/load?
/// <summary>
- /// Holds a map of String input to String output, to be used
- /// with <seealso cref="Builder"/>. Use the <seealso cref="MappingCharFilter"/>
+ /// Holds a map of <see cref="string"/> input to <see cref="string"/> output, to be used
+ /// with <see cref="Builder"/>. Use the <see cref="MappingCharFilter"/>
/// to create this.
/// </summary>
public class NormalizeCharMap
{
-
internal readonly FST<CharsRef> map;
internal readonly IDictionary<char?, FST.Arc<CharsRef>> cachedRootArcs = new Dictionary<char?, FST.Arc<CharsRef>>();
@@ -82,8 +81,7 @@ namespace Lucene.Net.Analysis.CharFilters
/// </summary>
public class Builder
{
-
- internal readonly IDictionary<string, string> pendingPairs = new SortedDictionary<string, string>();
+ private readonly IDictionary<string, string> pendingPairs = new SortedDictionary<string, string>();
/// <summary>
/// Records a replacement to be applied to the input
@@ -115,7 +113,6 @@ namespace Lucene.Net.Analysis.CharFilters
/// </summary>
public virtual NormalizeCharMap Build()
{
-
FST<CharsRef> map;
try
{
[2/6] lucenenet git commit: Lucene.Net.Analysis.Bg refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Bg refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1b0bca68
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1b0bca68
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1b0bca68
Branch: refs/heads/api-work
Commit: 1b0bca68b88838741866ad7ab782206ccb66518b
Parents: 2878664
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:16:24 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:16:24 2017 +0700
----------------------------------------------------------------------
.../Analysis/Bg/BulgarianAnalyzer.cs | 24 +++++++++-----------
.../Analysis/Bg/BulgarianStemFilter.cs | 6 ++---
.../Analysis/Bg/BulgarianStemFilterFactory.cs | 9 ++++----
.../Analysis/Bg/BulgarianStemmer.cs | 4 ++--
4 files changed, 21 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
index efa9eca..cab90fa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
@@ -26,14 +26,12 @@ namespace Lucene.Net.Analysis.Bg
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Bulgarian.
+ /// <see cref="Analyzer"/> for Bulgarian.
/// <para>
/// This analyzer implements light-stemming as specified by: <i> Searching
/// Strategies for the Bulgarian Language </i>
/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
/// </para>
- /// <para>
- /// </para>
/// </summary>
public sealed class BulgarianAnalyzer : StopwordAnalyzerBase
{
@@ -86,7 +84,7 @@ namespace Lucene.Net.Analysis.Bg
/// <summary>
/// Builds an analyzer with the default stop words:
- /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public BulgarianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -103,8 +101,8 @@ namespace Lucene.Net.Analysis.Bg
/// <summary>
/// Builds an analyzer with the given stop words and a stem exclusion set.
- /// If a stem exclusion set is provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/>
- /// before <seealso cref="BulgarianStemFilter"/>.
+ /// If a stem exclusion set is provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/>
+ /// before <see cref="BulgarianStemFilter"/>.
/// </summary>
public BulgarianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
: base(matchVersion, stopwords)
@@ -114,15 +112,15 @@ namespace Lucene.Net.Analysis.Bg
/// <summary>
/// Creates a
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from an <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
- /// , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
- /// provided and <seealso cref="BulgarianStemFilter"/>. </returns>
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from an <see cref="StandardTokenizer"/> filtered with
+ /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// provided and <see cref="BulgarianStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
index 42dff08..beeef3b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
@@ -20,12 +20,12 @@ namespace Lucene.Net.Analysis.Bg
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="BulgarianStemmer"/> to stem Bulgarian
+ /// A <see cref="TokenFilter"/> that applies <see cref="BulgarianStemmer"/> to stem Bulgarian
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
- /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
public sealed class BulgarianStemFilter : TokenFilter
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
index 1ba70d9..ab47af6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Bg
*/
/// <summary>
- /// Factory for <seealso cref="BulgarianStemFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="BulgarianStemFilter"/>.
+ /// <code>
/// <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.LowerCaseFilterFactory"/>
/// <filter class="solr.BulgarianStemFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class BulgarianStemFilterFactory : TokenFilterFactory
{
/// <summary>
- /// Creates a new BulgarianStemFilterFactory </summary>
+ /// Creates a new <see cref="BulgarianStemFilterFactory"/> </summary>
public BulgarianStemFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
index 0b8c339..3ff4017 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.Bg
/// Light Stemmer for Bulgarian.
/// <para>
/// Implements the algorithm described in:
- /// <i>
+ /// <c>
/// Searching Strategies for the Bulgarian Language
- /// </i>
+ /// </c>
/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
/// </para>
/// </summary>