You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 20:15:53 UTC
[1/6] lucenenet git commit: Lucene.Net.Analysis.Ckb refactor: member
accessibility and documentation comments
Repository: lucenenet
Updated Branches:
refs/heads/api-work 917b4fdf5 -> 816f0c9b4
Lucene.Net.Analysis.Ckb refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/09865451
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/09865451
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/09865451
Branch: refs/heads/api-work
Commit: 0986545155feac6675813f1bf4a1671dfa087115
Parents: 917b4fd
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 23:09:13 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 23:09:13 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ckb/SoraniAnalyzer.cs | 27 ++++++++++----------
.../Analysis/Ckb/SoraniNormalizationFilter.cs | 4 +--
.../Ckb/SoraniNormalizationFilterFactory.cs | 10 ++++----
.../Analysis/Ckb/SoraniNormalizer.cs | 22 ++++++++--------
.../Analysis/Ckb/SoraniStemFilter.cs | 13 +++++-----
.../Analysis/Ckb/SoraniStemFilterFactory.cs | 12 ++++-----
.../Analysis/Ckb/SoraniStemmer.cs | 1 -
7 files changed, 44 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
index 800b6ad..7f7bfa8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniAnalyzer.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Analysis.Ckb
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Sorani Kurdish.
+ /// <see cref="Analyzer"/> for Sorani Kurdish.
/// </summary>
public sealed class SoraniAnalyzer : StopwordAnalyzerBase
{
@@ -49,7 +49,7 @@ namespace Lucene.Net.Analysis.Ckb
}
/// <summary>
- /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class
/// accesses the static final set the first time.
/// </summary>
private class DefaultSetHolder
@@ -77,7 +77,7 @@ namespace Lucene.Net.Analysis.Ckb
}
/// <summary>
- /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+ /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public SoraniAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -96,29 +96,30 @@ namespace Lucene.Net.Analysis.Ckb
/// <summary>
/// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
- /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+ /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
/// stemming.
/// </summary>
/// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
- public SoraniAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+ public SoraniAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
+ : base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}
/// <summary>
/// Creates a
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from an <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="StandardFilter"/>, <seealso cref="SoraniNormalizationFilter"/>,
- /// <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
- /// , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
- /// provided and <seealso cref="SoraniStemFilter"/>. </returns>
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from an <see cref="StandardTokenizer"/> filtered with
+ /// <see cref="StandardFilter"/>, <see cref="SoraniNormalizationFilter"/>,
+ /// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+ /// provided and <see cref="SoraniStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
index dedf0e7..3a29c33 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilter.cs
@@ -20,7 +20,7 @@ namespace Lucene.Net.Analysis.Ckb
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="SoraniNormalizer"/> to normalize the
+ /// A <see cref="TokenFilter"/> that applies <see cref="SoraniNormalizer"/> to normalize the
/// orthography.
/// </summary>
public sealed class SoraniNormalizationFilter : TokenFilter
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Ckb
{
if (m_input.IncrementToken())
{
- int newlen = normalizer.normalize(termAtt.Buffer, termAtt.Length);
+ int newlen = normalizer.Normalize(termAtt.Buffer, termAtt.Length);
termAtt.Length = newlen;
return true;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
index ed34761..e606069 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizationFilterFactory.cs
@@ -21,20 +21,20 @@ namespace Lucene.Net.Analysis.Ckb
*/
/// <summary>
- /// Factory for <seealso cref="SoraniNormalizationFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="SoraniNormalizationFilter"/>.
+ /// <code>
/// <fieldType name="text_ckbnormal" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.SoraniNormalizationFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class SoraniNormalizationFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new SoraniNormalizationFilterFactory </summary>
+ /// Creates a new <see cref="SoraniNormalizationFilterFactory"/> </summary>
public SoraniNormalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
index 5a3d708..19135d9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniNormalizer.cs
@@ -23,21 +23,19 @@ namespace Lucene.Net.Analysis.Ckb
/// <summary>
/// Normalizes the Unicode representation of Sorani text.
- /// <para>
+ /// <para/>
/// Normalization consists of:
- /// <ul>
- /// <li>Alternate forms of 'y' (0064, 0649) are converted to 06CC (FARSI YEH)
- /// <li>Alternate form of 'k' (0643) is converted to 06A9 (KEHEH)
- /// <li>Alternate forms of vowel 'e' (0647+200C, word-final 0647, 0629) are converted to 06D5 (AE)
- /// <li>Alternate (joining) form of 'h' (06BE) is converted to 0647
- /// <li>Alternate forms of 'rr' (0692, word-initial 0631) are converted to 0695 (REH WITH SMALL V BELOW)
- /// <li>Harakat, tatweel, and formatting characters such as directional controls are removed.
- /// </ul>
- /// </para>
+ /// <list type="bullet">
+ /// <item>Alternate forms of 'y' (064A, 0649) are converted to 06CC (FARSI YEH)</item>
+ /// <item>Alternate form of 'k' (0643) is converted to 06A9 (KEHEH)</item>
+ /// <item>Alternate forms of vowel 'e' (0647+200C, word-final 0647, 0629) are converted to 06D5 (AE)</item>
+ /// <item>Alternate (joining) form of 'h' (06BE) is converted to 0647</item>
+ /// <item>Alternate forms of 'rr' (0692, word-initial 0631) are converted to 0695 (REH WITH SMALL V BELOW)</item>
+ /// <item>Harakat, tatweel, and formatting characters such as directional controls are removed.</item>
+ /// </list>
/// </summary>
public class SoraniNormalizer
{
-
internal const char YEH = '\u064A';
internal const char DOTLESS_YEH = '\u0649';
internal const char FARSI_YEH = '\u06CC';
@@ -71,7 +69,7 @@ namespace Lucene.Net.Analysis.Ckb
/// <param name="s"> input buffer </param>
/// <param name="len"> length of input buffer </param>
/// <returns> length of input buffer after normalization </returns>
- public virtual int normalize(char[] s, int len)
+ public virtual int Normalize(char[] s, int len)
{
for (int i = 0; i < len; i++)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
index 72c5841..13a26f2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilter.cs
@@ -20,13 +20,14 @@ namespace Lucene.Net.Analysis.Ckb
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="SoraniStemmer"/> to stem Sorani words.
+ /// A <see cref="TokenFilter"/> that applies <see cref="SoraniStemmer"/> to stem Sorani words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
- /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
- /// </para> </summary>
- /// <seealso cref= SetKeywordMarkerFilter </seealso>
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
+ /// </para>
+ /// </summary>
+ /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
public sealed class SoraniStemFilter : TokenFilter
{
private readonly SoraniStemmer stemmer = new SoraniStemmer();
@@ -34,7 +35,7 @@ namespace Lucene.Net.Analysis.Ckb
private readonly IKeywordAttribute keywordAttr;
public SoraniStemFilter(TokenStream input)
- : base(input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
index 31329b8..6d9e002 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemFilterFactory.cs
@@ -21,23 +21,23 @@ namespace Lucene.Net.Analysis.Ckb
*/
/// <summary>
- /// Factory for <seealso cref="SoraniStemFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="SoraniStemFilter"/>.
+ /// <code>
/// <fieldType name="text_ckbstem" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.SoraniNormalizationFilterFactory"/>
/// <filter class="solr.SoraniStemFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class SoraniStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new SoraniStemFilterFactory </summary>
+ /// Creates a new <see cref="SoraniStemFilterFactory"/> </summary>
public SoraniStemFilterFactory(IDictionary<string, string> args)
- : base(args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/09865451/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
index b978be9..c375b24 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ckb/SoraniStemmer.cs
@@ -24,7 +24,6 @@ namespace Lucene.Net.Analysis.Ckb
/// </summary>
public class SoraniStemmer
{
-
/// <summary>
/// Stem an input buffer of Sorani text.
/// </summary>
[5/6] lucenenet git commit: Lucene.Net.Analysis.Core refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Core refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/695b714f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/695b714f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/695b714f
Branch: refs/heads/api-work
Commit: 695b714fa5d7a95de2999e235e2ccc210e65f3dc
Parents: 6dc3ac1
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 02:39:12 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 02:54:24 2017 +0700
----------------------------------------------------------------------
.../Analysis/Core/KeywordAnalyzer.cs | 2 -
.../Analysis/Core/KeywordTokenizer.cs | 11 ++-
.../Analysis/Core/KeywordTokenizerFactory.cs | 16 ++---
.../Analysis/Core/LetterTokenizer.cs | 38 +++++------
.../Analysis/Core/LetterTokenizerFactory.cs | 16 ++---
.../Analysis/Core/LowerCaseFilter.cs | 19 +++---
.../Analysis/Core/LowerCaseFilterFactory.cs | 14 ++--
.../Analysis/Core/LowerCaseTokenizer.cs | 47 ++++++-------
.../Analysis/Core/LowerCaseTokenizerFactory.cs | 16 ++---
.../Analysis/Core/SimpleAnalyzer.cs | 33 ++++-----
.../Analysis/Core/StopAnalyzer.cs | 52 +++++++-------
.../Analysis/Core/StopFilter.cs | 72 +++++++++-----------
.../Analysis/Core/StopFilterFactory.cs | 48 ++++++-------
.../Analysis/Core/TypeTokenFilter.cs | 17 +++--
.../Analysis/Core/TypeTokenFilterFactory.cs | 13 ++--
.../Analysis/Core/UpperCaseFilter.cs | 19 +++---
.../Analysis/Core/UpperCaseFilterFactory.cs | 12 ++--
.../Analysis/Core/WhitespaceAnalyzer.cs | 25 +++----
.../Analysis/Core/WhitespaceTokenizer.cs | 41 +++++------
.../Analysis/Core/WhitespaceTokenizerFactory.cs | 10 +--
.../Miscellaneous/CodepointCountFilter.cs | 2 +-
.../Analysis/Miscellaneous/KeepWordFilter.cs | 2 +-
.../Analysis/Miscellaneous/LengthFilter.cs | 2 +-
.../Analysis/Util/FilteringTokenFilter.cs | 2 +-
.../Analysis/Util/MultiTermAwareComponent.cs | 2 +-
.../Analysis/Core/TestTypeTokenFilterFactory.cs | 2 +-
26 files changed, 252 insertions(+), 281 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
index 8f5e5d9..1f2d00d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordAnalyzer.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Core
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,7 +19,6 @@ namespace Lucene.Net.Analysis.Core
* limitations under the License.
*/
-
/// <summary>
/// "Tokenizes" the entire stream as a single token. This is useful
/// for data like zip codes, ids, and some product names.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
index f170588..4f22490 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizer.cs
@@ -1,7 +1,6 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
-using Reader = System.IO.TextReader;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -54,7 +53,7 @@ namespace Lucene.Net.Analysis.Core
termAtt.ResizeBuffer(bufferSize);
}
- public KeywordTokenizer(AttributeSource.AttributeFactory factory, Reader input, int bufferSize)
+ public KeywordTokenizer(AttributeSource.AttributeFactory factory, TextReader input, int bufferSize)
: base(factory, input)
{
termAtt = AddAttribute<ICharTermAttribute>();
@@ -67,7 +66,7 @@ namespace Lucene.Net.Analysis.Core
termAtt.ResizeBuffer(bufferSize);
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (!done)
{
@@ -96,7 +95,7 @@ namespace Lucene.Net.Analysis.Core
return false;
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
index 29a812a..7d87b37 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/KeywordTokenizerFactory.cs
@@ -1,7 +1,7 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="KeywordTokenizer"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="KeywordTokenizer"/>.
+ /// <code>
/// <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.KeywordTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class KeywordTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new KeywordTokenizerFactory </summary>
+ /// Creates a new <see cref="KeywordTokenizerFactory"/> </summary>
public KeywordTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
index 9d3dc2b..9ef19a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
@@ -1,7 +1,7 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -23,32 +23,30 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to
+ /// A <see cref="LetterTokenizer"/> is a tokenizer that divides text at non-letters. That's to
/// say, it defines tokens as maximal strings of adjacent letters, as defined by
- /// java.lang.Character.isLetter() predicate.
+ /// <see cref="char.IsLetter"/> predicate.
/// <para>
/// Note: this does a decent job for most European languages, but does a terrible
/// job for some Asian languages, where words are not separated by spaces.
/// </para>
/// <para>
- /// <a name="version"/>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// <seealso cref="LetterTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
- /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="LetterTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="CharTokenizer"/> uses an <see cref="int"/> based API to normalize and
+ /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
public class LetterTokenizer : CharTokenizer
{
-
/// <summary>
- /// Construct a new LetterTokenizer.
+ /// Construct a new <see cref="LetterTokenizer"/>.
/// </summary>
/// <param name="matchVersion">
- /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <see cref="LuceneVersion"/> to match. </param>
/// <param name="in">
/// the input to split up into tokens </param>
public LetterTokenizer(LuceneVersion matchVersion, TextReader @in)
@@ -57,13 +55,13 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Construct a new LetterTokenizer using a given
- /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+ /// Construct a new <see cref="LetterTokenizer"/> using a given
+ /// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">
- /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <see cref="LuceneVersion"/> to match</param>
/// <param name="factory">
- /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+ /// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
/// the input to split up into tokens </param>
public LetterTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
@@ -73,7 +71,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Collects only characters which satisfy
- /// <seealso cref="Character#isLetter(int)"/>.
+ /// <see cref="Character.IsLetter(int)"/>.
/// </summary>
protected override bool IsTokenChar(int c)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
index 0fe8bed..611a4a4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizerFactory.cs
@@ -1,7 +1,7 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="LetterTokenizer"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="LetterTokenizer"/>.
+ /// <code>
/// <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.LetterTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class LetterTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new LetterTokenizerFactory </summary>
+ /// Creates a new <see cref="LetterTokenizerFactory"/> </summary>
public LetterTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
index fce4e12..36bde21 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
@@ -4,7 +4,6 @@ using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Core
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,14 +20,14 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// Normalizes token text to lower case.
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+ /// <para>You must specify the required <see cref="LuceneVersion"/>
/// compatibility when creating LowerCaseFilter:
- /// <ul>
- /// <li> As of 3.1, supplementary characters are properly lowercased.
- /// </ul>
+ /// <list type="bullet">
+ /// <item> As of 3.1, supplementary characters are properly lowercased.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class LowerCaseFilter : TokenFilter
@@ -37,10 +36,10 @@ namespace Lucene.Net.Analysis.Core
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Create a new LowerCaseFilter, that normalizes token text to lower case.
+ /// Create a new <see cref="LowerCaseFilter"/>, that normalizes token text to lower case.
/// </summary>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
- /// <param name="in"> TokenStream to filter </param>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
+ /// <param name="in"> <see cref="TokenStream"/> to filter </param>
public LowerCaseFilter(LuceneVersion matchVersion, TokenStream @in)
: base(@in)
{
@@ -48,7 +47,7 @@ namespace Lucene.Net.Analysis.Core
charUtils = CharacterUtils.GetInstance(matchVersion);
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
index f34afe0..5d4446c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Core
{
@@ -21,20 +21,20 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="LowerCaseFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="LowerCaseFilter"/>.
+ /// <code>
/// <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.LowerCaseFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class LowerCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new LowerCaseFilterFactory </summary>
+ /// Creates a new <see cref="LowerCaseFilterFactory"/> </summary>
public LowerCaseFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
index 94cfbb4..027f3d7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizer.cs
@@ -1,11 +1,9 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Support;
+\ufeffusing Lucene.Net.Support;
using Lucene.Net.Util;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,35 +20,34 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
- /// LowerCaseTokenizer performs the function of LetterTokenizer
- /// and LowerCaseFilter together. It divides text at non-letters and converts
+ /// <see cref="LowerCaseTokenizer"/> performs the function of <see cref="LetterTokenizer"/>
+ /// and <see cref="LowerCaseFilter"/> together. It divides text at non-letters and converts
/// them to lower case. While it is functionally equivalent to the combination
- /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+ /// of <see cref="LetterTokenizer"/> and <see cref="LowerCaseFilter"/>, there is a performance advantage
/// to doing the two tasks at once, hence this (redundant) implementation.
- /// <P>
+ /// <para>
/// Note: this does a decent job for most European languages, but does a terrible
/// job for some Asian languages, where words are not separated by spaces.
- /// </p>
+ /// </para>
/// <para>
- /// <a name="version"/>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// <seealso cref="LowerCaseTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
- /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="LowerCaseTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="Util.CharTokenizer"/> uses an int based API to normalize and
+ /// detect token characters. See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class LowerCaseTokenizer : LetterTokenizer
{
-
/// <summary>
- /// Construct a new LowerCaseTokenizer.
+ /// Construct a new <see cref="LowerCaseTokenizer"/>.
/// </summary>
/// <param name="matchVersion">
- /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/>
+ /// <see cref="LuceneVersion"/> to match
/// </param>
/// <param name="in">
/// the input to split up into tokens </param>
@@ -60,13 +57,13 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Construct a new LowerCaseTokenizer using a given
- /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+ /// Construct a new <see cref="LowerCaseTokenizer"/> using a given
+ /// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">
- /// Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <see cref="LuceneVersion"/> to match </param>
/// <param name="factory">
- /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+ /// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
/// the input to split up into tokens </param>
public LowerCaseTokenizer(LuceneVersion matchVersion, AttributeSource.AttributeFactory factory, TextReader @in)
@@ -76,7 +73,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Converts char to lower case
- /// <seealso cref="Character#toLowerCase(int)"/>.
+ /// <see cref="Character.ToLowerCase(int)"/>.
/// </summary>
protected override int Normalize(int c)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
index 76b9d81..08e4b4f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseTokenizerFactory.cs
@@ -1,7 +1,7 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="LowerCaseTokenizer"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="LowerCaseTokenizer"/>.
+ /// <code>
/// <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.LowerCaseTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class LowerCaseTokenizerFactory : TokenizerFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new LowerCaseTokenizerFactory </summary>
+ /// Creates a new <see cref="LowerCaseTokenizerFactory"/> </summary>
public LowerCaseTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
index d2165d0..80586d0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/SimpleAnalyzer.cs
@@ -1,10 +1,8 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
+\ufeffusing Lucene.Net.Util;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,30 +19,27 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
- /// An <seealso cref="Analyzer"/> that filters <seealso cref="LetterTokenizer"/>
- /// with <seealso cref="LowerCaseFilter"/>
+ /// An <see cref="Analyzer"/> that filters <see cref="LetterTokenizer"/>
+ /// with <see cref="LowerCaseFilter"/>
/// <para>
- /// <a name="version">You must specify the required <seealso cref="LuceneVersion"/> compatibility
- /// when creating <seealso cref="CharTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="LowerCaseTokenizer"/> uses an int based API to normalize and
- /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
- /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
- /// </ul>
- /// </para>
- /// <para>
- ///
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility
+ /// when creating <see cref="Util.CharTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="LowerCaseTokenizer"/> uses an int based API to normalize and
+ /// detect token codepoints. See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class SimpleAnalyzer : Analyzer
{
-
private readonly LuceneVersion matchVersion;
/// <summary>
- /// Creates a new <seealso cref="SimpleAnalyzer"/> </summary>
- /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+ /// Creates a new <see cref="SimpleAnalyzer"/> </summary>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
public SimpleAnalyzer(LuceneVersion matchVersion)
{
this.matchVersion = matchVersion;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
index d1a2a26..e91072e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
@@ -1,8 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
namespace Lucene.Net.Analysis.Core
{
@@ -24,21 +24,19 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Filters <seealso cref="LetterTokenizer"/> with <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>.
- ///
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="LuceneVersion"/>
- /// compatibility when creating StopAnalyzer:
- /// <ul>
- /// <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- /// supplementary characters in stopwords
- /// <li> As of 2.9, position increments are preserved
- /// </ul>
+ /// Filters <see cref="LetterTokenizer"/> with <see cref="LowerCaseFilter"/> and <see cref="StopFilter"/>.
+ /// <para>
+ /// You must specify the required <see cref="LuceneVersion"/>
+ /// compatibility when creating <see cref="StopAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.1, StopFilter correctly handles Unicode 4.0
+ /// supplementary characters in stopwords</item>
+ /// <item> As of 2.9, position increments are preserved</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class StopAnalyzer : StopwordAnalyzerBase
{
-
/// <summary>
/// An unmodifiable set containing some common English words that are not usually useful
/// for searching.
@@ -59,8 +57,8 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Builds an analyzer which removes words in
- /// <seealso cref="#ENGLISH_STOP_WORDS_SET"/>. </summary>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+ /// <see cref="ENGLISH_STOP_WORDS_SET"/>. </summary>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
public StopAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, ENGLISH_STOP_WORDS_SET)
{
@@ -68,7 +66,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Builds an analyzer with the stop words from the given set. </summary>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
/// <param name="stopWords"> Set of stop words </param>
public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
: base(matchVersion, stopWords)
@@ -77,8 +75,8 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Builds an analyzer with the stop words from the given file. </summary>
- /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
/// <param name="stopwordsFile"> File to load stop words from </param>
public StopAnalyzer(LuceneVersion matchVersion, FileInfo stopwordsFile)
: this(matchVersion, LoadStopwordSet(stopwordsFile, matchVersion))
@@ -87,9 +85,9 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
- /// <param name="stopwords"> TextReader to load stop words from </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
+ /// <param name="stopwords"> <see cref="TextReader"/> to load stop words from </param>
public StopAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
: this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
{
@@ -97,12 +95,12 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Creates
- /// <seealso cref="Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="TextReader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
- /// <returns> <seealso cref="Analyzer.TokenStreamComponents"/>
- /// built from a <seealso cref="LowerCaseTokenizer"/> filtered with
- /// <seealso cref="StopFilter"/> </returns>
+ /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from a <see cref="LowerCaseTokenizer"/> filtered with
+ /// <see cref="StopFilter"/> </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new LowerCaseTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
index b8582a4..1e5e2a0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilter.cs
@@ -1,9 +1,7 @@
\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.Util;
-using Lucene.Net.Support;
using Lucene.Net.Util;
using System.Collections.Generic;
-using System.Linq;
namespace Lucene.Net.Analysis.Core
{
@@ -26,35 +24,33 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Removes stop words from a token stream.
- ///
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="LuceneVersion"/>
- /// compatibility when creating StopFilter:
- /// <ul>
- /// <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ /// <para>
+ /// You must specify the required <see cref="LuceneVersion"/>
+ /// compatibility when creating <see cref="StopFilter"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, StopFilter correctly handles Unicode 4.0
/// supplementary characters in stopwords and position
- /// increments are preserved
- /// </ul>
+ /// increments are preserved</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class StopFilter : FilteringTokenFilter
{
-
private readonly CharArraySet stopWords;
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Constructs a filter which removes words from the input TokenStream that are
- /// named in the Set.
+ /// Constructs a filter which removes words from the input <see cref="TokenStream"/> that are
+ /// named in the <see cref="CharArraySet"/>.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the stop
- /// set if Version > 3.0. See <a href="#version">above</a> for details. </param>
+ /// set if Version > 3.0. See <see cref="LuceneVersion"/> for details. </param>
/// <param name="in">
- /// Input stream </param>
+ /// Input <see cref="TokenStream"/> </param>
/// <param name="stopWords">
- /// A <seealso cref="CharArraySet"/> representing the stopwords. </param>
- /// <seealso cref= #makeStopSet(Version, java.lang.String...) </seealso>
+ /// A <see cref="CharArraySet"/> representing the stopwords. </param>
+ /// <seealso cref="MakeStopSet(LuceneVersion, string[])"/>
public StopFilter(LuceneVersion matchVersion, TokenStream @in, CharArraySet stopWords)
: base(matchVersion, @in)
{
@@ -63,29 +59,29 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Builds a Set from an array of stop words,
- /// appropriate for passing into the StopFilter constructor.
- /// This permits this stopWords construction to be cached once when
- /// an Analyzer is constructed.
+ /// Builds a <see cref="CharArraySet"/> from an array of stop words,
+ /// appropriate for passing into the <see cref="StopFilter"/> constructor.
+ /// This permits this <paramref name="stopWords"/> construction to be cached once when
+ /// an <see cref="Analyzer"/> is constructed.
/// </summary>
- /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
/// <param name="stopWords"> An array of stopwords </param>
- /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+ /// <seealso cref="MakeStopSet(LuceneVersion, string[], bool)"/> passing false to ignoreCase
public static CharArraySet MakeStopSet(LuceneVersion matchVersion, params string[] stopWords)
{
return MakeStopSet(matchVersion, stopWords, false);
}
/// <summary>
- /// Builds a Set from an array of stop words,
- /// appropriate for passing into the StopFilter constructor.
- /// This permits this stopWords construction to be cached once when
- /// an Analyzer is constructed.
+ /// Builds a <see cref="CharArraySet"/> from an array of stop words,
+ /// appropriate for passing into the <see cref="StopFilter"/> constructor.
+ /// This permits this <paramref name="stopWords"/> construction to be cached once when
+ /// an <see cref="Analyzer"/> is constructed.
/// </summary>
- /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
- /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
- /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
- /// <seealso cref= #makeStopSet(Version, java.lang.String[], boolean) passing false to ignoreCase </seealso>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+ /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param>
+ /// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns>
+ /// <seealso cref="MakeStopSet(LuceneVersion, string[], bool)"/> passing false to ignoreCase
public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords)
{
return MakeStopSet(matchVersion, stopWords, false);
@@ -94,10 +90,10 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Creates a stopword set from the given stopword array.
/// </summary>
- /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
/// <param name="stopWords"> An array of stopwords </param>
/// <param name="ignoreCase"> If true, all words are lower cased first. </param>
- /// <returns> a Set containing the words </returns>
+ /// <returns> a Set (<see cref="CharArraySet"/>) containing the words </returns>
public static CharArraySet MakeStopSet(LuceneVersion matchVersion, string[] stopWords, bool ignoreCase)
{
CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase);
@@ -107,10 +103,10 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Creates a stopword set from the given stopword list. </summary>
- /// <param name="matchVersion"> Lucene version to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
- /// <param name="stopWords"> A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to enable correct Unicode 4.0 behavior in the returned set if Version > 3.0 </param>
+ /// <param name="stopWords"> A List of <see cref="string"/>s or <see cref="char[]"/> or any other ToString()-able list representing the stopwords </param>
/// <param name="ignoreCase"> if true, all words are lower cased first </param>
- /// <returns> A Set (<seealso cref="CharArraySet"/>) containing the words </returns>
+ /// <returns> A Set (<see cref="CharArraySet"/>) containing the words </returns>
public static CharArraySet MakeStopSet<T1>(LuceneVersion matchVersion, IList<T1> stopWords, bool ignoreCase)
{
var stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase);
@@ -119,9 +115,9 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Returns the next input Token whose term() is not a stop word.
+ /// Returns the next input Token whose Term is not a stop word.
/// </summary>
- protected internal override bool Accept()
+ protected override bool Accept()
{
return !stopWords.Contains(termAtt.Buffer, 0, termAtt.Length);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
index abc6b8c..9466549 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
@@ -24,46 +24,46 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
- /// Factory for <seealso cref="StopFilter"/>.
+ /// Factory for <see cref="StopFilter"/>.
///
- /// <pre class="prettyprint">
+ /// <code>
/// <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.StopFilterFactory" ignoreCase="true"
/// words="stopwords.txt" format="wordset" />
/// </analyzer>
- /// </fieldType></pre>
- ///
+ /// </fieldType>
+ /// </code>
/// <para>
/// All attributes are optional:
/// </para>
- /// <ul>
- /// <li><code>ignoreCase</code> defaults to <code>false</code></li>
- /// <li><code>words</code> should be the name of a stopwords file to parse, if not
- /// specified the factory will use <seealso cref="StopAnalyzer#ENGLISH_STOP_WORDS_SET"/>
- /// </li>
- /// <li><code>format</code> defines how the <code>words</code> file will be parsed,
- /// and defaults to <code>wordset</code>. If <code>words</code> is not specified,
- /// then <code>format</code> must not be specified.
- /// </li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item><c>ignoreCase</c> defaults to <c>false</c></item>
+ /// <item><c>words</c> should be the name of a stopwords file to parse, if not
+ /// specified the factory will use <see cref="StopAnalyzer.ENGLISH_STOP_WORDS_SET"/>
+ /// </item>
+ /// <item><c>format</c> defines how the <c>words</c> file will be parsed,
+ /// and defaults to <c>wordset</c>. If <c>words</c> is not specified,
+ /// then <c>format</c> must not be specified.
+ /// </item>
+ /// </list>
/// <para>
- /// The valid values for the <code>format</code> option are:
+ /// The valid values for the <c>format</c> option are:
/// </para>
- /// <ul>
- /// <li><code>wordset</code> - This is the default format, which supports one word per
+ /// <list type="bullet">
+ /// <item><c>wordset</c> - This is the default format, which supports one word per
/// line (including any intra-word whitespace) and allows whole line comments
/// beginning with the "#" character. Blank lines are ignored. See
- /// <seealso cref="WordlistLoader#getLines WordlistLoader.getLines"/> for details.
- /// </li>
- /// <li><code>snowball</code> - This format allows for multiple words specified on each
+ /// <see cref="WordlistLoader.GetLines"/> for details.
+ /// </item>
+ /// <item><c>snowball</c> - This format allows for multiple words specified on each
/// line, and trailing comments may be specified using the vertical line ("|").
/// Blank lines are ignored. See
- /// <seealso cref="WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet"/>
+ /// <see cref="WordlistLoader.GetSnowballWordSet"/>
/// for details.
- /// </li>
- /// </ul>
+ /// </item>
+ /// </list>
/// </summary>
public class StopFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
@@ -77,7 +77,7 @@ namespace Lucene.Net.Analysis.Core
private readonly bool enablePositionIncrements;
/// <summary>
- /// Creates a new StopFilterFactory </summary>
+ /// Creates a new <see cref="StopFilterFactory"/> </summary>
public StopFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
index bf72df8..0c993ad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilter.cs
@@ -3,7 +3,6 @@ using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
-using System.Linq;
namespace Lucene.Net.Analysis.Core
{
@@ -51,11 +50,11 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Create a new <seealso cref="TypeTokenFilter"/>. </summary>
- /// <param name="version"> the Lucene match version </param>
- /// <param name="input"> the <seealso cref="TokenStream"/> to consume </param>
+ /// Create a new <see cref="TypeTokenFilter"/>. </summary>
+ /// <param name="version"> the <see cref="LuceneVersion"/> match version </param>
+ /// <param name="input"> the <see cref="TokenStream"/> to consume </param>
/// <param name="stopTypes"> the types to filter </param>
- /// <param name="useWhiteList"> if true, then tokens whose type is in stopTypes will
+ /// <param name="useWhiteList"> if true, then tokens whose type is in <paramref name="stopTypes"/> will
/// be kept, otherwise they will be filtered out </param>
public TypeTokenFilter(LuceneVersion version, TokenStream input, ICollection<string> stopTypes, bool useWhiteList)
: base(version, input)
@@ -66,9 +65,9 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Create a new <seealso cref="TypeTokenFilter"/> that filters tokens out
+ /// Create a new <see cref="TypeTokenFilter"/> that filters tokens out
/// (useWhiteList=false). </summary>
- /// <seealso cref= #TypeTokenFilter(Version, TokenStream, Set, boolean) </seealso>
+ /// <seealso cref="TypeTokenFilter.TypeTokenFilter(LuceneVersion, TokenStream, ICollection{string}, bool)"/>
public TypeTokenFilter(LuceneVersion version, TokenStream input, ICollection<string> stopTypes)
: this(version, input, stopTypes, false)
{
@@ -76,9 +75,9 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// By default accept the token if its type is not a stop type.
- /// When the useWhiteList parameter is set to true then accept the token if its type is contained in the stopTypes
+ /// When the <see cref="useWhiteList"/> parameter is set to true then accept the token if its type is contained in the <see cref="stopTypes"/>
/// </summary>
- protected internal override bool Accept()
+ protected override bool Accept()
{
return useWhiteList == stopTypes.Contains(typeAttribute.Type);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
index bf7658b..38c69ab 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/TypeTokenFilterFactory.cs
@@ -23,15 +23,16 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory class for <seealso cref="TypeTokenFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory class for <see cref="TypeTokenFilter"/>.
+ /// <code>
/// <fieldType name="chars" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt"
/// useWhitelist="false"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class TypeTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
@@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.Core
private HashSet<string> stopTypes;
/// <summary>
- /// Creates a new TypeTokenFilterFactory </summary>
+ /// Creates a new <see cref="TypeTokenFilterFactory"/> </summary>
public TypeTokenFilterFactory(IDictionary<string, string> args)
: base(args)
{
@@ -63,7 +64,7 @@ namespace Lucene.Net.Analysis.Core
foreach (string file in files)
{
IList<string> typesLines = GetLines(loader, file.Trim());
- stopTypes.AddAll(typesLines);
+ stopTypes.UnionWith(typesLines);
}
}
}
@@ -76,7 +77,7 @@ namespace Lucene.Net.Analysis.Core
}
}
- public virtual HashSet<string> StopTypes
+ public virtual ICollection<string> StopTypes
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
index ca4aab4..0e65be6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -20,16 +20,16 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// Normalizes token text to UPPER CASE.
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="LuceneVersion"/>
- /// compatibility when creating UpperCaseFilter
- ///
+ /// <para>
+ /// You must specify the required <see cref="LuceneVersion"/>
+ /// compatibility when creating <see cref="UpperCaseFilter"/>
/// </para>
/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
/// upper case character represents more than one lower case character. Use this filter
- /// when you Require uppercase tokens. Use the <seealso cref="LowerCaseFilter"/> for
+ /// when you Require uppercase tokens. Use the <see cref="LowerCaseFilter"/> for
/// general search matching
/// </para>
/// </summary>
@@ -39,10 +39,10 @@ namespace Lucene.Net.Analysis.Core
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Create a new UpperCaseFilter, that normalizes token text to upper case.
+ /// Create a new <see cref="UpperCaseFilter"/>, that normalizes token text to upper case.
/// </summary>
- /// <param name="matchVersion"> See <a href="#version">above</a> </param>
- /// <param name="in"> TokenStream to filter </param>
+ /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
+ /// <param name="in"> <see cref="TokenStream"/> to filter </param>
public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in)
: base(@in)
{
@@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.Core
charUtils = CharacterUtils.GetInstance(matchVersion);
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
@@ -64,5 +64,4 @@ namespace Lucene.Net.Analysis.Core
}
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
index e4ade5c..2a6661a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
@@ -21,26 +21,26 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="UpperCaseFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="UpperCaseFilter"/>.
+ /// <code>
/// <fieldType name="text_uppercase" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.UpperCaseFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
///
/// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
/// upper case character represents more than one lower case character. Use this filter
- /// when you require uppercase tokens. Use the <seealso cref="LowerCaseFilterFactory"/> for
+ /// when you require uppercase tokens. Use the <see cref="LowerCaseFilterFactory"/> for
/// general search matching
/// </para>
/// </summary>
public class UpperCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
{
-
/// <summary>
- /// Creates a new UpperCaseFilterFactory </summary>
+ /// Creates a new <see cref="UpperCaseFilterFactory"/> </summary>
public UpperCaseFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
index 9976966..6becd82 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceAnalyzer.cs
@@ -21,29 +21,26 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
- /// An Analyzer that uses <seealso cref="WhitespaceTokenizer"/>.
- /// <para>
- /// <a name="version">You must specify the required <seealso cref="LuceneVersion"/> compatibility
- /// when creating <seealso cref="CharTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="WhitespaceTokenizer"/> uses an int based API to normalize and
- /// detect token codepoints. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
- /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
- /// </ul>
- /// </para>
+ /// An <see cref="Analyzer"/> that uses <see cref="WhitespaceTokenizer"/>.
/// <para>
- ///
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility
+ /// when creating <see cref="CharTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="WhitespaceTokenizer"/> uses an int based API to normalize and
+ /// detect token codepoints. See <see cref="Util.CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class WhitespaceAnalyzer : Analyzer
{
-
private readonly LuceneVersion matchVersion;
/// <summary>
- /// Creates a new <seealso cref="WhitespaceAnalyzer"/> </summary>
- /// <param name="matchVersion"> Lucene version to match See <seealso cref="<a href="#version">above</a>"/> </param>
+ /// Creates a new <see cref="WhitespaceAnalyzer"/> </summary>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
public WhitespaceAnalyzer(LuceneVersion matchVersion)
{
this.matchVersion = matchVersion;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
index 5ccdbbf..a60a679 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
@@ -5,7 +5,6 @@ using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Core
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,26 +21,24 @@ namespace Lucene.Net.Analysis.Core
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
- /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
- /// Adjacent sequences of non-Whitespace characters form tokens. <a
- /// name="version"/>
+ /// A <see cref="WhitespaceTokenizer"/> is a tokenizer that divides text at whitespace.
+ /// Adjacent sequences of non-Whitespace characters form tokens.
/// <para>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// <seealso cref="WhitespaceTokenizer"/>:
- /// <ul>
- /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
- /// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="WhitespaceTokenizer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and
+ /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="CharTokenizer.Normalize(int)"/> for details.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class WhitespaceTokenizer : CharTokenizer
{
-
- /// Construct a new WhitespaceTokenizer. * <param name="matchVersion"> Lucene version
- /// to match See <seealso cref="<a href="#version">above</a>"/>
- /// </param>
+ /// <summary>
+ /// Construct a new <see cref="WhitespaceTokenizer"/>.
+ /// </summary>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match</param>
/// <param name="in">
/// the input to split up into tokens </param>
public WhitespaceTokenizer(LuceneVersion matchVersion, TextReader @in)
@@ -50,14 +47,12 @@ namespace Lucene.Net.Analysis.Core
}
/// <summary>
- /// Construct a new WhitespaceTokenizer using a given
- /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
- ///
- /// @param
- /// matchVersion Lucene version to match See
- /// <seealso cref="<a href="#version">above</a>"/> </summary>
+ /// Construct a new <see cref="WhitespaceTokenizer"/> using a given
+ /// <see cref="AttributeSource.AttributeFactory"/>.
+ /// </summary>
+ /// <param name="matchVersion"><see cref="LuceneVersion"/> to match</param>
/// <param name="factory">
- /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+ /// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
/// the input to split up into tokens </param>
public WhitespaceTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in)
@@ -67,7 +62,7 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Collects only characters which do not satisfy
- /// <seealso cref="Character#isWhitespace(int)"/>.
+ /// <see cref="char.IsWhitespace(char)"/>.
/// </summary>
protected override bool IsTokenChar(int c)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
index 1198f48..645a6c9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizerFactory.cs
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Core
*/
/// <summary>
- /// Factory for <seealso cref="WhitespaceTokenizer"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="WhitespaceTokenizer"/>.
+ /// <code>
/// <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class WhitespaceTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new WhitespaceTokenizerFactory </summary>
+ /// Creates a new <see cref="WhitespaceTokenizerFactory"/> </summary>
public WhitespaceTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
index 983dc32..2b6f70b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -64,7 +64,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
termAtt = AddAttribute<ICharTermAttribute>();
}
- protected internal override bool Accept()
+ protected override bool Accept()
{
int max32 = termAtt.Length;
int min32 = max32 >> 1;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index 98b1c84..82ec1bc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -58,7 +58,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
termAtt = AddAttribute<ICharTermAttribute>();
}
- protected internal override bool Accept()
+ protected override bool Accept()
{
return words.Contains(termAtt.Buffer, 0, termAtt.Length);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index d82b34b..e02fd24 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -77,7 +77,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.termAtt = AddAttribute<ICharTermAttribute>();
}
- protected internal override bool Accept()
+ protected override bool Accept()
{
int len = termAtt.Length;
return (len >= min && len <= max);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
index c3c1f41..688c890 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -79,7 +79,7 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Override this method and return if the current input token should be returned by <seealso cref="#incrementToken"/>. </summary>
- protected internal abstract bool Accept();
+ protected abstract bool Accept();
public override sealed bool IncrementToken()
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
index 7ea6673..9f32238 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
@@ -30,6 +30,6 @@
/// Returns an analysis component to handle analysis of multi-term queries.
/// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
/// </summary>
- AbstractAnalysisFactory MultiTermComponent { get; }
+ AbstractAnalysisFactory MultiTermComponent { get; } // LUCENENET TODO: Change to GetMultiTermComponent() ? Some implementations return new instance.
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/695b714f/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
index 5576495..5a7d81d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
@@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Core
public virtual void TestInform()
{
TypeTokenFilterFactory factory = (TypeTokenFilterFactory)TokenFilterFactory("Type", "types", "stoptypes-1.txt", "enablePositionIncrements", "true");
- ISet<string> types = factory.StopTypes;
+ ICollection<string> types = factory.StopTypes;
assertTrue("types is null and it shouldn't be", types != null);
assertTrue("types Size: " + types.Count + " is not: " + 2, types.Count == 2);
assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.EnablePositionIncrements);
[3/6] lucenenet git commit: Lucene.Net.Analysis.Compound refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Compound refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6dc3ac1f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6dc3ac1f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6dc3ac1f
Branch: refs/heads/api-work
Commit: 6dc3ac1fad1053c2f65856ddd15243dd57e5045b
Parents: 487927c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 01:12:23 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 01:13:43 2017 +0700
----------------------------------------------------------------------
.../Compound/CompoundWordTokenFilterBase.cs | 45 +++++----
.../DictionaryCompoundWordTokenFilter.cs | 25 +++--
.../DictionaryCompoundWordTokenFilterFactory.cs | 9 +-
.../Analysis/Compound/Hyphenation/ByteVector.cs | 2 +-
.../Analysis/Compound/Hyphenation/CharVector.cs | 9 +-
.../Analysis/Compound/Hyphenation/Hyphen.cs | 2 +-
.../Compound/Hyphenation/Hyphenation.cs | 9 +-
.../Compound/Hyphenation/HyphenationTree.cs | 66 +++++++------
.../Compound/Hyphenation/PatternConsumer.cs | 49 +++++-----
.../Compound/Hyphenation/PatternParser.cs | 97 +++++++++++++++-----
.../Compound/Hyphenation/TernaryTree.cs | 62 ++++++-------
.../HyphenationCompoundWordTokenFilter.cs | 36 ++++----
...HyphenationCompoundWordTokenFilterFactory.cs | 39 ++++----
13 files changed, 251 insertions(+), 199 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index 705ce55..0d42753 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -26,18 +26,15 @@ namespace Lucene.Net.Analysis.Compound
/// <summary>
/// Base class for decomposition token filters.
- /// <para>
- ///
- /// <a name="version"></a>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// CompoundWordTokenFilterBase:
- /// <ul>
- /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- /// supplementary characters in strings and char arrays provided as compound word
- /// dictionaries.
- /// <li>As of 4.4, <seealso cref="CompoundWordTokenFilterBase"/> doesn't update offsets.
- /// </ul>
- /// </para>
+ /// <para/>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="CompoundWordTokenFilterBase"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ /// supplementary characters in strings and char arrays provided as compound word
+ /// dictionaries.</item>
+ /// <item>As of 4.4, <see cref="CompoundWordTokenFilterBase"/> doesn't update offsets.</item>
+ /// </list>
/// </summary>
public abstract class CompoundWordTokenFilterBase : TokenFilter
{
@@ -56,16 +53,16 @@ namespace Lucene.Net.Analysis.Compound
/// </summary>
public const int DEFAULT_MAX_SUBWORD_SIZE = 15;
- protected internal readonly LuceneVersion m_matchVersion;
- protected internal readonly CharArraySet m_dictionary;
- protected internal readonly LinkedList<CompoundToken> m_tokens;
- protected internal readonly int m_minWordSize;
- protected internal readonly int m_minSubwordSize;
- protected internal readonly int m_maxSubwordSize;
- protected internal readonly bool m_onlyLongestMatch;
+ protected readonly LuceneVersion m_matchVersion;
+ protected readonly CharArraySet m_dictionary;
+ protected readonly LinkedList<CompoundToken> m_tokens;
+ protected readonly int m_minWordSize;
+ protected readonly int m_minSubwordSize;
+ protected readonly int m_maxSubwordSize;
+ protected readonly bool m_onlyLongestMatch;
- protected internal readonly ICharTermAttribute m_termAtt;
- protected internal readonly IOffsetAttribute m_offsetAtt;
+ protected readonly ICharTermAttribute m_termAtt;
+ protected readonly IOffsetAttribute m_offsetAtt;
private readonly IPositionIncrementAttribute posIncAtt;
private AttributeSource.State current;
@@ -144,7 +141,7 @@ namespace Lucene.Net.Analysis.Compound
}
/// <summary>
- /// Decomposes the current <seealso cref="#termAtt"/> and places <seealso cref="CompoundToken"/> instances in the <seealso cref="#tokens"/> list.
+ /// Decomposes the current <see cref="m_termAtt"/> and places <see cref="CompoundToken"/> instances in the <see cref="m_tokens"/> list.
/// The original token may not be placed in the list, as it is automatically passed through this filter.
/// </summary>
protected abstract void Decompose();
@@ -159,7 +156,7 @@ namespace Lucene.Net.Analysis.Compound
/// <summary>
/// Helper class to hold decompounded token information
/// </summary>
- protected internal class CompoundToken
+ protected class CompoundToken
{
private readonly ICharSequence txt;
private readonly int startOffset, endOffset;
@@ -180,7 +177,7 @@ namespace Lucene.Net.Analysis.Compound
}
/// <summary>
- /// Construct the compound token based on a slice of the current <seealso cref="CompoundWordTokenFilterBase#termAtt"/>. </summary>
+ /// Construct the compound token based on a slice of the current <see cref="CompoundWordTokenFilterBase.m_termAtt"/>. </summary>
public CompoundToken(CompoundWordTokenFilterBase outerInstance, int offset, int length)
{
this.txt = outerInstance.m_termAtt.SubSequence(offset, offset + length);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
index 7221927..12ce070 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
@@ -21,27 +21,26 @@ namespace Lucene.Net.Analysis.Compound
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+ /// A <see cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
/// <para>
/// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
/// "Donaudampfschiff" even when you only enter "schiff".
/// It uses a brute-force algorithm to achieve this.
/// </para>
/// <para>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// CompoundWordTokenFilterBase:
- /// <ul>
- /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- /// supplementary characters in strings and char arrays provided as compound word
- /// dictionaries.
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="CompoundWordTokenFilterBase"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ /// supplementary characters in strings and char arrays provided as compound word
+ /// dictionaries.</item>
+ /// </list>
/// </para>
/// </summary>
public class DictionaryCompoundWordTokenFilter : CompoundWordTokenFilterBase
{
-
/// <summary>
- /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+ /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/>
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
@@ -49,7 +48,7 @@ namespace Lucene.Net.Analysis.Compound
/// href="CompoundWordTokenFilterBase.html#version"
/// >CompoundWordTokenFilterBase</a> for details. </param>
/// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
+ /// the <see cref="TokenStream"/> to process </param>
/// <param name="dictionary">
/// the word dictionary to match against. </param>
public DictionaryCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary)
@@ -62,7 +61,7 @@ namespace Lucene.Net.Analysis.Compound
}
/// <summary>
- /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+ /// Creates a new <see cref="DictionaryCompoundWordTokenFilter"/>
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
@@ -70,7 +69,7 @@ namespace Lucene.Net.Analysis.Compound
/// href="CompoundWordTokenFilterBase.html#version"
/// >CompoundWordTokenFilterBase</a> for details. </param>
/// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
+ /// the <see cref="TokenStream"/> to process </param>
/// <param name="dictionary">
/// the word dictionary to match against. </param>
/// <param name="minWordSize">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
index a44eb19..4dcb266 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
@@ -21,15 +21,16 @@ namespace Lucene.Net.Analysis.Compound
*/
/// <summary>
- /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="DictionaryCompoundWordTokenFilter"/>.
+ /// <code>
/// <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
/// minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
@@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.Compound
private readonly bool onlyLongestMatch;
/// <summary>
- /// Creates a new DictionaryCompoundWordTokenFilterFactory </summary>
+ /// Creates a new <see cref="DictionaryCompoundWordTokenFilterFactory"/> </summary>
public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
index 206a7c4..4df2e3a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
@@ -27,7 +27,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// </summary>
public class ByteVector
{
-
/// <summary>
/// Capacity increment size
/// </summary>
@@ -122,6 +121,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
get { return array.Length; }
}
+ // LUCENENET NOTE: Not needed (replaced with this[])
//public virtual void Put(int index, sbyte val)
//{
// array[index] = val;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
index c9f513b..2e67343 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
@@ -99,7 +99,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
public virtual object Clone()
{
- CharVector cv = new CharVector(array, blockSize);
+ CharVector cv = new CharVector((char[])array.Clone(), blockSize);
cv.n = this.n;
return cv;
}
@@ -117,8 +117,6 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// LUCENENET indexer for .NET
/// </summary>
- /// <param name="index"></param>
- /// <returns></returns>
public virtual char this[int index]
{
get { return array[index]; }
@@ -128,9 +126,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// return number of items in array
/// </summary>
- public virtual int Length()
+ public virtual int Length
{
- return n;
+ get { return n; }
}
/// <summary>
@@ -141,6 +139,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
get { return array.Length; }
}
+ // LUCENENET NOTE: Not needed (replaced with this[])
//public virtual void Put(int index, char val)
//{
// array[index] = val;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
index 08168a4..0fb57d9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
@@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// cases in some languages where words change spelling if they're split across
/// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
/// from TeX.
- ///
+ /// <para/>
/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
/// </summary>
public class Hyphen
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
index ec33fd0..1cdb7d4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
@@ -22,29 +22,28 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// This class represents a hyphenated word.
- ///
+ /// <para/>
/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
/// </summary>
public class Hyphenation
{
-
private readonly int[] hyphenPoints;
/// <summary>
- /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
+ /// rawWord as made of alternating strings and <see cref="Hyphen"/> instances
/// </summary>
internal Hyphenation(int[] points)
{
hyphenPoints = points;
}
- /// <returns> the number of hyphenation points in the word </returns>
+ /// <summary> the number of hyphenation points in the word </summary>
public virtual int Length
{
get { return hyphenPoints.Length; }
}
- /// <returns> the hyphenation points </returns>
+ /// <summary> the hyphenation points </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public virtual int[] HyphenationPoints
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
index 7e1420d..c4dfe8b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// This tree structure stores the hyphenation patterns in an efficient way for
/// fast lookup. It provides the method to hyphenate a word.
- ///
+ /// <para/>
/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
/// </summary>
public class HyphenationTree : TernaryTree, IPatternConsumer
@@ -35,17 +35,17 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// value space: stores the interletter values
/// </summary>
- protected internal ByteVector m_vspace;
+ protected ByteVector m_vspace;
/// <summary>
/// This map stores hyphenation exceptions
/// </summary>
- protected internal IDictionary<string, IList<object>> m_stoplist;
+ protected IDictionary<string, IList<object>> m_stoplist;
/// <summary>
/// This map stores the character classes
/// </summary>
- protected internal TernaryTree m_classmap;
+ protected TernaryTree m_classmap;
/// <summary>
/// Temporary map to store interletter values on pattern loading.
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <param name="values"> a string of digits from '0' to '9' representing the
/// interletter values. </param>
/// <returns> the index into the vspace array where the packed values are stored. </returns>
- protected internal virtual int PackValues(string values)
+ protected virtual int PackValues(string values)
{
int i, n = values.Length;
int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
@@ -94,7 +94,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return offset;
}
- protected internal virtual string UnpackValues(int k)
+ protected virtual string UnpackValues(int k)
{
StringBuilder buf = new StringBuilder();
byte v = m_vspace[k++];
@@ -128,6 +128,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// Read hyphenation patterns from an XML file.
/// </summary>
/// <param name="f"> the filename </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(string filename, Encoding encoding)
{
@@ -138,7 +139,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="f"> the filename </param>
+ /// <param name="f"> a <see cref="FileInfo"/> object representing the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f)
{
@@ -148,7 +149,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="f"> the filename </param>
+ /// <param name="f"> a <see cref="FileInfo"/> object representing the file </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f, Encoding encoding)
{
@@ -159,7 +161,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="source"> the InputSource for the file </param>
+ /// <param name="source"> <see cref="Stream"/> input source for the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(Stream source)
{
@@ -169,7 +171,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
- /// <param name="source"> the InputSource for the file </param>
+ /// <param name="source"> <see cref="Stream"/> input source for the file </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(Stream source, Encoding encoding)
{
@@ -190,6 +193,11 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
}
}
+ /// <summary>
+ /// Read hyphenation patterns from an <see cref="XmlReader"/>.
+ /// </summary>
+ /// <param name="source"> <see cref="XmlReader"/> input source for the file </param>
+ /// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(XmlReader source)
{
PatternParser pp = new PatternParser(this);
@@ -220,7 +228,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// String compare, returns 0 if equal or t is a substring of s
/// </summary>
- protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
+ protected virtual int HStrCmp(char[] s, int si, char[] t, int ti)
{
for (; s[si] == t[ti]; si++, ti++)
{
@@ -236,7 +244,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return s[si] - t[ti];
}
- protected internal virtual byte[] GetValues(int k)
+ protected virtual byte[] GetValues(int k)
{
StringBuilder buf = new StringBuilder();
byte v = m_vspace[k++];
@@ -267,9 +275,10 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// interletter values. In other words, it does something like:
/// </para>
/// <code>
- /// for(i=0; i<patterns.length; i++) {
- /// if ( word.substring(index).startsWidth(patterns[i]) )
- /// update_interletter_values(patterns[i]);
+ /// for (i=0; i<patterns.Length; i++)
+ /// {
+ /// if (word.Substring(index).StartsWith(patterns[i]))
+ /// update_interletter_values(patterns[i]);
/// }
/// </code>
/// <para>
@@ -286,7 +295,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <param name="word"> null terminated word to match </param>
/// <param name="index"> start index from word </param>
/// <param name="il"> interletter values array to update </param>
- protected internal virtual void SearchPatterns(char[] word, int index, byte[] il)
+ protected virtual void SearchPatterns(char[] word, int index, byte[] il)
{
byte[] values;
int i = index;
@@ -365,14 +374,14 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
}
/// <summary>
- /// Hyphenate word and return a Hyphenation object.
+ /// Hyphenate word and return a <see cref="Hyphenation"/> object.
/// </summary>
/// <param name="word"> the word to be hyphenated </param>
/// <param name="remainCharCount"> Minimum number of characters allowed before the
/// hyphenation point. </param>
/// <param name="pushCharCount"> Minimum number of characters allowed after the
/// hyphenation point. </param>
- /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+ /// <returns> a <see cref="Hyphenation"/> object representing the
/// hyphenated word or null if word is not hyphenated. </returns>
public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
{
@@ -380,7 +389,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return Hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
}
+
+
/// <summary>
+ /// Hyphenate word and return an array of hyphenation points.
+ /// </summary>
+ /// <remarks>
/// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
/// may be absent, the first n is at offset, the first l is at offset +
/// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
@@ -392,11 +406,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
/// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
/// iIgnoreAtBeginning
- /// </summary>
-
- /// <summary>
- /// Hyphenate word and return an array of hyphenation points.
- /// </summary>
+ /// </remarks>
/// <param name="w"> char array that contains the word </param>
/// <param name="offset"> Offset to first character in word </param>
/// <param name="len"> Length of word </param>
@@ -404,7 +414,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// hyphenation point. </param>
/// <param name="pushCharCount"> Minimum number of characters allowed after the
/// hyphenation point. </param>
- /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+ /// <returns> a <see cref="Hyphenation"/> object representing the
/// hyphenated word or null if word is not hyphenated. </returns>
public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
{
@@ -522,7 +532,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Add a character class to the tree. It is used by
- /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
+ /// <see cref="PatternParser"/> as callback to add character classes.
/// Character classes define the valid word characters for hyphenation. If a
/// word contains a character not defined in any of the classes, it is not
/// hyphenated. It also defines a way to normalize the characters in order to
@@ -547,12 +557,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Add an exception to the tree. It is used by
- /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
+ /// <see cref="PatternParser"/> class as callback to store the
/// hyphenation exceptions.
/// </summary>
/// <param name="word"> normalized word </param>
/// <param name="hyphenatedword"> a vector of alternating strings and
- /// <seealso cref="Hyphen hyphen"/> objects. </param>
+ /// <see cref="Hyphen"/> objects. </param>
public virtual void AddException(string word, IList<object> hyphenatedword)
{
m_stoplist[word] = hyphenatedword;
@@ -560,7 +570,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Add a pattern to the tree. Mainly, to be used by
- /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
+ /// <see cref="PatternParser"/> class as callback to add a pattern to
/// the tree.
/// </summary>
/// <param name="pattern"> the hyphenation pattern </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
index 4929892..1d92db5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
@@ -22,33 +22,32 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// This interface is used to connect the XML pattern file parser to the
/// hyphenation tree.
- ///
- /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+ /// <para/>
+ /// This interface has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
/// </summary>
public interface IPatternConsumer
- {
+ {
+ /// <summary>
+ /// Add a character class. A character class defines characters that are
+ /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
+ /// usually means to ignore case.
+ /// </summary>
+ /// <param name="chargroup"> character group </param>
+ void AddClass(string chargroup);
- /// <summary>
- /// Add a character class. A character class defines characters that are
- /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
- /// usually means to ignore case.
- /// </summary>
- /// <param name="chargroup"> character group </param>
- void AddClass(string chargroup);
+ /// <summary>
+ /// Add a hyphenation exception. An exception replaces the result obtained by
+ /// the algorithm for cases for which this fails or the user wants to provide
+ /// his own hyphenation. A hyphenatedword is a vector of alternating String's
+ /// and <see cref="Hyphen"/> instances
+ /// </summary>
+ void AddException(string word, IList<object> hyphenatedword);
- /// <summary>
- /// Add a hyphenation exception. An exception replaces the result obtained by
- /// the algorithm for cases for which this fails or the user wants to provide
- /// his own hyphenation. A hyphenatedword is a vector of alternating String's
- /// and <seealso cref="Hyphen"/> instances
- /// </summary>
- void AddException(string word, IList<object> hyphenatedword);
-
- /// <summary>
- /// Add hyphenation patterns.
- /// </summary>
- /// <param name="pattern"> the pattern </param>
- /// <param name="values"> interletter values expressed as a string of digit characters. </param>
- void AddPattern(string pattern, string values);
- }
+ /// <summary>
+ /// Add hyphenation patterns.
+ /// </summary>
+ /// <param name="pattern"> the pattern </param>
+ /// <param name="values"> interletter values expressed as a string of digit characters. </param>
+ void AddPattern(string pattern, string values);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
index 8849cff..9b9f226 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// A XMLReader document handler to read and parse hyphenation patterns from a XML
/// file.
- ///
+ /// <para/>
/// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
/// than a SAX parser.
/// </summary>
@@ -59,7 +59,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
hyphenChar = '-'; // default
}
- public PatternParser(IPatternConsumer consumer) : this()
+ public PatternParser(IPatternConsumer consumer)
+ : this()
{
this.consumer = consumer;
}
@@ -79,14 +80,26 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
- /// <param name="filename"> the filename </param>
+ /// <param name="path">The complete file path to be read.</param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
- public virtual void Parse(string filename)
+ public virtual void Parse(string path)
+ {
+ // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+ Parse(path, Encoding.UTF8);
+ }
+
+ /// <summary>
+ /// Parses a hyphenation pattern file.
+ /// </summary>
+ /// <param name="path">The complete file path to be read.</param>
+ /// <param name="encoding">The character encoding to use</param>
+ /// <exception cref="IOException"> In case of an exception while parsing </exception>
+ public virtual void Parse(string path, Encoding encoding)
{
var xmlReaderSettings = GetXmlReaderSettings();
// LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
- using (var src = XmlReader.Create(filename, xmlReaderSettings))
+ using (var src = XmlReader.Create(new StreamReader(path, encoding), xmlReaderSettings))
{
Parse(src);
}
@@ -95,7 +108,8 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
- /// <param name="file"> the pattern file </param>
+ /// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
+ /// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file)
{
Parse(file, Encoding.UTF8);
@@ -104,7 +118,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
- /// <param name="file"> the pattern file </param>
+ /// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
+ /// <param name="encoding">The character encoding to use</param>
+ /// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file, Encoding encoding)
{
var xmlReaderSettings = GetXmlReaderSettings();
@@ -118,7 +134,14 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
- /// <param name="file"> the pattern file </param>
+ /// <param name="xmlStream">
+ /// The stream containing the XML data.
+ /// <para/>
+ /// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
+ /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
+ /// the stream, and processing continues parsing the input as a stream of (Unicode) characters.
+ /// </param>
+ /// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(Stream xmlStream)
{
var xmlReaderSettings = GetXmlReaderSettings();
@@ -132,7 +155,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
- /// <param name="source"> the InputSource for the file </param>
+ /// <param name="source"> <see cref="XmlReader"/> input source for the file </param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(XmlReader source)
{
@@ -209,7 +232,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return result;
}
- protected internal virtual string ReadToken(StringBuilder chars)
+ protected virtual string ReadToken(StringBuilder chars)
{
string word;
bool space = false;
@@ -266,7 +289,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return null;
}
- protected internal static string GetPattern(string word)
+ protected static string GetPattern(string word)
{
StringBuilder pat = new StringBuilder();
int len = word.Length;
@@ -280,7 +303,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return pat.ToString();
}
- protected internal virtual IList<object> NormalizeException<T1>(IList<T1> ex)
+ protected virtual IList<object> NormalizeException<T1>(IList<T1> ex)
{
List<object> res = new List<object>();
for (int i = 0; i < ex.Count; i++)
@@ -321,7 +344,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return res;
}
- protected internal virtual string GetExceptionWord<T1>(IList<T1> ex)
+ protected virtual string GetExceptionWord<T1>(IList<T1> ex)
{
StringBuilder res = new StringBuilder();
for (int i = 0; i < ex.Count; i++)
@@ -342,7 +365,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
return res.ToString();
}
- protected internal static string GetInterletterValues(string pat)
+ protected static string GetInterletterValues(string pat)
{
StringBuilder il = new StringBuilder();
string word = pat + "a"; // add dummy letter to serve as sentinel
@@ -388,9 +411,19 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
// ContentHandler methods
//
- /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String,
- /// java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso>
- public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
+ /// <summary>
+ /// Receive notification of the beginning of an element.
+ /// <para/>
+ /// The Parser will invoke this method at the beginning of every element in the XML document;
+ /// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
+ /// (even when the element is empty). All of the element's content will be reported,
+ /// in order, before the corresponding endElement event.
+ /// </summary>
+ /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
+ /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
+ /// <param name="raw"></param>
+ /// <param name="attrs"> the attributes attached to the element. If there are no attributes, it shall be an empty Attributes object. The value of this object after startElement returns is undefined</param>
+ public virtual void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
{
if (local.Equals("hyphen-char"))
{
@@ -425,9 +458,17 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
token.Length = 0;
}
- /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String,
- /// java.lang.String, java.lang.String) </seealso>
- public void EndElement(string uri, string local, string raw)
+ /// <summary>
+ /// Receive notification of the end of an element.
+ /// <para/>
+ /// The parser will invoke this method at the end of every element in the XML document;
+ /// there will be a corresponding <see cref="StartElement"/> event for every
+ /// <see cref="EndElement"/> event (even when the element is empty).
+ /// </summary>
+ /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
+ /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
+ /// <param name="raw"></param>
+ public virtual void EndElement(string uri, string local, string raw)
{
if (token.Length > 0)
{
@@ -464,8 +505,20 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
}
}
- /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
- public void Characters(char[] ch, int start, int length)
+ /// <summary>
+ /// Receive notification of character data.
+ /// <para/>
+ /// The Parser will call this method to report each chunk of character data. Parsers may
+ /// return all contiguous character data in a single chunk, or they may split it into
+ /// several chunks; however, all of the characters in any single event must come from
+ /// the same external entity so that the Locator provides useful information.
+ /// <para/>
+ /// The application must not attempt to read from the array outside of the specified range.
+ /// </summary>
+ /// <param name="ch"></param>
+ /// <param name="start"></param>
+ /// <param name="length"></param>
+ public virtual void Characters(char[] ch, int start, int length)
{
StringBuilder chars = new StringBuilder(length);
chars.Append(ch, start, length);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
index 87e9d19..82feaec 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
@@ -70,65 +70,63 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
: ICloneable
#endif
{
- /// <summary>
- /// We use 4 arrays to represent a node. I guess I should have created a proper
- /// node class, but somehow Knuth's pascal code made me forget we now have a
- /// portable language with virtual memory management and automatic garbage
- /// collection! And now is kind of late, furthermore, if it ain't broken, don't
- /// fix it.
- /// </summary>
+ // We use 4 arrays to represent a node. I guess I should have created a proper
+ // node class, but somehow Knuth's pascal code made me forget we now have a
+ // portable language with virtual memory management and automatic garbage
+ // collection! And now is kind of late, furthermore, if it ain't broken, don't
+ // fix it.
/// <summary>
/// Pointer to low branch and to rest of the key when it is stored directly in
/// this node, we don't have unions in java!
/// </summary>
- protected internal char[] m_lo;
+ protected char[] m_lo;
/// <summary>
/// Pointer to high branch.
/// </summary>
- protected internal char[] m_hi;
+ protected char[] m_hi;
/// <summary>
/// Pointer to equal branch and to data when this node is a string terminator.
/// </summary>
- protected internal char[] m_eq;
+ protected char[] m_eq;
/// <summary>
- /// <P>
+ /// <para>
/// The character stored in this node: splitchar. Two special values are
/// reserved:
- /// </P>
- /// <ul>
- /// <li>0x0000 as string terminator</li>
- /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
- /// </ul>
+ /// </para>
+ /// <list type="bullet">
+ /// <item>0x0000 as string terminator</item>
+ /// <item>0xFFFF to indicate that the branch starting at this node is compressed</item>
+ /// </list>
/// <para>
/// This shouldn't be a problem if we give the usual semantics to strings since
/// 0xFFFF is guaranteed not to be an Unicode character.
/// </para>
/// </summary>
- protected internal char[] m_sc;
+ protected char[] m_sc;
/// <summary>
/// This vector holds the trailing of the keys when the branch is compressed.
/// </summary>
- protected internal CharVector m_kv;
+ protected CharVector m_kv;
- protected internal char m_root;
+ protected char m_root;
- protected internal char m_freenode;
+ protected char m_freenode;
- protected internal int m_length; // number of items in tree
+ protected int m_length; // number of items in tree
- protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
+ protected const int BLOCK_SIZE = 2048; // allocation size for arrays
internal TernaryTree()
{
Init();
}
- protected internal virtual void Init()
+ protected virtual void Init()
{
m_root = (char)0;
m_freenode = (char)1;
@@ -408,7 +406,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
get { return m_length; }
}
- public object Clone()
+ public virtual object Clone()
{
TernaryTree t = new TernaryTree();
t.m_lo = (char[])this.m_lo.Clone();
@@ -428,7 +426,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// upper halves, and so on in order to get a balanced tree. The array of keys
/// is assumed to be sorted in ascending order.
/// </summary>
- protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
+ protected virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
{
int m;
if (n < 1)
@@ -555,7 +553,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// </summary>
private string curkey;
- internal class Item
+ private class Item
#if FEATURE_CLONEABLE
: ICloneable
#endif
@@ -585,12 +583,12 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// Node stack
/// </summary>
- internal Stack<Item> ns;
+ private Stack<Item> ns;
/// <summary>
- /// key stack implemented with a StringBuilder
+ /// key stack implemented with a <see cref="StringBuilder"/>
/// </summary>
- internal StringBuilder ks;
+ private StringBuilder ks;
private bool isInitialized = false;
@@ -626,7 +624,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// traverse upwards
/// </summary>
- internal virtual int Up()
+ private int Up()
{
Item i = new Item();
int res = 0;
@@ -690,7 +688,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
/// <summary>
/// traverse the tree to find next key
/// </summary>
- internal virtual int Run()
+ private int Run()
{
if (cur == -1)
{
@@ -794,7 +792,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
@out.WriteLine("Number of keys = " + Convert.ToString(m_length));
@out.WriteLine("Node count = " + Convert.ToString(m_freenode));
// System.out.println("Array length = " + Integer.toString(eq.length));
- @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length()));
+ @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length));
/*
* for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
index 4608c01..83a1a46 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -24,20 +24,20 @@ namespace Lucene.Net.Analysis.Compound
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+ /// A <see cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
/// <para>
/// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
/// "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
/// grammar and a word dictionary to achieve this.
/// </para>
/// <para>
- /// You must specify the required <seealso cref="LuceneVersion"/> compatibility when creating
- /// CompoundWordTokenFilterBase:
- /// <ul>
- /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- /// supplementary characters in strings and char arrays provided as compound word
- /// dictionaries.
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="CompoundWordTokenFilterBase"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ /// supplementary characters in strings and char arrays provided as compound word
+ /// dictionaries.</item>
+ /// </list>
/// </para>
/// </summary>
public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
@@ -45,7 +45,7 @@ namespace Lucene.Net.Analysis.Compound
private readonly HyphenationTree hyphenator;
/// <summary>
- /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
+ /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
@@ -66,7 +66,7 @@ namespace Lucene.Net.Analysis.Compound
}
/// <summary>
- /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
+ /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
@@ -93,16 +93,13 @@ namespace Lucene.Net.Analysis.Compound
: base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch)
{
-
this.hyphenator = hyphenator;
}
/// <summary>
- /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ /// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary.
/// <para>
- /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
- /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
- /// null, minWordSize, minSubwordSize, maxSubwordSize }
+ /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
@@ -114,11 +111,9 @@ namespace Lucene.Net.Analysis.Compound
}
/// <summary>
- /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ /// Create a <see cref="HyphenationCompoundWordTokenFilter"/> with no dictionary.
/// <para>
- /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int)
- /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
- /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
+ /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, int, int, int)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
@@ -143,6 +138,7 @@ namespace Lucene.Net.Analysis.Compound
/// Create a hyphenator tree
/// </summary>
/// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
@@ -165,6 +161,7 @@ namespace Lucene.Net.Analysis.Compound
/// Create a hyphenator tree
/// </summary>
/// <param name="hyphenationFile"> the file of the XML grammar to load </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
@@ -187,6 +184,7 @@ namespace Lucene.Net.Analysis.Compound
/// Create a hyphenator tree
/// </summary>
/// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
+ /// <param name="encoding">The character encoding to use</param>
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(Stream hyphenationSource, Encoding encoding)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6dc3ac1f/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
index 5dfec4a..75a7917 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
@@ -25,34 +25,33 @@ namespace Lucene.Net.Analysis.Compound
*/
/// <summary>
- /// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
- /// <para>
+ /// Factory for <see cref="HyphenationCompoundWordTokenFilter"/>.
+ /// <para/>
/// This factory accepts the following parameters:
- /// <ul>
- /// <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern.
- /// See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.
- /// <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.
- /// <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.
- /// <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.
- /// <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.
- /// <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.
- /// <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword
- /// to the stream. defaults to false.
- /// </ul>
- /// </para>
+ /// <list type="bullet">
+ /// <item><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern.
+ /// See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.</item>
+ /// <item><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.</item>
+ /// <item><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.</item>
+ /// <item><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.</item>
+ /// <item><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.</item>
+ /// <item><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.</item>
+ /// <item><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword
+ /// to the stream. defaults to false.</item>
+ /// </list>
/// <para>
- /// <pre class="prettyprint">
+ /// <code>
/// <fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
/// dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/>
/// </analyzer>
- /// </fieldType></pre>
- ///
+ /// </fieldType>
+ /// </code>
/// </para>
/// </summary>
- /// <seealso cref= HyphenationCompoundWordTokenFilter </seealso>
+ /// <seealso cref="HyphenationCompoundWordTokenFilter"/>
public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
private CharArraySet dictionary;
@@ -66,13 +65,13 @@ namespace Lucene.Net.Analysis.Compound
private readonly bool onlyLongestMatch;
/// <summary>
- /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
+ /// Creates a new <see cref="HyphenationCompoundWordTokenFilterFactory"/> </summary>
public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
{
AssureMatchVersion();
dictFile = Get(args, "dictionary");
encoding = Get(args, "encoding");
- hypFile = Require(args, "hyphenator"); // LUCENENET TODO: Not sure what to do with this
+ hypFile = Require(args, "hyphenator");
minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
[2/6] lucenenet git commit: Lucene.Net.Analysis.Cn refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Cn refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3e97f31e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3e97f31e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3e97f31e
Branch: refs/heads/api-work
Commit: 3e97f31e190f7c3a7781a45e9807e609a1e06393
Parents: 0986545
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 23:22:53 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 01:13:42 2017 +0700
----------------------------------------------------------------------
.../Analysis/Cn/ChineseAnalyzer.cs | 23 +++++-----
.../Analysis/Cn/ChineseFilter.cs | 37 ++++++++--------
.../Analysis/Cn/ChineseFilterFactory.cs | 6 +--
.../Analysis/Cn/ChineseTokenizer.cs | 45 ++++++++++----------
.../Analysis/Cn/ChineseTokenizerFactory.cs | 8 ++--
5 files changed, 61 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
index 5dc0aa6..de0b5e7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Cn
*/
/// <summary>
- /// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="ChineseTokenizer"/> and
- /// filters with <seealso cref="ChineseFilter"/> </summary>
- /// @deprecated (3.1) Use <seealso cref="StandardAnalyzer"/> instead, which has the same functionality.
+ /// An <see cref="Analyzer"/> that tokenizes text with <see cref="ChineseTokenizer"/> and
+ /// filters with <see cref="ChineseFilter"/>
+ /// </summary>
+ /// @deprecated (3.1) Use <see cref="Standard.StandardAnalyzer"/> instead, which has the same functionality.
/// This analyzer will be removed in Lucene 5.0
[Obsolete("(3.1) Use StandardAnalyzer instead, which has the same functionality.")]
public sealed class ChineseAnalyzer : Analyzer
- /// <summary>
- /// Creates
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
- /// </summary>
- /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from a <seealso cref="ChineseTokenizer"/> filtered with
- /// <seealso cref="ChineseFilter"/> </returns>
{
+ /// <summary>
+ /// Creates
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
+ /// </summary>
+ /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from a <see cref="ChineseTokenizer"/> filtered with
+ /// <see cref="ChineseFilter"/> </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new ChineseTokenizer(reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
index 9b3b95a..61e6576 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
@@ -25,28 +25,32 @@ namespace Lucene.Net.Analysis.Cn
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> with a stop word table.
- /// <ul>
- /// <li>Numeric tokens are removed.
- /// <li>English tokens must be larger than 1 character.
- /// <li>One Chinese character as one Chinese word.
- /// </ul>
+ /// A <see cref="TokenFilter"/> with a stop word table.
+ /// <list type="bullet">
+ /// <item>Numeric tokens are removed.</item>
+ /// <item>English tokens must be larger than 1 character.</item>
+ /// <item>One Chinese character as one Chinese word.</item>
+ /// </list>
/// TO DO:
- /// <ol>
- /// <li>Add Chinese stop words, such as \ue400
- /// <li>Dictionary based Chinese word extraction
- /// <li>Intelligent Chinese word extraction
- /// </ol>
+ /// <list type="number">
+ /// <item>Add Chinese stop words, such as \ue400</item>
+ /// <item>Dictionary based Chinese word extraction</item>
+ /// <item>Intelligent Chinese word extraction</item>
+ /// </list>
/// </summary>
- /// @deprecated (3.1) Use <seealso cref="StopFilter"/> instead, which has the same functionality.
+ /// @deprecated (3.1) Use <see cref="Core.StopFilter"/> instead, which has the same functionality.
/// This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use StopFilter instead, which has the same functionality.")]
public sealed class ChineseFilter : TokenFilter
{
-
// Only English now, Chinese to be added later.
- public static readonly string[] STOP_WORDS = new string[] { "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" };
-
+ public static readonly string[] STOP_WORDS = new string[] {
+ "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "such",
+ "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ };
private CharArraySet stopTable;
@@ -55,13 +59,12 @@ namespace Lucene.Net.Analysis.Cn
public ChineseFilter(TokenStream @in)
: base(@in)
{
-
stopTable = new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.AsList(STOP_WORDS), false);
termAtt = AddAttribute<ICharTermAttribute>();
}
+
public override bool IncrementToken()
{
-
while (m_input.IncrementToken())
{
char[] text = termAtt.Buffer;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
index d3e30e5..98ddee9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
@@ -22,14 +22,14 @@ namespace Lucene.Net.Analysis.Cn
*/
/// <summary>
- /// Factory for <seealso cref="ChineseFilter"/> </summary>
- /// @deprecated Use <seealso cref="StopFilterFactory"/> instead.
+ /// Factory for <see cref="ChineseFilter"/> </summary>
+ /// @deprecated Use <see cref="Core.StopFilterFactory"/> instead.
[Obsolete("Use StopFilterFactory instead.")]
public class ChineseFilterFactory : TokenFilterFactory
{
/// <summary>
- /// Creates a new ChineseFilterFactory </summary>
+ /// Creates a new <see cref="ChineseFilterFactory"/> </summary>
public ChineseFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
index 4ae7ff8..eb500bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -26,27 +26,28 @@ namespace Lucene.Net.Analysis.Cn
/// Tokenize Chinese text as individual chinese characters.
///
/// <para>
- /// The difference between ChineseTokenizer and
- /// CJKTokenizer is that they have different
+ /// The difference between <see cref="ChineseTokenizer"/> and
+ /// <see cref="Cjk.CJKTokenizer"/> is that they have different
/// token parsing logic.
/// </para>
/// <para>
/// For example, if the Chinese text
/// "C1C2C3C4" is to be indexed:
- /// <ul>
- /// <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.
- /// <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
- /// </ul>
+ /// <list type="bullet">
+ /// <item>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.</item>
+ /// <item>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.</item>
+ /// </list>
/// </para>
/// <para>
- /// Therefore the index created by CJKTokenizer is much larger.
 + /// Therefore the index created by <see cref="Cjk.CJKTokenizer"/> is much larger.
/// </para>
/// <para>
/// The problem is that when searching for C1, C1C2, C1C3,
- /// C4C2, C1C2C3 ... the ChineseTokenizer works, but the
- /// CJKTokenizer will not work.
- /// </para> </summary>
- /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.
+ /// C4C2, C1C2C3 ... the <see cref="ChineseTokenizer"/> works, but the
+ /// <see cref="Cjk.CJKTokenizer"/> will not work.
+ /// </para>
+ /// </summary>
+ /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead, which has the same functionality.
/// This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")]
public sealed class ChineseTokenizer : Tokenizer
@@ -82,9 +83,8 @@ namespace Lucene.Net.Analysis.Cn
private ICharTermAttribute termAtt;
private IOffsetAttribute offsetAtt;
- private void push(char c)
+ private void Push(char c)
{
-
if (length == 0) // start of token
{
start = offset - 1;
@@ -93,9 +93,8 @@ namespace Lucene.Net.Analysis.Cn
}
- private bool flush()
+ private bool Flush()
{
-
if (length > 0)
{
//System.out.println(new String(buffer, 0,
@@ -132,7 +131,7 @@ namespace Lucene.Net.Analysis.Cn
if (dataLen <= 0)
{
offset--;
- return flush();
+ return Flush();
}
else
{
@@ -145,10 +144,10 @@ namespace Lucene.Net.Analysis.Cn
case UnicodeCategory.DecimalDigitNumber:
case UnicodeCategory.LowercaseLetter:
case UnicodeCategory.UppercaseLetter:
- push(c);
+ Push(c);
if (length == MAX_WORD_LEN)
{
- return flush();
+ return Flush();
}
break;
@@ -157,22 +156,22 @@ namespace Lucene.Net.Analysis.Cn
{
bufferIndex--;
offset--;
- return flush();
+ return Flush();
}
- push(c);
- return flush();
+ Push(c);
+ return Flush();
default:
if (length > 0)
{
- return flush();
+ return Flush();
}
break;
}
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
index 2eef7be..b71906e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
@@ -24,14 +24,14 @@ namespace Lucene.Net.Analysis.Cn
*/
/// <summary>
- /// Factory for <seealso cref="ChineseTokenizer"/> </summary>
- /// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.
+ /// Factory for <see cref="ChineseTokenizer"/>
+ /// </summary>
+ /// @deprecated Use <see cref="Standard.StandardTokenizerFactory"/> instead.
[Obsolete("Use StandardTokenizerFactory instead.")]
public class ChineseTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new ChineseTokenizerFactory </summary>
+ /// Creates a new <see cref="ChineseTokenizerFactory"/> </summary>
public ChineseTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
[6/6] lucenenet git commit: Lucene.Net.Analysis.Cz refactor: member
accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.Cz refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/816f0c9b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/816f0c9b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/816f0c9b
Branch: refs/heads/api-work
Commit: 816f0c9b4fa6e628f0d405e0c9fb5949e586551b
Parents: 695b714
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 02:53:37 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 02:54:25 2017 +0700
----------------------------------------------------------------------
.../Analysis/Cz/CzechAnalyzer.cs | 51 +++++++++-----------
.../Analysis/Cz/CzechStemFilter.cs | 10 ++--
.../Analysis/Cz/CzechStemFilterFactory.cs | 10 ++--
.../Analysis/Cz/CzechStemmer.cs | 11 ++---
4 files changed, 38 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/816f0c9b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
index 00add40..25b0b12 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechAnalyzer.cs
@@ -28,23 +28,21 @@ namespace Lucene.Net.Analysis.Cz
*/
/// <summary>
- /// <seealso cref="Analyzer"/> for Czech language.
+ /// <see cref="Analyzer"/> for Czech language.
/// <para>
/// Supports an external list of stopwords (words that will not be indexed at
/// all). A default set of stopwords is used unless an alternative list is
/// specified.
/// </para>
- ///
- /// <a name="version"/>
/// <para>
- /// You must specify the required <seealso cref="Version"/> compatibility when creating
- /// CzechAnalyzer:
- /// <ul>
- /// <li>As of 3.1, words are stemmed with <seealso cref="CzechStemFilter"/>
- /// <li>As of 2.9, StopFilter preserves position increments
- /// <li>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
- /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- /// </ul>
+ /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+ /// <see cref="CzechAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item>As of 3.1, words are stemmed with <see cref="CzechStemFilter"/></item>
+ /// <item>As of 2.9, StopFilter preserves position increments</item>
+ /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class CzechAnalyzer : StopwordAnalyzerBase
@@ -94,10 +92,9 @@ namespace Lucene.Net.Analysis.Cz
private readonly CharArraySet stemExclusionTable;
/// <summary>
- /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+ /// Builds an analyzer with the default stop words (<see cref="DefaultStopSet"/>).
/// </summary>
- /// <param name="matchVersion"> Lucene version to match See
- /// <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
public CzechAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_SET)
{
@@ -106,8 +103,7 @@ namespace Lucene.Net.Analysis.Cz
/// <summary>
/// Builds an analyzer with the given stop words.
/// </summary>
- /// <param name="matchVersion"> Lucene version to match See
- /// <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
/// <param name="stopwords"> a stopword set </param>
public CzechAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
: this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
@@ -116,10 +112,9 @@ namespace Lucene.Net.Analysis.Cz
/// <summary>
 /// Builds an analyzer with the given stop words and a set of words to be
- /// excluded from the <seealso cref="CzechStemFilter"/>.
+ /// excluded from the <see cref="CzechStemFilter"/>.
/// </summary>
- /// <param name="matchVersion"> Lucene version to match See
- /// <seealso cref="<a href="#version">above</a>"/> </param>
+ /// <param name="matchVersion"> <see cref="LuceneVersion"/> to match </param>
/// <param name="stopwords"> a stopword set </param>
/// <param name="stemExclusionTable"> a stemming exclusion set </param>
public CzechAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable)
@@ -130,17 +125,17 @@ namespace Lucene.Net.Analysis.Cz
/// <summary>
/// Creates
- /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+ /// <see cref="Analyzer.TokenStreamComponents"/>
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
- /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
- /// built from a <seealso cref="StandardTokenizer"/> filtered with
- /// <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
- /// , and <seealso cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
+ /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+ /// built from a <see cref="StandardTokenizer"/> filtered with
+ /// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+ /// and <see cref="CzechStemFilter"/> (only if version is >= LUCENE_31). If
/// a version is >= LUCENE_31 and a stem exclusion set is provided via
- /// <seealso cref="#CzechAnalyzer(Version, CharArraySet, CharArraySet)"/> a
- /// <seealso cref="SetKeywordMarkerFilter"/> is added before
- /// <seealso cref="CzechStemFilter"/>. </returns>
+ /// <see cref="CzechAnalyzer(LuceneVersion, CharArraySet, CharArraySet)"/> a
+ /// <see cref="SetKeywordMarkerFilter"/> is added before
+ /// <see cref="CzechStemFilter"/>. </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/816f0c9b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
index 4bd47e0..a3f77b6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilter.cs
@@ -20,15 +20,15 @@ namespace Lucene.Net.Analysis.Cz
*/
/// <summary>
- /// A <seealso cref="TokenFilter"/> that applies <seealso cref="CzechStemmer"/> to stem Czech words.
+ /// A <see cref="TokenFilter"/> that applies <see cref="CzechStemmer"/> to stem Czech words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
- /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// <para><b>NOTE</b>: Input is expected to be in lowercase,
/// but with diacritical marks</para> </summary>
- /// <seealso cref= SetKeywordMarkerFilter </seealso>
+ /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
public sealed class CzechStemFilter : TokenFilter
{
private readonly CzechStemmer stemmer = new CzechStemmer();
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Cz
private readonly IKeywordAttribute keywordAttr;
public CzechStemFilter(TokenStream input)
- : base(input)
+ : base(input)
{
termAtt = AddAttribute<ICharTermAttribute>();
keywordAttr = AddAttribute<IKeywordAttribute>();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/816f0c9b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
index f19b18f..3b3bb84 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemFilterFactory.cs
@@ -21,21 +21,21 @@ namespace Lucene.Net.Analysis.Cz
*/
/// <summary>
- /// Factory for <seealso cref="CzechStemFilter"/>.
- /// <pre class="prettyprint">
+ /// Factory for <see cref="CzechStemFilter"/>.
+ /// <code>
/// <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.StandardTokenizerFactory"/>
/// <filter class="solr.LowerCaseFilterFactory"/>
/// <filter class="solr.CzechStemFilterFactory"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CzechStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new CzechStemFilterFactory </summary>
+ /// Creates a new <see cref="CzechStemFilterFactory"/> </summary>
public CzechStemFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/816f0c9b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
index f623a87..d87b843 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cz/CzechStemmer.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.Cz
/// Light Stemmer for Czech.
/// <para>
/// Implements the algorithm described in:
- /// <i>
+ /// <c>
/// Indexing and stemming approaches for the Czech language
- /// </i>
+ /// </c>
/// http://portal.acm.org/citation.cfm?id=1598600
/// </para>
/// </summary>
@@ -33,13 +33,12 @@ namespace Lucene.Net.Analysis.Cz
{
/// <summary>
/// Stem an input buffer of Czech text.
+ /// <para><b>NOTE</b>: Input is expected to be in lowercase,
+ /// but with diacritical marks</para>
/// </summary>
/// <param name="s"> input buffer </param>
/// <param name="len"> length of input buffer </param>
- /// <returns> length of input buffer after normalization
- ///
- /// <para><b>NOTE</b>: Input is expected to be in lowercase,
- /// but with diacritical marks</para> </returns>
+ /// <returns> length of input buffer after normalization</returns>
public virtual int Stem(char[] s, int len)
{
len = RemoveCase(s, len);
[4/6] lucenenet git commit: Lucene.Net.Analysis.CommonGrams refactor:
member accessibility and documentation comments
Posted by ni...@apache.org.
Lucene.Net.Analysis.CommonGrams refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/487927c0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/487927c0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/487927c0
Branch: refs/heads/api-work
Commit: 487927c003fd5d42e4b72d642278683ca0d31aec
Parents: 3e97f31
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 23:43:22 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 01:13:43 2017 +0700
----------------------------------------------------------------------
.../Analysis/CommonGrams/CommonGramsFilter.cs | 49 +++++++++------
.../CommonGrams/CommonGramsFilterFactory.cs | 9 +--
.../CommonGrams/CommonGramsQueryFilter.cs | 63 ++++++++++++--------
.../CommonGramsQueryFilterFactory.cs | 12 ++--
src/Lucene.Net.Core/Analysis/TokenFilter.cs | 19 ++++--
src/Lucene.Net.Core/Analysis/TokenStream.cs | 14 ++---
6 files changed, 99 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
index fcfe42d..e7578be 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -31,15 +31,14 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Construct bigrams for frequently occurring terms while indexing. Single terms
/// are still indexed too, with bigrams overlaid. This is achieved through the
- /// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type
- /// of <seealso cref="#GRAM_TYPE"/> Example:
- /// <ul>
- /// <li>input:"the quick brown fox"</li>
- /// <li>output:|"the","the-quick"|"brown"|"fox"|</li>
- /// <li>"the-quick" has a position increment of 0 so it is in the same position
- /// as "the" "the-quick" has a term.type() of "gram"</li>
- ///
- /// </ul>
+ /// use of <see cref="PositionIncrementAttribute.PositionIncrement"/>. Bigrams have a type
+ /// of <see cref="GRAM_TYPE"/> Example:
+ /// <list type="bullet">
+ /// <item>input:"the quick brown fox"</item>
+ /// <item>output:|"the","the-quick"|"brown"|"fox"|</item>
+ /// <item>"the-quick" has a position increment of 0 so it is in the same position
+ /// as "the" "the-quick" has a term.type() of "gram"</item>
+ /// </list>
/// </summary>
/*
@@ -47,7 +46,6 @@ namespace Lucene.Net.Analysis.CommonGrams
*/
public sealed class CommonGramsFilter : TokenFilter
{
-
public const string GRAM_TYPE = "gram";
private const char SEPARATOR = '_';
@@ -71,7 +69,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// bigrams with position increment 0 type=gram where one or both of the words
/// in a potential bigram are in the set of common words .
/// </summary>
- /// <param name="input"> TokenStream input in filter chain </param>
+ /// <param name="input"> <see cref="TokenStream"/> input in filter chain </param>
/// <param name="commonWords"> The set of common words. </param>
public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords)
: base(input)
@@ -89,11 +87,11 @@ namespace Lucene.Net.Analysis.CommonGrams
/// output the token. If the token and/or the following token are in the list
/// of common words also output a bigram with position increment 0 and
/// type="gram"
- ///
+ /// <para/>
/// TODO:Consider adding an option to not emit unigram stopwords
- /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be
+ /// as in CDL XTF BigramStopFilter, <see cref="CommonGramsQueryFilter"/> would need to be
/// changed to work with this.
- ///
+ /// <para/>
/// TODO: Consider optimizing for the case of three
/// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of",
/// "of-the", "the-year" but with proper management of positions we could
@@ -119,7 +117,7 @@ namespace Lucene.Net.Analysis.CommonGrams
* When valid, the buffer always contains at least the separator.
* If its empty, there is nothing before this stopword.
*/
- if (lastWasCommon || (Common && buffer.Length > 0))
+ if (lastWasCommon || (IsCommon && buffer.Length > 0))
{
savedState = CaptureState();
GramToken();
@@ -131,8 +129,21 @@ namespace Lucene.Net.Analysis.CommonGrams
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -146,8 +157,8 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Determines if the current token is a common term
/// </summary>
- /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns>
- private bool Common
+ /// <returns> <c>true</c> if the current token is a common term, <c>false</c> otherwise </returns>
+ private bool IsCommon
{
get
{
@@ -164,7 +175,7 @@ namespace Lucene.Net.Analysis.CommonGrams
buffer.Append(termAttribute.Buffer, 0, termAttribute.Length);
buffer.Append(SEPARATOR);
lastStartOffset = offsetAttribute.StartOffset;
- lastWasCommon = Common;
+ lastWasCommon = IsCommon;
}
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
index f63a71f..333ac68 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -23,14 +23,15 @@ namespace Lucene.Net.Analysis.CommonGrams
*/
/// <summary>
- /// Constructs a <seealso cref="CommonGramsFilter"/>.
- /// <pre class="prettyprint">
+ /// Constructs a <see cref="CommonGramsFilter"/>.
+ /// <code>
/// <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
@@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.CommonGrams
private readonly bool ignoreCase;
/// <summary>
- /// Creates a new CommonGramsFilterFactory </summary>
+ /// Creates a new <see cref="CommonGramsFilterFactory"/> </summary>
public CommonGramsFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
index 366621d..32039ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
@@ -18,28 +18,26 @@ namespace Lucene.Net.Analysis.CommonGrams
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
/// <summary>
- /// Wrap a CommonGramsFilter optimizing phrase queries by only returning single
+ /// Wrap a <see cref="CommonGramsFilter"/> optimizing phrase queries by only returning single
/// words when they are not a member of a bigram.
- ///
+ /// <para/>
/// Example:
- /// <ul>
- /// <li>query input to CommonGramsFilter: "the rain in spain falls mainly"
- /// <li>output of CommomGramsFilter/input to CommonGramsQueryFilter:
- /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"
- /// <li>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain",
- /// "falls", "mainly"
- /// </ul>
+ /// <list type="bullet">
+ /// <item>query input to CommonGramsFilter: "the rain in spain falls mainly"</item>
+ /// <item>output of CommonGramsFilter/input to CommonGramsQueryFilter:
+ /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"</item>
+ /// <item>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain",
+ /// "falls", "mainly"</item>
+ /// </list>
/// </summary>
-
- /*
- * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
- * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
- */
+ /// <remarks>
+ /// See: http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
+ /// http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
+ /// </remarks>
public sealed class CommonGramsQueryFilter : TokenFilter
{
-
private readonly ITypeAttribute typeAttribute;
private readonly IPositionIncrementAttribute posIncAttribute;
@@ -59,8 +57,21 @@ namespace Lucene.Net.Analysis.CommonGrams
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -72,10 +83,10 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Output bigrams whenever possible to optimize queries. Only output unigrams
/// when they are not a member of a bigram. Example:
- /// <ul>
- /// <li>input: "the rain in spain falls mainly"
- /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
- /// </ul>
+ /// <list type="bullet">
+ /// <item>input: "the rain in spain falls mainly"</item>
+ /// <item>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"</item>
+ /// </list>
/// </summary>
public override bool IncrementToken()
{
@@ -83,13 +94,13 @@ namespace Lucene.Net.Analysis.CommonGrams
{
State current = CaptureState();
- if (previous != null && !GramType)
+ if (previous != null && !IsGramType)
{
RestoreState(previous);
previous = current;
previousType = typeAttribute.Type;
- if (GramType)
+ if (IsGramType)
{
posIncAttribute.PositionIncrement = 1;
}
@@ -109,7 +120,7 @@ namespace Lucene.Net.Analysis.CommonGrams
RestoreState(previous);
previous = null;
- if (GramType)
+ if (IsGramType)
{
posIncAttribute.PositionIncrement = 1;
}
@@ -121,8 +132,8 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Convenience method to check if the current type is a gram type
/// </summary>
- /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns>
- public bool GramType
+ /// <returns> <c>true</c> if the current type is a gram type, <c>false</c> otherwise </returns>
+ public bool IsGramType
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
index f797390..1e067e9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
@@ -21,28 +21,28 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
- /// Construct <seealso cref="CommonGramsQueryFilter"/>.
+ /// Construct <see cref="CommonGramsQueryFilter"/>.
///
- /// <pre class="prettyprint">
+ /// <code>
/// <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory
{
-
/// <summary>
- /// Creates a new CommonGramsQueryFilterFactory </summary>
+ /// Creates a new <see cref="CommonGramsQueryFilterFactory"/> </summary>
public CommonGramsQueryFilterFactory(IDictionary<string, string> args)
: base(args)
{
}
/// <summary>
- /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
+ /// Create a <see cref="CommonGramsFilter"/> and wrap it with a <see cref="CommonGramsQueryFilter"/>
/// </summary>
public override TokenStream Create(TokenStream input)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
index d6a96fb..b082d6a 100644
--- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
@@ -61,12 +61,21 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// {@inheritDoc}
- /// <p>
- /// <b>NOTE:</b>
- /// The default implementation chains the call to the input TokenStream, so
- /// be sure to call <code>super.reset()</code> when overriding this method.
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
m_input.Reset();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs
index 90bf3f2..1e104e9 100644
--- a/src/Lucene.Net.Core/Analysis/TokenStream.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs
@@ -183,15 +183,15 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// this method is called by a consumer before it begins consumption using
- /// <seealso cref="#IncrementToken()"/>.
- /// <p>
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
/// Resets this stream to a clean state. Stateful implementations must implement
/// this method so that they can be reused, just as if they had been created fresh.
- /// <p>
- /// If you override this method, always call {@code super.reset()}, otherwise
- /// some internal state will not be correctly reset (e.g., <seealso cref="Tokenizer"/> will
- /// throw <seealso cref="IllegalStateException"/> on further usage).
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
public virtual void Reset()
{