You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/03 21:06:43 UTC
[8/8] lucenenet git commit: Lucene.Net.Analysis.Ru refactor: member
accessibility and documentation comments
Lucene.Net.Analysis.Ru refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f1631b14
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f1631b14
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f1631b14
Branch: refs/heads/api-work
Commit: f1631b143c06ef3f660f90f59ef2ad2a4a9cbb66
Parents: 3dae672
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 04:00:25 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 04:00:25 2017 +0700
----------------------------------------------------------------------
.../Analysis/Ru/RussianAnalyzer.cs | 26 ++++++++++++++------
.../Analysis/Ru/RussianLetterTokenizer.cs | 22 ++++++++---------
.../Ru/RussianLetterTokenizerFactory.cs | 7 +++---
.../Analysis/Ru/RussianLightStemFilter.cs | 2 +-
.../Ru/RussianLightStemFilterFactory.cs | 6 ++---
.../Analysis/Ru/RussianLightStemmer.cs | 3 +--
6 files changed, 37 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
index e62f65b..1e26a34 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
@@ -35,13 +35,12 @@ namespace Lucene.Net.Analysis.Ru
/// will not be indexed at all).
/// A default set of stopwords is used unless an alternative list is specified.
/// </para>
- /// <a name="version"/>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating RussianAnalyzer:
- /// <ul>
- /// <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
- /// SnowballFilter, and Snowball stopwords are used by default.
- /// </ul>
+ /// compatibility when creating <see cref="RussianAnalyzer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.1, <see cref="StandardTokenizer"/> is used, Snowball stemming is done with
+ /// <see cref="SnowballFilter"/>, and Snowball stopwords are used by default.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class RussianAnalyzer : StopwordAnalyzerBase
@@ -50,7 +49,18 @@ namespace Lucene.Net.Analysis.Ru
/// List of typical Russian stopwords. (for backwards compatibility) </summary>
/// @deprecated (3.1) Remove this for LUCENE 5.0
[Obsolete("(3.1) Remove this for LUCENE 5.0")]
- private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] { "\u0430", "\u0431\u0435\u0437", "\u0431\u043e\u043b\u0435\u0435", "\u0431\u044b", "\u0431\u044b\u043b", "\u0431\u044b\u043b\u0430", "\u0431\u044b\u043b\u0438", "\u0431\u044b\u043b\u043e", "\u0431\u044b\u0442\u044c", "\u0432", "\u0432\u0430\u043c", "\u0432\u0430\u0441", "\u0432\u0435\u0441\u044c", "\u0432\u043e", "\u0432\u043e\u0442", "\u0432\u0441\u0435", "\u0432\u0441\u0435\u0433\u043e", "\u0432\u0441\u0435\u0445", "\u0432\u044b", "\u0433\u0434\u0435", "\u0434\u0430", "\u0434\u0430\u0436\u0435", "\u0434\u043b\u044f", "\u0434\u043e", "\u0435\u0433\u043e", "\u0435\u0435", "\u0435\u0439", "\u0435\u044e", "\u0435\u0441\u043b\u0438", "\u0435\u0441\u0442\u044c", "\u0435\u0449\u0435", "\u0436\u0435", "\u0437\u0430", "\u0437\u0434\u0435\u0441\u044c", "\u0438", "\u0438\u0437", "\u0438\u043b\u0438", "\u0438\u043c", "\u0438\u0445", "\u043a", "\u043a\u0430\u043a", "\u043a\u043e", "\u043a\u043e\u0433\u0434\u0430", "\u043a\u0442\u043e", "\u043b\u0438", "\u043b\u0438\u0431\u043e", "\u043c\u043d\u0435", "\u043c\u043e\u0436\u0435\u0442", "\u043c\u044b", "\u043d\u0430", "\u043d\u0430\u0434\u043e", "\u043d\u0430\u0448", "\u043d\u0435", "\u043d\u0435\u0433\u043e", "\u043d\u0435\u0435", "\u043d\u0435\u0442", "\u043d\u0438", "\u043d\u0438\u0445", "\u043d\u043e", "\u043d\u0443", "\u043e", "\u043e\u0431", "\u043e\u0434\u043d\u0430\u043a\u043e", "\u043e\u043d", "\u043e\u043d\u0430", "\u043e\u043d\u0438", "\u043e\u043d\u043e", "\u043e\u0442", "\u043e\u0447\u0435\u043d\u044c", "\u043f\u043e", "\u043f\u043e\u0434", "\u043f\u0440\u0438", "\u0441", "\u0441\u043e", "\u0442\u0430\u043a", "\u0442\u0430\u043a\u0436\u0435", "\u0442\u0430\u043a\u043e\u0439", "\u0442\u0430\u043c", "\u0442\u0435", "\u0442\u0435\u043c", "\u0442\u043e", "\u0442\u043e\u0433\u043e", "\u0442\u043e\u0436\u0435", "\u0442\u043e\u0439", "\u0442\u043e\u043b\u044c\u043a\u043e", "\u0442\u043e\u043c", "\u0442\u044b", "\u0443", "\u0443\u0436\u0435", 
"\u0445\u043e\u0442\u044f", "\u0447\u0435\u0433\u043e", "\u0447\u0435\u0439", "\u0447\u0435\u043c", "\u0447\u0442\u043e", "\u0447\u0442\u043e\u0431\u044b", "\u0447\u044c\u0435", "\u0447\u044c\u044f", "\u044d\u0442\u0430", "\u044d\u0442\u0438", "\u044d\u0442\u043e", "\u044f" };
+ private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] {
+ "\u0430", "\u0431\u0435\u0437", "\u0431\u043e\u043b\u0435\u0435", "\u0431\u044b", "\u0431\u044b\u043b", "\u0431\u044b\u043b\u0430", "\u0431\u044b\u043b\u0438", "\u0431\u044b\u043b\u043e", "\u0431\u044b\u0442\u044c", "\u0432",
+ "\u0432\u0430\u043c", "\u0432\u0430\u0441", "\u0432\u0435\u0441\u044c", "\u0432\u043e", "\u0432\u043e\u0442", "\u0432\u0441\u0435", "\u0432\u0441\u0435\u0433\u043e", "\u0432\u0441\u0435\u0445", "\u0432\u044b", "\u0433\u0434\u0435",
+ "\u0434\u0430", "\u0434\u0430\u0436\u0435", "\u0434\u043b\u044f", "\u0434\u043e", "\u0435\u0433\u043e", "\u0435\u0435", "\u0435\u0439", "\u0435\u044e", "\u0435\u0441\u043b\u0438", "\u0435\u0441\u0442\u044c",
+ "\u0435\u0449\u0435", "\u0436\u0435", "\u0437\u0430", "\u0437\u0434\u0435\u0441\u044c", "\u0438", "\u0438\u0437", "\u0438\u043b\u0438", "\u0438\u043c", "\u0438\u0445", "\u043a", "\u043a\u0430\u043a",
+ "\u043a\u043e", "\u043a\u043e\u0433\u0434\u0430", "\u043a\u0442\u043e", "\u043b\u0438", "\u043b\u0438\u0431\u043e", "\u043c\u043d\u0435", "\u043c\u043e\u0436\u0435\u0442", "\u043c\u044b", "\u043d\u0430", "\u043d\u0430\u0434\u043e",
+ "\u043d\u0430\u0448", "\u043d\u0435", "\u043d\u0435\u0433\u043e", "\u043d\u0435\u0435", "\u043d\u0435\u0442", "\u043d\u0438", "\u043d\u0438\u0445", "\u043d\u043e", "\u043d\u0443", "\u043e", "\u043e\u0431",
+ "\u043e\u0434\u043d\u0430\u043a\u043e", "\u043e\u043d", "\u043e\u043d\u0430", "\u043e\u043d\u0438", "\u043e\u043d\u043e", "\u043e\u0442", "\u043e\u0447\u0435\u043d\u044c", "\u043f\u043e", "\u043f\u043e\u0434", "\u043f\u0440\u0438",
+ "\u0441", "\u0441\u043e", "\u0442\u0430\u043a", "\u0442\u0430\u043a\u0436\u0435", "\u0442\u0430\u043a\u043e\u0439", "\u0442\u0430\u043c", "\u0442\u0435", "\u0442\u0435\u043c", "\u0442\u043e", "\u0442\u043e\u0433\u043e",
+ "\u0442\u043e\u0436\u0435", "\u0442\u043e\u0439", "\u0442\u043e\u043b\u044c\u043a\u043e", "\u0442\u043e\u043c", "\u0442\u044b", "\u0443", "\u0443\u0436\u0435", "\u0445\u043e\u0442\u044f", "\u0447\u0435\u0433\u043e", "\u0447\u0435\u0439",
+ "\u0447\u0435\u043c", "\u0447\u0442\u043e", "\u0447\u0442\u043e\u0431\u044b", "\u0447\u044c\u0435", "\u0447\u044c\u044f", "\u044d\u0442\u0430", "\u044d\u0442\u0438", "\u044d\u0442\u043e", "\u044f"
+ };
/// <summary>
/// File containing default Russian stopwords. </summary>
@@ -134,7 +144,7 @@ namespace Lucene.Net.Analysis.Ru
/// <summary>
/// Creates
/// <see cref="Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <see cref="Reader"/>.
+ /// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="Analyzer.TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
index 1ffa004..38b4180 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Ru
*/
/// <summary>
- /// A RussianLetterTokenizer is a <see cref="Tokenizer"/> that extends <see cref="LetterTokenizer"/>
+ /// A <see cref="RussianLetterTokenizer"/> is a <see cref="Tokenizer"/> that extends <see cref="Core.LetterTokenizer"/>
/// by also allowing the basic Latin digits 0-9.
/// <para>
/// <a name="version"/>
@@ -32,12 +32,12 @@ namespace Lucene.Net.Analysis.Ru
/// <see cref="RussianLetterTokenizer"/>:
/// <ul>
/// <li>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and
- /// detect token characters. See <see cref="CharTokenizer#isTokenChar(int)"/> and
- /// <see cref="CharTokenizer#normalize(int)"/> for details.</li>
+ /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and
+ /// <see cref="CharTokenizer.Normalize(int)"/> for details.</li>
/// </ul>
/// </para>
/// </summary>
- /// @deprecated (3.1) Use <see cref="StandardTokenizer"/> instead, which has the same functionality.
+ /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead, which has the same functionality.
/// This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")]
public class RussianLetterTokenizer : CharTokenizer
@@ -45,9 +45,10 @@ namespace Lucene.Net.Analysis.Ru
private const int DIGIT_0 = '0';
private const int DIGIT_9 = '9';
- /// Construct a new RussianLetterTokenizer. * <param name="matchVersion"> Lucene version
- /// to match See <see cref="<a href="#version">above</a>"/>
- /// </param>
+ /// <summary>
+ /// Construct a new <see cref="RussianLetterTokenizer"/>.
+ /// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="in">
/// the input to split up into tokens </param>
public RussianLetterTokenizer(LuceneVersion matchVersion, TextReader @in)
@@ -57,10 +58,9 @@ namespace Lucene.Net.Analysis.Ru
/// <summary>
/// Construct a new RussianLetterTokenizer using a given
- /// <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param
- /// matchVersion Lucene version to match See
- /// <see cref="<a href="#version">above</a>"/>
+ /// <see cref="AttributeSource.AttributeFactory"/>.
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="factory">
/// the attribute factory to use for this <see cref="Tokenizer"/> </param>
/// <param name="in">
@@ -72,7 +72,7 @@ namespace Lucene.Net.Analysis.Ru
/// <summary>
/// Collects only characters which satisfy
- /// <see cref="Character#isLetter(int)"/>.
+ /// <see cref="Character.IsLetter(int)"/>.
/// </summary>
protected override bool IsTokenChar(int c)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
index 16b09c2..d3ef34b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
@@ -23,14 +23,13 @@ namespace Lucene.Net.Analysis.Ru
* limitations under the License.
*/
- /// @deprecated Use <see cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.
+ /// @deprecated Use <see cref="Standard.StandardTokenizerFactory"/> instead.
/// This tokenizer has no Russian-specific functionality.
- [Obsolete("Use StandardTokenizerFactory instead.")]
+ [Obsolete("Use Standard.StandardTokenizerFactory instead.")]
public class RussianLetterTokenizerFactory : TokenizerFactory
{
-
/// <summary>
- /// Creates a new RussianLetterTokenizerFactory </summary>
+ /// Creates a new <see cref="RussianLetterTokenizerFactory"/> </summary>
public RussianLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
{
AssureMatchVersion();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
index 66a1599..0d9a56d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Ru
/// words.
/// <para>
/// To prevent terms from being stemmed use an instance of
- /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+ /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
index 8073c78..2b3496b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
@@ -33,10 +33,10 @@ namespace Lucene.Net.Analysis.Ru
/// </summary>
public class RussianLightStemFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new RussianLightStemFilterFactory </summary>
- public RussianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="RussianLightStemFilterFactory"/> </summary>
+ public RussianLightStemFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
index 9634983..1a66691 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
@@ -58,13 +58,12 @@ namespace Lucene.Net.Analysis.Ru
/// Light Stemmer for Russian.
/// <para>
/// This stemmer implements the following algorithm:
- /// <i>Indexing and Searching Strategies for the Russian Language.</i>
+ /// <c>Indexing and Searching Strategies for the Russian Language.</c>
/// Ljiljana Dolamic and Jacques Savoy.
/// </para>
/// </summary>
public class RussianLightStemmer
{
-
public virtual int Stem(char[] s, int len)
{
len = RemoveCase(s, len);