You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/03 21:06:43 UTC

[8/8] lucenenet git commit: Lucene.Net.Analysis.Ru refactor: member accessibility and documentation comments

Lucene.Net.Analysis.Ru refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f1631b14
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f1631b14
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f1631b14

Branch: refs/heads/api-work
Commit: f1631b143c06ef3f660f90f59ef2ad2a4a9cbb66
Parents: 3dae672
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Feb 4 04:00:25 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Feb 4 04:00:25 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Ru/RussianAnalyzer.cs              | 26 ++++++++++++++------
 .../Analysis/Ru/RussianLetterTokenizer.cs       | 22 ++++++++---------
 .../Ru/RussianLetterTokenizerFactory.cs         |  7 +++---
 .../Analysis/Ru/RussianLightStemFilter.cs       |  2 +-
 .../Ru/RussianLightStemFilterFactory.cs         |  6 ++---
 .../Analysis/Ru/RussianLightStemmer.cs          |  3 +--
 6 files changed, 37 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
index e62f65b..1e26a34 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
@@ -35,13 +35,12 @@ namespace Lucene.Net.Analysis.Ru
     /// will not be indexed at all).
     /// A default set of stopwords is used unless an alternative list is specified.
     /// </para>
-    /// <a name="version"/>
     /// <para>You must specify the required <see cref="LuceneVersion"/>
-    /// compatibility when creating RussianAnalyzer:
-    /// <ul>
-    ///   <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
-    ///        SnowballFilter, and Snowball stopwords are used by default.
-    /// </ul>
+    /// compatibility when creating <see cref="RussianAnalyzer"/>:
+    /// <list type="bullet">
+    ///     <item> As of 3.1, <see cref="StandardTokenizer"/> is used, Snowball stemming is done with
+    ///        <see cref="SnowballFilter"/>, and Snowball stopwords are used by default.</item>
+    /// </list>
     /// </para>
     /// </summary>
     public sealed class RussianAnalyzer : StopwordAnalyzerBase
@@ -50,7 +49,18 @@ namespace Lucene.Net.Analysis.Ru
         /// List of typical Russian stopwords. (for backwards compatibility) </summary>
         /// @deprecated (3.1) Remove this for LUCENE 5.0 
         [Obsolete("(3.1) Remove this for LUCENE 5.0")]
-        private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] { "\u0430", "\u0431\u0435\u0437", "\u0431\u043e\u043b\u0435\u0435", "\u0431\u044b", "\u0431\u044b\u043b", "\u0431\u044b\u043b\u0430", "\u0431\u044b\u043b\u0438", "\u0431\u044b\u043b\u043e", "\u0431\u044b\u0442\u044c", "\u0432", "\u0432\u0430\u043c", "\u0432\u0430\u0441", "\u0432\u0435\u0441\u044c", "\u0432\u043e", "\u0432\u043e\u0442", "\u0432\u0441\u0435", "\u0432\u0441\u0435\u0433\u043e", "\u0432\u0441\u0435\u0445", "\u0432\u044b", "\u0433\u0434\u0435", "\u0434\u0430", "\u0434\u0430\u0436\u0435", "\u0434\u043b\u044f", "\u0434\u043e", "\u0435\u0433\u043e", "\u0435\u0435", "\u0435\u0439", "\u0435\u044e", "\u0435\u0441\u043b\u0438", "\u0435\u0441\u0442\u044c", "\u0435\u0449\u0435", "\u0436\u0435", "\u0437\u0430", "\u0437\u0434\u0435\u0441\u044c", "\u0438", "\u0438\u0437", "\u0438\u043b\u0438", "\u0438\u043c", "\u0438\u0445", "\u043a", "\u043a\u0430\u043a", "\u043a\u043e", "\u043a\u043e\u0433\u0434\u0430", "\u043a\u0442\u043e", "\u043b\u0438", "\u043b\u0438\u0431\u043e", "\u043c\u043d\u0435", "\u043c\u043e\u0436\u0435\u0442", "\u043c\u044b", "\u043d\u0430", "\u043d\u0430\u0434\u043e", "\u043d\u0430\u0448", "\u043d\u0435", "\u043d\u0435\u0433\u043e", "\u043d\u0435\u0435", "\u043d\u0435\u0442", "\u043d\u0438", "\u043d\u0438\u0445", "\u043d\u043e", "\u043d\u0443", "\u043e", "\u043e\u0431", "\u043e\u0434\u043d\u0430\u043a\u043e", "\u043e\u043d", "\u043e\u043d\u0430", "\u043e\u043d\u0438", "\u043e\u043d\u043e", "\u043e\u0442", "\u043e\u0447\u0435\u043d\u044c", "\u043f\u043e", "\u043f\u043e\u0434", "\u043f\u0440\u0438", "\u0441", "\u0441\u043e", "\u0442\u0430\u043a", "\u0442\u0430\u043a\u0436\u0435", "\u0442\u0430\u043a\u043e\u0439", "\u0442\u0430\u043c", "\u0442\u0435", "\u0442\u0435\u043c", "\u0442\u043e", "\u0442\u043e\u0433\u043e", "\u0442\u043e\u0436\u0435", "\u0442\u043e\u0439", "\u0442\u043e\u043b\u044c\u043a\u043e", "\u0442\u043e\u043c", "\u0442\u044b", "\u0443", "\u0443\u0436\u0435", "\u0445\u043e\u0442\u044f", "\u0447\u0435\u0433\u043e", "\u0447\u0435\u0439", "\u0447\u0435\u043c", "\u0447\u0442\u043e", "\u0447\u0442\u043e\u0431\u044b", "\u0447\u044c\u0435", "\u0447\u044c\u044f", "\u044d\u0442\u0430", "\u044d\u0442\u0438", "\u044d\u0442\u043e", "\u044f" };
+        private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] {
+            "\u0430", "\u0431\u0435\u0437", "\u0431\u043e\u043b\u0435\u0435", "\u0431\u044b", "\u0431\u044b\u043b", "\u0431\u044b\u043b\u0430", "\u0431\u044b\u043b\u0438", "\u0431\u044b\u043b\u043e", "\u0431\u044b\u0442\u044c", "\u0432",
+            "\u0432\u0430\u043c", "\u0432\u0430\u0441", "\u0432\u0435\u0441\u044c", "\u0432\u043e", "\u0432\u043e\u0442", "\u0432\u0441\u0435", "\u0432\u0441\u0435\u0433\u043e", "\u0432\u0441\u0435\u0445", "\u0432\u044b", "\u0433\u0434\u0435",
+            "\u0434\u0430", "\u0434\u0430\u0436\u0435", "\u0434\u043b\u044f", "\u0434\u043e", "\u0435\u0433\u043e", "\u0435\u0435", "\u0435\u0439", "\u0435\u044e", "\u0435\u0441\u043b\u0438", "\u0435\u0441\u0442\u044c",
+            "\u0435\u0449\u0435", "\u0436\u0435", "\u0437\u0430", "\u0437\u0434\u0435\u0441\u044c", "\u0438", "\u0438\u0437", "\u0438\u043b\u0438", "\u0438\u043c", "\u0438\u0445", "\u043a", "\u043a\u0430\u043a",
+            "\u043a\u043e", "\u043a\u043e\u0433\u0434\u0430", "\u043a\u0442\u043e", "\u043b\u0438", "\u043b\u0438\u0431\u043e", "\u043c\u043d\u0435", "\u043c\u043e\u0436\u0435\u0442", "\u043c\u044b", "\u043d\u0430", "\u043d\u0430\u0434\u043e",
+            "\u043d\u0430\u0448", "\u043d\u0435", "\u043d\u0435\u0433\u043e", "\u043d\u0435\u0435", "\u043d\u0435\u0442", "\u043d\u0438", "\u043d\u0438\u0445", "\u043d\u043e", "\u043d\u0443", "\u043e", "\u043e\u0431",
+            "\u043e\u0434\u043d\u0430\u043a\u043e", "\u043e\u043d", "\u043e\u043d\u0430", "\u043e\u043d\u0438", "\u043e\u043d\u043e", "\u043e\u0442", "\u043e\u0447\u0435\u043d\u044c", "\u043f\u043e", "\u043f\u043e\u0434", "\u043f\u0440\u0438",
+            "\u0441", "\u0441\u043e", "\u0442\u0430\u043a", "\u0442\u0430\u043a\u0436\u0435", "\u0442\u0430\u043a\u043e\u0439", "\u0442\u0430\u043c", "\u0442\u0435", "\u0442\u0435\u043c", "\u0442\u043e", "\u0442\u043e\u0433\u043e",
+            "\u0442\u043e\u0436\u0435", "\u0442\u043e\u0439", "\u0442\u043e\u043b\u044c\u043a\u043e", "\u0442\u043e\u043c", "\u0442\u044b", "\u0443", "\u0443\u0436\u0435", "\u0445\u043e\u0442\u044f", "\u0447\u0435\u0433\u043e", "\u0447\u0435\u0439",
+            "\u0447\u0435\u043c", "\u0447\u0442\u043e", "\u0447\u0442\u043e\u0431\u044b", "\u0447\u044c\u0435", "\u0447\u044c\u044f", "\u044d\u0442\u0430", "\u044d\u0442\u0438", "\u044d\u0442\u043e", "\u044f"
+        };
 
         /// <summary>
         /// File containing default Russian stopwords. </summary>
@@ -134,7 +144,7 @@ namespace Lucene.Net.Analysis.Ru
         /// <summary>
         /// Creates
         /// <see cref="Analyzer.TokenStreamComponents"/>
-        /// used to tokenize all the text in the provided <see cref="Reader"/>.
+        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
         /// </summary>
         /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
         ///         built from a <see cref="StandardTokenizer"/> filtered with

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
index 1ffa004..38b4180 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Ru
 	 */
 
     /// <summary>
-    /// A RussianLetterTokenizer is a <see cref="Tokenizer"/> that extends <see cref="LetterTokenizer"/>
+    /// A <see cref="RussianLetterTokenizer"/> is a <see cref="Tokenizer"/> that extends <see cref="Core.LetterTokenizer"/>
     /// by also allowing the basic Latin digits 0-9.
     /// <para>
     /// <a name="version"/>
@@ -32,12 +32,12 @@ namespace Lucene.Net.Analysis.Ru
     /// <see cref="RussianLetterTokenizer"/>:
     /// <ul>
     /// <li>As of 3.1, <see cref="CharTokenizer"/> uses an int based API to normalize and
-    /// detect token characters. See <see cref="CharTokenizer#isTokenChar(int)"/> and
-    /// <see cref="CharTokenizer#normalize(int)"/> for details.</li>
+    /// detect token characters. See <see cref="CharTokenizer.IsTokenChar(int)"/> and
+    /// <see cref="CharTokenizer.Normalize(int)"/> for details.</li>
     /// </ul>
     /// </para>
     /// </summary>
-    /// @deprecated (3.1) Use <see cref="StandardTokenizer"/> instead, which has the same functionality.
+    /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead, which has the same functionality.
     /// This filter will be removed in Lucene 5.0  
     [Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")]
     public class RussianLetterTokenizer : CharTokenizer
@@ -45,9 +45,10 @@ namespace Lucene.Net.Analysis.Ru
         private const int DIGIT_0 = '0';
         private const int DIGIT_9 = '9';
 
-        /// Construct a new RussianLetterTokenizer. * <param name="matchVersion"> Lucene version
-        /// to match See <see cref="<a href="#version">above</a>"/>
-        /// </param>
+        /// <summary>
+        /// Construct a new <see cref="RussianLetterTokenizer"/>.
+        /// </summary>
+        /// <param name="matchVersion"> lucene compatibility version </param>
         /// <param name="in">
         ///          the input to split up into tokens </param>
         public RussianLetterTokenizer(LuceneVersion matchVersion, TextReader @in)
@@ -57,10 +58,9 @@ namespace Lucene.Net.Analysis.Ru
 
         /// <summary>
         /// Construct a new RussianLetterTokenizer using a given
-        /// <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param
-        /// matchVersion Lucene version to match See
-        /// <see cref="<a href="#version">above</a>"/>
+        /// <see cref="AttributeSource.AttributeFactory"/>.
         /// </summary>
+        /// <param name="matchVersion"> lucene compatibility version </param>
         /// <param name="factory">
         ///          the attribute factory to use for this <see cref="Tokenizer"/> </param>
         /// <param name="in">
@@ -72,7 +72,7 @@ namespace Lucene.Net.Analysis.Ru
 
         /// <summary>
         /// Collects only characters which satisfy
-        /// <see cref="Character#isLetter(int)"/>.
+        /// <see cref="Character.IsLetter(int)"/>.
         /// </summary>
         protected override bool IsTokenChar(int c)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
index 16b09c2..d3ef34b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
@@ -23,14 +23,13 @@ namespace Lucene.Net.Analysis.Ru
 	 * limitations under the License.
 	 */
 
-    /// @deprecated Use <see cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.
+    /// @deprecated Use <see cref="Standard.StandardTokenizerFactory"/> instead.
     ///  This tokenizer has no Russian-specific functionality. 
-    [Obsolete("Use StandardTokenizerFactory instead.")]
+    [Obsolete("Use Standard.StandardTokenizerFactory instead.")]
     public class RussianLetterTokenizerFactory : TokenizerFactory
     {
-
         /// <summary>
-        /// Creates a new RussianLetterTokenizerFactory </summary>
+        /// Creates a new <see cref="RussianLetterTokenizerFactory"/> </summary>
         public RussianLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
         {
             AssureMatchVersion();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
index 66a1599..0d9a56d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
@@ -24,7 +24,7 @@ namespace Lucene.Net.Analysis.Ru
     /// words.
     /// <para>
     /// To prevent terms from being stemmed use an instance of
-    /// <see cref="SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+    /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
     /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
     /// </para>
     /// </summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
index 8073c78..2b3496b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
@@ -33,10 +33,10 @@ namespace Lucene.Net.Analysis.Ru
     /// </summary>
     public class RussianLightStemFilterFactory : TokenFilterFactory
     {
-
         /// <summary>
-        /// Creates a new RussianLightStemFilterFactory </summary>
-        public RussianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+        /// Creates a new <see cref="RussianLightStemFilterFactory"/> </summary>
+        public RussianLightStemFilterFactory(IDictionary<string, string> args) 
+            : base(args)
         {
             if (args.Count > 0)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f1631b14/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
index 9634983..1a66691 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
@@ -58,13 +58,12 @@ namespace Lucene.Net.Analysis.Ru
     /// Light Stemmer for Russian.
     /// <para>
     /// This stemmer implements the following algorithm:
-    /// <i>Indexing and Searching Strategies for the Russian Language.</i>
+    /// <c>Indexing and Searching Strategies for the Russian Language.</c>
     /// Ljiljana Dolamic and Jacques Savoy.
     /// </para>
     /// </summary>
     public class RussianLightStemmer
     {
-
         public virtual int Stem(char[] s, int len)
         {
             len = RemoveCase(s, len);