You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 14:54:56 UTC

[1/6] lucenenet git commit: Lucene.Net.Analysis.Ar refactor: accessibility and documentation comments

Repository: lucenenet
Updated Branches:
  refs/heads/api-work bc485b4c4 -> 917b4fdf5


Lucene.Net.Analysis.Ar refactor: accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/2878664e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/2878664e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/2878664e

Branch: refs/heads/api-work
Commit: 2878664e260be46bd20a3996dce0bb770aae3ab2
Parents: bc485b4
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:09:32 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:09:32 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Ar/ArabicAnalyzer.cs               | 44 +++++------
 .../Analysis/Ar/ArabicLetterTokenizer.cs        | 82 ++++++++++----------
 .../Analysis/Ar/ArabicLetterTokenizerFactory.cs | 28 +++----
 .../Analysis/Ar/ArabicNormalizationFilter.cs    |  4 +-
 .../Ar/ArabicNormalizationFilterFactory.cs      |  9 ++-
 .../Analysis/Ar/ArabicNormalizer.cs             | 27 +++----
 .../Analysis/Ar/ArabicStemFilter.cs             | 13 ++--
 .../Analysis/Ar/ArabicStemFilterFactory.cs      |  9 ++-
 .../Analysis/Ar/ArabicStemmer.cs                | 21 +++--
 9 files changed, 114 insertions(+), 123 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
index 9db2bdf..e484850 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
@@ -27,23 +27,20 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// <seealso cref="Analyzer"/> for Arabic. 
-    /// <para>
+    /// <see cref="Analyzer"/> for Arabic. 
+    /// <para/>
     /// This analyzer implements light-stemming as specified by:
-    /// <i>
+    /// <c>
     /// Light Stemming for Arabic Information Retrieval
-    /// </i>    
+    /// </c>    
     /// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf
-    /// </para>
-    /// <para>
+    /// <para/>
     /// The analysis package contains three primary components:
-    /// <ul>
-    ///  <li><seealso cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.
-    ///  <li><seealso cref="ArabicStemFilter"/>: Arabic light stemming
-    ///  <li>Arabic stop words file: a set of default Arabic stop words.
-    /// </ul>
-    /// 
-    /// </para>
+    /// <list type="bullet">
+    ///     <item><see cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.</item>
+    ///     <item><see cref="ArabicStemFilter"/>: Arabic light stemming</item>
+    ///     <item>Arabic stop words file: a set of default Arabic stop words.</item>
+    /// </list>
     /// </summary>
     public sealed class ArabicAnalyzer : StopwordAnalyzerBase
     {
@@ -94,7 +91,7 @@ namespace Lucene.Net.Analysis.Ar
         private readonly CharArraySet stemExclusionSet;
 
         /// <summary>
-        /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+        /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
         /// </summary>
         public ArabicAnalyzer(LuceneVersion matchVersion)
               : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -115,8 +112,8 @@ namespace Lucene.Net.Analysis.Ar
 
         /// <summary>
         /// Builds an analyzer with the given stop word. If a none-empty stem exclusion set is
-        /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
-        /// <seealso cref="ArabicStemFilter"/>.
+        /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
+        /// <see cref="ArabicStemFilter"/>.
         /// </summary>
         /// <param name="matchVersion">
         ///          lucene compatibility version </param>
@@ -131,15 +128,14 @@ namespace Lucene.Net.Analysis.Ar
         }
 
         /// <summary>
-        /// Creates
-        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+        /// Creates <see cref="Analyzer.TokenStreamComponents"/>
+        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
         /// </summary>
-        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        ///         built from an <seealso cref="StandardTokenizer"/> filtered with
-        ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
-        ///         <seealso cref="ArabicNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
-        ///         if a stem exclusion set is provided and <seealso cref="ArabicStemFilter"/>. </returns>
+        /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+        ///         built from an <see cref="StandardTokenizer"/> filtered with
+        ///         <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
+        ///         <see cref="ArabicNormalizationFilter"/>, <see cref="SetKeywordMarkerFilter"/>
+        ///         if a stem exclusion set is provided and <see cref="ArabicStemFilter"/>. </returns>
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
 #pragma warning disable 612, 618

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
index 5fa5827..0e4e28c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -31,52 +31,54 @@ namespace Lucene.Net.Analysis.Ar
     /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
     /// </para>
     /// <para>
-    /// <a name="version"/>
-    /// You must specify the required <seealso cref="Version"/> compatibility when creating
-    /// <seealso cref="ArabicLetterTokenizer"/>:
-    /// <ul>
-    /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
-    /// detect token characters. See <seealso cref="#isTokenChar(int)"/> and
-    /// <seealso cref="#normalize(int)"/> for details.</li>
-    /// </ul>
+    /// <paramref name="matchVersion"/>
+    /// You must specify the required <see cref="LuceneVersion"/> compatibility when creating
+    /// <see cref="ArabicLetterTokenizer"/>:
+    /// <list type="bullet">
+    /// <item>As of 3.1, <see cref="Util.CharTokenizer"/> uses an int based API to normalize and
+    /// detect token characters. See <see cref="IsTokenChar(int)"/> and
+    /// <see cref="Util.CharTokenizer.Normalize(int)"/> for details.</item>
+    /// </list>
     /// </para>
     /// </summary>
-    /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead. 
+    /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead. 
     [Obsolete("(3.1) Use StandardTokenizer instead.")]
-	public class ArabicLetterTokenizer : LetterTokenizer
-	{
-	  /// <summary>
-	  /// Construct a new ArabicLetterTokenizer. </summary>
-	  /// <param name="matchVersion"> Lucene version
-	  /// to match See <seealso cref="<a href="#version">above</a>"/>
-	  /// </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in) 
-            : base(matchVersion, @in)
-	  {
-	  }
+    public class ArabicLetterTokenizer : LetterTokenizer
+    {
+        /// <summary>
+        /// Construct a new ArabicLetterTokenizer. </summary>
+        /// <param name="matchVersion"> Lucene version
+        /// to match See <seealso cref="<a href="#version">above</a>"/>
+        /// </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public ArabicLetterTokenizer(LuceneVersion matchVersion, TextReader @in)
+              : base(matchVersion, @in)
+        {
+        }
 
-	  /// <summary>
-	  /// Construct a new ArabicLetterTokenizer using a given
-	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>. * @param
-	  /// matchVersion Lucene version to match See
-	  /// <seealso cref="<a href="#version">above</a>"/>
-	  /// </summary>
-	  /// <param name="factory">
-	  ///          the attribute factory to use for this Tokenizer </param>
-	  /// <param name="in">
-	  ///          the input to split up into tokens </param>
-	  public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in) 
+        /// <summary>
+        /// Construct a new <see cref="ArabicLetterTokenizer"/> using a given
+        /// <see cref="AttributeSource.AttributeFactory"/>. 
+        /// </summary>
+        /// <param name="matchVersion">
+        ///         matchVersion Lucene version to match See
+        ///         <see cref="LuceneVersion"/>.
+        /// </param>
+        /// <param name="factory">
+        ///          the attribute factory to use for this Tokenizer </param>
+        /// <param name="in">
+        ///          the input to split up into tokens </param>
+        public ArabicLetterTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader @in)
             : base(matchVersion, factory, @in)
-	  {
-	  }
+        {
+        }
 
-	  /// <summary>
-	  /// Allows for Letter category or NonspacingMark category </summary>
-	  /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso>
-	  protected override bool IsTokenChar(int c)
-	  {
+        /// <summary>
+        /// Allows for Letter category or NonspacingMark category </summary>
+        /// <seealso cref="LetterTokenizer.IsTokenChar(int)"/>
+        protected override bool IsTokenChar(int c)
+        {
             return base.IsTokenChar(c) || Character.GetType(c) == UnicodeCategory.NonSpacingMark;
         }
     }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
index 43b08d7..366d85c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
@@ -24,24 +24,24 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="ArabicLetterTokenizer"/> </summary>
+    /// Factory for <see cref="ArabicLetterTokenizer"/> </summary>
     /// @deprecated (3.1) Use StandardTokenizerFactory instead.
     ///  
     [Obsolete("(3.1) Use StandardTokenizerFactory instead.")]
-	public class ArabicLetterTokenizerFactory : TokenizerFactory
-	{
+    public class ArabicLetterTokenizerFactory : TokenizerFactory
+    {
 
-	  /// <summary>
-	  /// Creates a new ArabicNormalizationFilterFactory </summary>
-	  public ArabicLetterTokenizerFactory(IDictionary<string, string> args) 
-            : base(args)
-	  {
-		AssureMatchVersion();
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+        /// <summary>
+        /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary>
+        public ArabicLetterTokenizerFactory(IDictionary<string, string> args)
+              : base(args)
+        {
+            AssureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
         public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
index 7d2fa2a..7f87ecf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
@@ -20,10 +20,8 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicNormalizer"/> to normalize the orthography.
-    /// 
+    /// A <see cref="TokenFilter"/> that applies <see cref="ArabicNormalizer"/> to normalize the orthography.
     /// </summary>
-
     public sealed class ArabicNormalizationFilter : TokenFilter
     {
         private readonly ArabicNormalizer normalizer = new ArabicNormalizer();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
index 840522c..0b92b5c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
@@ -21,20 +21,21 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="ArabicNormalizationFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="ArabicNormalizationFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
     ///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class ArabicNormalizationFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
     {
 
         /// <summary>
-        /// Creates a new ArabicNormalizationFilterFactory </summary>
+        /// Creates a new <see cref="ArabicNormalizationFilterFactory"/> </summary>
         public ArabicNormalizationFilterFactory(IDictionary<string, string> args)
               : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
index 47ebe76..9733198 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
@@ -20,21 +20,18 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    ///  Normalizer for Arabic.
-    ///  <para>
-    ///  Normalization is done in-place for efficiency, operating on a termbuffer.
-    /// </para>
-    ///  <para>
-    ///  Normalization is defined as:
-    ///  <ul>
-    ///  <li> Normalization of hamza with alef seat to a bare alef.
-    ///  <li> Normalization of teh marbuta to heh
-    ///  <li> Normalization of dotless yeh (alef maksura) to yeh.
-    ///  <li> Removal of Arabic diacritics (the harakat)
-    ///  <li> Removal of tatweel (stretching character).
-    /// </ul>
-    /// 
-    /// </para>
+    /// Normalizer for Arabic.
+    /// <para/>
+    /// Normalization is done in-place for efficiency, operating on a termbuffer.
+    /// <para/>
+    /// Normalization is defined as:
+    /// <list type="bullet">
+    ///     <item> Normalization of hamza with alef seat to a bare alef.</item>
+    ///     <item> Normalization of teh marbuta to heh</item>
+    ///     <item> Normalization of dotless yeh (alef maksura) to yeh.</item>
+    ///     <item> Removal of Arabic diacritics (the harakat)</item>
+    ///     <item> Removal of tatweel (stretching character).</item>
+    /// </list>
     /// </summary>
     public class ArabicNormalizer
     {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
index 54027ed..a8d2745 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
@@ -20,14 +20,13 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicStemmer"/> to stem Arabic words..
-    /// <para>
+    /// A <see cref="TokenFilter"/> that applies <see cref="ArabicStemmer"/> to stem Arabic words..
+    /// <para/>
     /// To prevent terms from being stemmed use an instance of
-    /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
-    /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
-    /// </para> </summary>
-    /// <seealso cref= SetKeywordMarkerFilter  </seealso>
-
+    /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+    /// the <see cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+    /// </summary>
+    /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
     public sealed class ArabicStemFilter : TokenFilter
     {
         private readonly ArabicStemmer stemmer = new ArabicStemmer();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
index 08d0d19..0238b5b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="ArabicStemFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="ArabicStemFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
     ///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
     ///     &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class ArabicStemFilterFactory : TokenFilterFactory
     {
 
         /// <summary>
-        /// Creates a new ArabicStemFilterFactory </summary>
+        /// Creates a new <see cref="ArabicStemFilterFactory"/> </summary>
         public ArabicStemFilterFactory(IDictionary<string, string> args)
               : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/2878664e/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
index 8ba6ca7..444b5d3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
@@ -20,18 +20,15 @@ namespace Lucene.Net.Analysis.Ar
 	 */
 
     /// <summary>
-    ///  Stemmer for Arabic.
-    ///  <para>
-    ///  Stemming  is done in-place for efficiency, operating on a termbuffer.
-    /// </para>
-    ///  <para>
-    ///  Stemming is defined as:
-    ///  <ul>
-    ///  <li> Removal of attached definite article, conjunction, and prepositions.
-    ///  <li> Stemming of common suffixes.
-    /// </ul>
-    /// 
-    /// </para>
+    /// Stemmer for Arabic.
+    /// <para/>
+    /// Stemming is done in-place for efficiency, operating on a termbuffer.
+    /// <para/>
+    /// Stemming is defined as:
+    /// <list type="bullet">
+    ///     <item> Removal of attached definite article, conjunction, and prepositions.</item>
+    ///     <item> Stemming of common suffixes.</item>
+    /// </list>
     /// </summary>
     public class ArabicStemmer
     {


[3/6] lucenenet git commit: Lucene.Net.Analysis.Br refactor: member accessibility and documentation comments

Posted by ni...@apache.org.
Lucene.Net.Analysis.Br refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/83902e97
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/83902e97
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/83902e97

Branch: refs/heads/api-work
Commit: 83902e979cec023948dcef9dba21b0c02924d171
Parents: 1b0bca6
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:30:21 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:30:21 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Br/BrazilianAnalyzer.cs            | 20 +++++++++----------
 .../Analysis/Br/BrazilianStemFilter.cs          | 18 ++++++++---------
 .../Analysis/Br/BrazilianStemFilterFactory.cs   |  9 +++++----
 .../Analysis/Br/BrazilianStemmer.cs             | 21 +++++++++-----------
 4 files changed, 33 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
index 12109b6..b6bd791 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
@@ -27,15 +27,15 @@ namespace Lucene.Net.Analysis.Br
 	 */
 
     /// <summary>
-    /// <seealso cref="Analyzer"/> for Brazilian Portuguese language. 
+    /// <see cref="Analyzer"/> for Brazilian Portuguese language. 
     /// <para>
     /// Supports an external list of stopwords (words that
     /// will not be indexed at all) and an external list of exclusions (words that will
     /// not be stemmed, but indexed).
     /// </para>
     /// 
-    /// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
-    /// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+    /// <para><b>NOTE</b>: This class uses the same <see cref="LuceneVersion"/>
+    /// dependent settings as <see cref="StandardAnalyzer"/>.</para>
     /// </summary>
     public sealed class BrazilianAnalyzer : StopwordAnalyzerBase
     {
@@ -86,7 +86,7 @@ namespace Lucene.Net.Analysis.Br
         private CharArraySet excltable = CharArraySet.EMPTY_SET;
 
         /// <summary>
-        /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+        /// Builds an analyzer with the default stop words (<see cref="DefaultStopSet"/>).
         /// </summary>
         public BrazilianAnalyzer(LuceneVersion matchVersion)
               : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -120,13 +120,13 @@ namespace Lucene.Net.Analysis.Br
 
         /// <summary>
         /// Creates
-        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+        /// <see cref="Analyzer.TokenStreamComponents"/>
+        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
         /// </summary>
-        /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        ///         built from a <seealso cref="StandardTokenizer"/> filtered with
-        ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StandardFilter"/>, <seealso cref="StopFilter"/>
-        ///         , and <seealso cref="BrazilianStemFilter"/>. </returns>
+        /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+        ///         built from a <see cref="StandardTokenizer"/> filtered with
+        ///         <see cref="LowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/>,
+        ///         and <see cref="BrazilianStemFilter"/>. </returns>
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             Tokenizer source = new StandardTokenizer(m_matchVersion, reader);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
index 69580e4..0b94e4c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
@@ -21,19 +21,19 @@ namespace Lucene.Net.Analysis.Br
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> that applies <seealso cref="BrazilianStemmer"/>.
+    /// A <see cref="TokenFilter"/> that applies <see cref="BrazilianStemmer"/>.
     /// <para>
     /// To prevent terms from being stemmed use an instance of
-    /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
-    /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
-    /// </para> </summary>
-    /// <seealso cref= SetKeywordMarkerFilter
-    ///  </seealso>
+    /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+    /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
+    /// </para>
+    /// </summary>
+    /// <seealso cref="Miscellaneous.SetKeywordMarkerFilter"/>
     public sealed class BrazilianStemFilter : TokenFilter
     {
 
         /// <summary>
-        /// <seealso cref="BrazilianStemmer"/> in use by this filter.
+        /// <see cref="BrazilianStemmer"/> in use by this filter.
         /// </summary>
         private BrazilianStemmer stemmer = new BrazilianStemmer();
         private HashSet<string> exclusions = null; // LUCENENET TODO: This is odd. No way to set it at all, so it cannot possibly have any values.
@@ -41,9 +41,9 @@ namespace Lucene.Net.Analysis.Br
         private readonly IKeywordAttribute keywordAttr;
 
         /// <summary>
-        /// Creates a new BrazilianStemFilter 
+        /// Creates a new <see cref="BrazilianStemFilter"/> 
         /// </summary>
-        /// <param name="in"> the source <seealso cref="TokenStream"/>  </param>
+        /// <param name="in"> the source <see cref="TokenStream"/>  </param>
         public BrazilianStemFilter(TokenStream @in)
               : base(@in)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
index 332ea42..0be7f71 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Br
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="BrazilianStemFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="BrazilianStemFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
     ///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
     ///     &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class BrazilianStemFilterFactory : TokenFilterFactory
     {
 
         /// <summary>
-        /// Creates a new BrazilianStemFilterFactory </summary>
+        /// Creates a new <see cref="BrazilianStemFilterFactory"/> </summary>
         public BrazilianStemFilterFactory(IDictionary<string, string> args) : base(args)
         {
             if (args.Count > 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/83902e97/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
index b08f0dd..3d35ee5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemmer.cs
@@ -41,10 +41,10 @@ namespace Lucene.Net.Analysis.Br
         }
 
         /// <summary>
-        /// Stems the given term to an unique <tt>discriminator</tt>.
+        /// Stems the given term to an unique <c>discriminator</c>.
         /// </summary>
         /// <param name="term">  The term that should be stemmed. </param>
-        /// <returns>      Discriminator for <tt>term</tt> </returns>
+        /// <returns>Discriminator for <paramref name="term"/></returns>
         protected internal virtual string Stem(string term)
         {
             bool altered = false; // altered the term
@@ -106,7 +106,7 @@ namespace Lucene.Net.Analysis.Br
         /// <summary>
         /// Checks a term if it can be processed indexed.
         /// </summary>
-        /// <returns>  true if it can be indexed </returns>
+        /// <returns> true if it can be indexed </returns>
         private bool IsIndexable(string term)
         {
             return (term.Length < 30) && (term.Length > 2);
@@ -335,9 +335,9 @@ namespace Lucene.Net.Analysis.Br
         }
 
         /// <summary>
-        /// Replace a string suffix by another
+        /// Replace a <see cref="string"/> suffix by another
         /// </summary>
-        /// <returns> the replaced String </returns>
+        /// <returns> the replaced <see cref="string"/> </returns>
         private string ReplaceSuffix(string value, string toReplace, string changeTo)
         {
             string vvalue;
@@ -361,9 +361,9 @@ namespace Lucene.Net.Analysis.Br
         }
 
         /// <summary>
-        /// Remove a string suffix
+        /// Remove a <see cref="string"/> suffix
         /// </summary>
-        /// <returns> the String without the suffix </returns>
+        /// <returns> the <see cref="string"/> without the suffix </returns>
         private string RemoveSuffix(string value, string toRemove)
         {
             // be-safe !!!
@@ -376,7 +376,7 @@ namespace Lucene.Net.Analysis.Br
         }
 
         /// <summary>
-        /// See if a suffix is preceded by a String
+        /// See if a suffix is preceded by a <see cref="string"/>
         /// </summary>
         /// <returns> true if the suffix is preceded </returns>
         private bool SuffixPreceded(string value, string suffix, string preceded)
@@ -1292,7 +1292,6 @@ namespace Lucene.Net.Analysis.Br
 
         /// <summary>
         /// Delete suffix 'i' if in RV and preceded by 'c'
-        /// 
         /// </summary>
         private void Step3()
         {
@@ -1313,7 +1312,6 @@ namespace Lucene.Net.Analysis.Br
         /// 
         /// If the word ends with one of the suffixes (os a i o á í ó)
         /// in RV, delete it
-        /// 
         /// </summary>
         private void Step4()
         {
@@ -1351,7 +1349,6 @@ namespace Lucene.Net.Analysis.Br
         /// delete the 'u' (or 'i')
         /// 
         /// Or if the word ends ç remove the cedilha
-        /// 
         /// </summary>
         private void Step5()
         {
@@ -1384,7 +1381,7 @@ namespace Lucene.Net.Analysis.Br
         /// <summary>
         /// For log and debug purpose
         /// </summary>
-        /// <returns>  TERM, CT, RV, R1 and R2 </returns>
+        /// <returns> TERM, CT, RV, R1 and R2 </returns>
         public virtual string Log()
         {
             return " (TERM = " + TERM + ")" + " (CT = " + CT + ")" + " (RV = " + RV + ")" + " (R1 = " + R1 + ")" + " (R2 = " + R2 + ")";


[6/6] lucenenet git commit: Lucene.Net.Analysis.Cjk refactor: member accessibility and documentation comments

Posted by ni...@apache.org.
Lucene.Net.Analysis.Cjk refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/917b4fdf
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/917b4fdf
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/917b4fdf

Branch: refs/heads/api-work
Commit: 917b4fdf53f978f32219cef6edf31f3c30b84dea
Parents: 7fdbd66
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 21:53:51 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 21:53:51 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Cjk/CJKAnalyzer.cs                 | 12 ++---
 .../Analysis/Cjk/CJKBigramFilter.cs             | 32 ++++++-------
 .../Analysis/Cjk/CJKBigramFilterFactory.cs      | 13 +++---
 .../Analysis/Cjk/CJKTokenizer.cs                | 14 +++---
 .../Analysis/Cjk/CJKTokenizerFactory.cs         | 13 +++---
 .../Analysis/Cjk/CJKWidthFilter.cs              | 49 ++++++++++++++------
 .../Analysis/Cjk/CJKWidthFilterFactory.cs       | 10 ++--
 7 files changed, 82 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
index 0fcc42c..28c7a52 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKAnalyzer.cs
@@ -25,16 +25,16 @@ namespace Lucene.Net.Analysis.Cjk
 	 */
 
     /// <summary>
-    /// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="StandardTokenizer"/>,
-    /// normalizes content with <seealso cref="CJKWidthFilter"/>, folds case with
-    /// <seealso cref="LowerCaseFilter"/>, forms bigrams of CJK with <seealso cref="CJKBigramFilter"/>,
-    /// and filters stopwords with <seealso cref="StopFilter"/>
+    /// An <see cref="Analyzer"/> that tokenizes text with <see cref="StandardTokenizer"/>,
+    /// normalizes content with <see cref="CJKWidthFilter"/>, folds case with
+    /// <see cref="LowerCaseFilter"/>, forms bigrams of CJK with <see cref="CJKBigramFilter"/>,
+    /// and filters stopwords with <see cref="StopFilter"/>
     /// </summary>
     public sealed class CJKAnalyzer : StopwordAnalyzerBase
     {
         /// <summary>
         /// File containing default CJK stopwords.
-        /// <p/>
+        /// <para/>
         /// Currently it contains some common English words that are not usually
         /// useful for searching and some double-byte interpunctions.
         /// </summary>
@@ -72,7 +72,7 @@ namespace Lucene.Net.Analysis.Cjk
         }
 
         /// <summary>
-        /// Builds an analyzer which removes words in <seealso cref="#getDefaultStopSet()"/>.
+        /// Builds an analyzer which removes words in <see cref="DefaultStopSet"/>.
         /// </summary>
         public CJKAnalyzer(LuceneVersion matchVersion)
               : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
index 4b8cb17..443ea04 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
@@ -23,18 +23,18 @@ namespace Lucene.Net.Analysis.Cjk
 	 */
 
     /// <summary>
-    /// Forms bigrams of CJK terms that are generated from StandardTokenizer
+    /// Forms bigrams of CJK terms that are generated from <see cref="StandardTokenizer"/>
     /// or ICUTokenizer.
     /// <para>
     /// CJK types are set by these tokenizers, but you can also use 
-    /// <seealso cref="#CJKBigramFilter(TokenStream, int)"/> to explicitly control which
+    /// <see cref="CJKBigramFilter(TokenStream, int)"/> to explicitly control which
     /// of the CJK scripts are turned into bigrams.
     /// </para>
     /// <para>
     /// By default, when a CJK character has no adjacent characters to form
     /// a bigram, it is output in unigram form. If you want to always output
     /// both unigrams and bigrams, set the <code>outputUnigrams</code>
-    /// flag in <seealso cref="CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)"/>.
+    /// flag in <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int, bool)"/>.
     /// This can be used for a combined unigram+bigram approach.
     /// </para>
     /// <para>
@@ -90,22 +90,22 @@ namespace Lucene.Net.Analysis.Cjk
         private readonly IPositionLengthAttribute posLengthAtt;
 
         // buffers containing codepoint and offsets in parallel
-        internal int[] buffer = new int[8];
-        internal int[] startOffset = new int[8];
-        internal int[] endOffset = new int[8];
+        private int[] buffer = new int[8];
+        private int[] startOffset = new int[8];
+        private int[] endOffset = new int[8];
         // length of valid buffer
-        internal int bufferLen;
+        private int bufferLen;
         // current buffer index
-        internal int index;
+        private int index;
 
         // the last end offset, to determine if we should bigram across tokens
-        internal int lastEndOffset;
+        private int lastEndOffset;
 
         private bool exhausted;
 
         /// <summary>
-        /// Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
-        ///       CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
+        /// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int)">
+        ///       CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)</see>
         /// </summary>
         public CJKBigramFilter(TokenStream @in)
               : this(@in, HAN | HIRAGANA | KATAKANA | HANGUL)
@@ -113,8 +113,8 @@ namespace Lucene.Net.Analysis.Cjk
         }
 
         /// <summary>
-        /// Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
-        ///       CJKBigramFilter(in, flags, false)}
+        /// Calls <see cref="CJKBigramFilter.CJKBigramFilter(TokenStream, int, bool)">
+        ///       CJKBigramFilter(in, flags, false)</see>
         /// </summary>
         public CJKBigramFilter(TokenStream @in, int flags)
               : this(@in, flags, false)
@@ -122,10 +122,10 @@ namespace Lucene.Net.Analysis.Cjk
         }
 
         /// <summary>
-        /// Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+        /// Create a new <see cref="CJKBigramFilter"/>, specifying which writing systems should be bigrammed,
         /// and whether or not unigrams should also be output. </summary>
-        /// <param name="flags"> OR'ed set from <seealso cref="CJKBigramFilter#HAN"/>, <seealso cref="CJKBigramFilter#HIRAGANA"/>, 
-        ///        <seealso cref="CJKBigramFilter#KATAKANA"/>, <seealso cref="CJKBigramFilter#HANGUL"/> </param>
+        /// <param name="flags"> OR'ed set from <see cref="CJKBigramFilter.HAN"/>, <see cref="CJKBigramFilter.HIRAGANA"/>, 
+        ///        <see cref="CJKBigramFilter.KATAKANA"/>, <see cref="CJKBigramFilter.HANGUL"/> </param>
         /// <param name="outputUnigrams"> true if unigrams for the selected writing systems should also be output.
         ///        when this is false, this is only done when there are no adjacent characters to form
         ///        a bigram. </param>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
index 8fd34fd..b9e4d97 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilterFactory.cs
@@ -21,8 +21,8 @@ namespace Lucene.Net.Analysis.Cjk
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="CJKBigramFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="CJKBigramFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -32,15 +32,16 @@ namespace Lucene.Net.Analysis.Cjk
     ///       han="true" hiragana="true" 
     ///       katakana="true" hangul="true" outputUnigrams="false" /&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class CJKBigramFilterFactory : TokenFilterFactory
     {
-        internal readonly int flags;
-        internal readonly bool outputUnigrams;
+        private readonly int flags;
+        private readonly bool outputUnigrams;
 
         /// <summary>
-        /// Creates a new CJKBigramFilterFactory </summary>
+        /// Creates a new <see cref="CJKBigramFilterFactory"/> </summary>
         public CJKBigramFilterFactory(IDictionary<string, string> args)
               : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
index 1ff4f07..160306d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -32,11 +32,11 @@ namespace Lucene.Net.Analysis.Cjk
     /// Example: "java C1C2C3C4" will be segmented to: "java" "C1C2" "C2C3" "C3C4".
     /// </para>
     /// Additionally, the following is applied to Latin text (such as English):
-    /// <ul>
-    /// <li>Text is converted to lowercase.
-    /// <li>Numeric digits, '+', '#', and '_' are tokenized as letters.
-    /// <li>Full-width forms are converted to half-width forms.
-    /// </ul>
+    /// <list type="bullet">
+    ///     <item>Text is converted to lowercase.</item>
+    ///     <item>Numeric digits, '+', '#', and '_' are tokenized as letters.</item>
+    ///     <item>Full-width forms are converted to half-width forms.</item>
+    /// </list>
     /// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation:
     /// please search  <a
     /// href="http://www.google.com/search?q=word+chinese+segment">google</a>
@@ -145,7 +145,7 @@ namespace Lucene.Net.Analysis.Cjk
         /// </summary>
         /// <returns> false for end of stream, true otherwise
         /// </returns>
-        /// <exception cref="java.io.IOException"> - throw IOException when read error <br>
+        /// <exception cref="IOException"> when read error
         ///         happened in the InputStream
         ///  </exception>
         public override bool IncrementToken()
@@ -347,7 +347,7 @@ namespace Lucene.Net.Analysis.Cjk
             }
         }
 
-        public override void End()
+        public override sealed void End()
         {
             base.End();
             // set final offset

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
index 220a7d6..c33f3a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizerFactory.cs
@@ -25,20 +25,21 @@ namespace Lucene.Net.Analysis.Cjk
 
 
     /// <summary>
-    /// Factory for <seealso cref="CJKTokenizer"/>. 
-    /// <pre class="prettyprint" >
+    /// Factory for <see cref="CJKTokenizer"/>. 
+    /// <code>
     /// &lt;fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.CJKTokenizerFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre> </summary>
-    /// @deprecated Use <seealso cref="CJKBigramFilterFactory"/> instead. 
+    /// &lt;/fieldType&gt;
+    /// </code>
+    /// </summary>
+    /// @deprecated Use <see cref="CJKBigramFilterFactory"/> instead. 
     [Obsolete("Use CJKBigramFilterFactory instead.")]
     public class CJKTokenizerFactory : TokenizerFactory
     {
-
         /// <summary>
-        /// Creates a new CJKTokenizerFactory </summary>
+        /// Creates a new <see cref="CJKTokenizerFactory"/> </summary>
         public CJKTokenizerFactory(IDictionary<string, string> args) : base(args)
         {
             if (args.Count > 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
index 331de6b..64018e2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilter.cs
@@ -22,11 +22,11 @@ namespace Lucene.Net.Analysis.Cjk
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> that normalizes CJK width differences:
-    /// <ul>
-    ///   <li>Folds fullwidth ASCII variants into the equivalent basic latin
-    ///   <li>Folds halfwidth Katakana variants into the equivalent kana
-    /// </ul>
+    /// A <see cref="TokenFilter"/> that normalizes CJK width differences:
+    /// <list type="bullet">
+    ///   <item>Folds fullwidth ASCII variants into the equivalent basic latin</item>
+    ///   <item>Folds halfwidth Katakana variants into the equivalent kana</item>
+    /// </list>
     /// <para>
     /// NOTE: this filter can be viewed as a (practical) subset of NFKC/NFKD
     /// Unicode normalization. See the normalization support in the ICU package
@@ -37,13 +37,22 @@ namespace Lucene.Net.Analysis.Cjk
     {
         private ICharTermAttribute termAtt;
 
-        /* halfwidth kana mappings: 0xFF65-0xFF9D 
-         *
-         * note: 0xFF9C and 0xFF9D are only mapped to 0x3099 and 0x309A
-         * as a fallback when they cannot properly combine with a preceding 
-         * character into a composed form.
-         */
-        private static readonly char[] KANA_NORM = new char[] { (char)0x30fb, (char)0x30f2, (char)0x30a1, (char)0x30a3, (char)0x30a5, (char)0x30a7, (char)0x30a9, (char)0x30e3, (char)0x30e5, (char)0x30e7, (char)0x30c3, (char)0x30fc, (char)0x30a2, (char)0x30a4, (char)0x30a6, (char)0x30a8, (char)0x30aa, (char)0x30ab, (char)0x30ad, (char)0x30af, (char)0x30b1, (char)0x30b3, (char)0x30b5, (char)0x30b7, (char)0x30b9, (char)0x30bb, (char)0x30bd, (char)0x30bf, (char)0x30c1, (char)0x30c4, (char)0x30c6, (char)0x30c8, (char)0x30ca, (char)0x30cb, (char)0x30cc, (char)0x30cd, (char)0x30ce, (char)0x30cf, (char)0x30d2, (char)0x30d5, (char)0x30d8, (char)0x30db, (char)0x30de, (char)0x30df, (char)0x30e0, (char)0x30e1, (char)0x30e2, (char)0x30e4, (char)0x30e6, (char)0x30e8, (char)0x30e9, (char)0x30ea, (char)0x30eb, (char)0x30ec, (char)0x30ed, (char)0x30ef, (char)0x30f3, (char)0x3099, (char)0x309A };
+        /// <summary>
+        /// halfwidth kana mappings: 0xFF65-0xFF9D 
+        /// <para/>
+        /// note: 0xFF9C and 0xFF9D are only mapped to 0x3099 and 0x309A
+        /// as a fallback when they cannot properly combine with a preceding 
+        /// character into a composed form.
+        /// </summary>
+        private static readonly char[] KANA_NORM = new char[] {
+            (char)0x30fb, (char)0x30f2, (char)0x30a1, (char)0x30a3, (char)0x30a5, (char)0x30a7, (char)0x30a9, (char)0x30e3, (char)0x30e5,
+            (char)0x30e7, (char)0x30c3, (char)0x30fc, (char)0x30a2, (char)0x30a4, (char)0x30a6, (char)0x30a8, (char)0x30aa, (char)0x30ab,
+            (char)0x30ad, (char)0x30af, (char)0x30b1, (char)0x30b3, (char)0x30b5, (char)0x30b7, (char)0x30b9, (char)0x30bb, (char)0x30bd,
+            (char)0x30bf, (char)0x30c1, (char)0x30c4, (char)0x30c6, (char)0x30c8, (char)0x30ca, (char)0x30cb, (char)0x30cc, (char)0x30cd,
+            (char)0x30ce, (char)0x30cf, (char)0x30d2, (char)0x30d5, (char)0x30d8, (char)0x30db, (char)0x30de, (char)0x30df, (char)0x30e0,
+            (char)0x30e1, (char)0x30e2, (char)0x30e4, (char)0x30e6, (char)0x30e8, (char)0x30e9, (char)0x30ea, (char)0x30eb, (char)0x30ec,
+            (char)0x30ed, (char)0x30ef, (char)0x30f3, (char)0x3099, (char)0x309A
+        };
 
         public CJKWidthFilter(TokenStream input)
               : base(input)
@@ -87,10 +96,20 @@ namespace Lucene.Net.Analysis.Cjk
             }
         }
 
-        /* kana combining diffs: 0x30A6-0x30FD */
-        private static readonly sbyte[] KANA_COMBINE_VOICED = new sbyte[] { 78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+        /// <summary>kana combining diffs: 0x30A6-0x30FD </summary>
+        private static readonly sbyte[] KANA_COMBINE_VOICED = new sbyte[] {
+            78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+            0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
+            0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+        };
 
-        private static readonly sbyte[] KANA_COMBINE_HALF_VOICED = new sbyte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+        private static readonly sbyte[] KANA_COMBINE_HALF_VOICED = new sbyte[] {
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2,
+            0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        };
 
         /// <summary>
         /// returns true if we successfully combined the voice mark </summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/917b4fdf/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
index dfe8f2e..9c956e6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKWidthFilterFactory.cs
@@ -21,8 +21,8 @@ namespace Lucene.Net.Analysis.Cjk
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="CJKWidthFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="CJKWidthFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_cjk" class="solr.TextField"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
@@ -30,13 +30,13 @@ namespace Lucene.Net.Analysis.Cjk
     ///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
     ///     &lt;filter class="solr.CJKBigramFilterFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class CJKWidthFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
     {
-
         /// <summary>
-        /// Creates a new CJKWidthFilterFactory </summary>
+        /// Creates a new <see cref="CJKWidthFilterFactory"/> </summary>
         public CJKWidthFilterFactory(IDictionary<string, string> args) : base(args)
         {
             if (args.Count > 0)


[4/6] lucenenet git commit: Lucene.Net.Analysis.Ca refactor: member accessibility and documentation comments

Posted by ni...@apache.org.
Lucene.Net.Analysis.Ca refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5d0d43f4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5d0d43f4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5d0d43f4

Branch: refs/heads/api-work
Commit: 5d0d43f4034186c2f893d2e60475c1bbcee8998e
Parents: 83902e9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:39:49 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:39:49 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Ca/CatalanAnalyzer.cs              | 33 +++++++++-----------
 1 file changed, 15 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5d0d43f4/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
index b65b920..704f543 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ca/CatalanAnalyzer.cs
@@ -29,16 +29,13 @@ namespace Lucene.Net.Analysis.Ca
 	 */
 
     /// <summary>
-    /// <seealso cref="Analyzer"/> for Catalan.
-    /// <para>
-    /// <a name="version"/>
-    /// </para>
-    /// <para>You must specify the required <seealso cref="Version"/>
+    /// <see cref="Analyzer"/> for Catalan.
+    /// <para>You must specify the required <see cref="LuceneVersion"/>
     /// compatibility when creating CatalanAnalyzer:
-    /// <ul>
-    ///   <li> As of 3.6, ElisionFilter with a set of Catalan 
-    ///        contractions is used by default.
-    /// </ul>
+    /// <list>
+    ///   <item> As of 3.6, <see cref="ElisionFilter"/> with a set of Catalan 
+    ///        contractions is used by default.</item>
+    /// </list>
     /// </para>
     /// </summary>
     public sealed class CatalanAnalyzer : StopwordAnalyzerBase
@@ -66,7 +63,7 @@ namespace Lucene.Net.Analysis.Ca
         }
 
         /// <summary>
-        /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+        /// Atomically loads the <see cref="DEFAULT_STOP_SET"/> in a lazy fashion once the outer class 
         /// accesses the static final set the first time.;
         /// </summary>
         private class DefaultSetHolder
@@ -90,7 +87,7 @@ namespace Lucene.Net.Analysis.Ca
         }
 
         /// <summary>
-        /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+        /// Builds an analyzer with the default stop words: <see cref="DEFAULT_STOPWORD_FILE"/>.
         /// </summary>
         public CatalanAnalyzer(LuceneVersion matchVersion)
               : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -109,7 +106,7 @@ namespace Lucene.Net.Analysis.Ca
 
         /// <summary>
         /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
-        /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+        /// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
         /// stemming.
         /// </summary>
         /// <param name="matchVersion"> lucene compatibility version </param>
@@ -123,15 +120,15 @@ namespace Lucene.Net.Analysis.Ca
 
         /// <summary>
         /// Creates a
-        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+        /// <see cref="Analyzer.TokenStreamComponents"/>
+        /// which tokenizes all the text in the provided <see cref="TextReader"/>.
         /// </summary>
         /// <returns> A
-        ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+        ///         <see cref="Analyzer.TokenStreamComponents"/>
         ///         built from an <seealso cref="StandardTokenizer"/> filtered with
-        ///         <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>, <seealso cref="LowerCaseFilter"/>, 
-        ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
-        ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+        ///         <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>, 
+        ///         <see cref="StopFilter"/>, <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+        ///         provided and <see cref="SnowballFilter"/>. </returns>
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             Tokenizer source = new StandardTokenizer(m_matchVersion, reader);


[5/6] lucenenet git commit: Lucene.Net.Analysis.CharFilter refactor: member accessibility and documentation comments

Posted by ni...@apache.org.
Lucene.Net.Analysis.CharFilter refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7fdbd66d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7fdbd66d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7fdbd66d

Branch: refs/heads/api-work
Commit: 7fdbd66dc5f2b590dd27e543a2644966cba2a40b
Parents: 5d0d43f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 21:25:07 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 21:25:07 2017 +0700

----------------------------------------------------------------------
 .../Analysis/CharFilter/BaseCharFilter.cs       |  10 +-
 .../Analysis/CharFilter/HTMLStripCharFilter.cs  | 329 +++++++++----------
 .../CharFilter/HTMLStripCharFilterFactory.cs    |  13 +-
 .../Analysis/CharFilter/MappingCharFilter.cs    |  20 +-
 .../CharFilter/MappingCharFilterFactory.cs      |  17 +-
 .../Analysis/CharFilter/NormalizeCharMap.cs     |   9 +-
 6 files changed, 194 insertions(+), 204 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
index 4ae7af5..ed3d61b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.CharFilters
      */
 
     /// <summary>
-    /// Base utility class for implementing a <seealso cref="CharFilter"/>.
+    /// Base utility class for implementing a <see cref="CharFilter"/>.
     /// You subclass this, and then record mappings by calling
-    /// <seealso cref="#addOffCorrectMap"/>, and then invoke the correct
+    /// <see cref="AddOffCorrectMap"/>, and then invoke the correct
     /// method to correct an offset.
     /// </summary>
     public abstract class BaseCharFilter : CharFilter
@@ -35,7 +35,7 @@ namespace Lucene.Net.Analysis.CharFilters
         private int[] diffs;
         private int size = 0;
 
-        protected BaseCharFilter(TextReader @in)
+        public BaseCharFilter(TextReader @in)
             : base(@in)
         {
         }
@@ -85,7 +85,7 @@ namespace Lucene.Net.Analysis.CharFilters
             }
         }
 
-        protected internal virtual int LastCumulativeDiff
+        protected virtual int LastCumulativeDiff
         {
             get
             {
@@ -105,7 +105,7 @@ namespace Lucene.Net.Analysis.CharFilters
         /// <param name="off"> The output stream offset at which to apply the correction </param>
         /// <param name="cumulativeDiff"> The input offset is given by adding this
         ///                       to the output offset </param>
-        protected internal virtual void AddOffCorrectMap(int off, int cumulativeDiff)
+        protected virtual void AddOffCorrectMap(int off, int cumulativeDiff)
         {
             if (offsets == null)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index 6fd8bad..d60080e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -27,17 +27,17 @@ namespace Lucene.Net.Analysis.CharFilters
 	 */
 
     /// <summary>
-    /// A CharFilter that wraps another TextReader and attempts to strip out HTML constructs.
+    /// A <see cref="CharFilter"/> that wraps another <see cref="TextReader"/> and attempts to strip out HTML constructs.
     /// </summary>
-    public class HTMLStripCharFilter : BaseCharFilter
+    public sealed class HTMLStripCharFilter : BaseCharFilter
     {
-        /** This character denotes the end of file */
+        /// <summary>This character denotes the end of file</summary>
         private const int YYEOF = -1;
 
-        /** initial size of the lookahead buffer */
+        /// <summary>initial size of the lookahead buffer</summary>
         private const int ZZ_BUFFERSIZE = 16384;
 
-        /** lexical states */
+        // lexical states
         private const int YYINITIAL = 0;
         private const int AMPERSAND = 2;
         private const int NUMERIC_CHARACTER = 4;
@@ -62,21 +62,21 @@ namespace Lucene.Net.Analysis.CharFilters
         private const int STYLE = 42;
         private const int STYLE_COMMENT = 44;
 
-        /**
-         * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
-         * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
-         *                  at the beginning of a line
-         * l is of the form l = 2*k, k a non negative integer
-         */
+        /// <summary>
+        /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+        /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+        ///     at the beginning of a line
+        /// l is of the form l = 2*k, k a non negative integer
+        /// </summary>
         private static readonly int[] ZZ_LEXSTATE = {
             0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
             8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15,
             16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22
         };
 
-        /** 
-         * Translates characters to character classes
-         */
+        /// <summary>
+        /// Translates characters to character classes
+        /// </summary>
         private const string ZZ_CMAP_PACKED =
             "\x0009\x0000\x0005\x0021\x0012\x0000\x0001\x0021\x0001\x001D\x0001\x001B\x0001\x001F\x0002\x0000\x0001\x00B6\x0001\x0019" +
             "\x0005\x0000\x0001\x001E\x0001\x0002\x0001\x00BE\x0001\x00B8\x0001\x003C\x0001\x003D\x0001\x003F\x0001\x003E\x0001\x00BA" +
@@ -2230,9 +2230,9 @@ namespace Lucene.Net.Analysis.CharFilters
             return j;
         }
 
-        /** 
-         * The transition table of the DFA
-         */
+        /// <summary>
+        /// The transition table of the DFA
+        /// </summary>
         private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
 
         private const string ZZ_TRANS_PACKED_0 =
@@ -30492,21 +30492,21 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
 
-        /* error codes */
+        /// <summary>error codes</summary>
         private static readonly int ZZ_UNKNOWN_ERROR = 0;
         private static readonly int ZZ_NO_MATCH = 1;
         private static readonly int ZZ_PUSHBACK_2BIG = 2;
 
-        /* error messages for the codes above */
+        /// <summary>error messages for the codes above</summary>
         private static readonly string[] ZZ_ERROR_MSG = {
             "Unkown internal scanner error",
             "Error: could not match input",
             "Error: pushback value was too large"
         };
 
-        /**
-         * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
-         */
+        /// <summary>
+        /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+        /// </summary>
         private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
 
         private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -30618,59 +30618,62 @@ namespace Lucene.Net.Analysis.CharFilters
             return j;
         }
 
-        /** the input device */
+        /// <summary>the input device</summary>
         private BufferedCharFilter zzReader;
 
-        /** the current state of the DFA */
+        /// <summary>the current state of the DFA</summary>
         private int zzState;
 
-        /** the current lexical state */
+        /// <summary>the current lexical state</summary>
         private int zzLexicalState = YYINITIAL;
 
-        /** this buffer contains the current text to be matched and is
-            the source of the YyText() string */
+        /// <summary>
+        /// this buffer contains the current text to be matched and is the source of the YyText() string
+        /// </summary>
         private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
 
-        /** the textposition at the last accepting state */
+        /// <summary>the textposition at the last accepting state</summary>
         private int zzMarkedPos;
 
-        /** the current text position in the buffer */
+        /// <summary>the current text position in the buffer</summary>
         private int zzCurrentPos;
 
-        /** startRead marks the beginning of the YyText() string in the buffer */
+        /// <summary>startRead marks the beginning of the YyText() string in the buffer</summary>
         private int zzStartRead;
 
-        /** endRead marks the last character in the buffer, that has been read
-            from input */
+        /// <summary>
+        /// endRead marks the last character in the buffer, that has been read from input
+        /// </summary>
         private int zzEndRead;
 
-        /** number of newlines encountered up to the start of the matched text */
+        /// <summary>number of newlines encountered up to the start of the matched text</summary>
         private int yyline;
 
-        /** the number of characters up to the start of the matched text */
+        /// <summary>the number of characters up to the start of the matched text</summary>
         private int yychar;
 
 #pragma warning disable 169, 414
-        /**
-         * the number of characters from the last newline up to the start of the 
-         * matched text
-         */
+        /// <summary>
+        /// the number of characters from the last newline up to the start of the matched text
+        /// </summary>
         private int yycolumn;
 
-        /** 
-         * zzAtBOL == true <=> the scanner is currently at the beginning of a line
-         */
+        /// <summary>
+        /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+        /// </summary>
         private bool zzAtBOL = true;
 
 #pragma warning restore 169, 414
 
-        /** zzAtEOF == true <=> the scanner is at the EOF */
+        /// <summary>zzAtEOF == true <=> the scanner is at the EOF</summary>
         private bool zzAtEOF;
 
-        /** denotes if the user-EOF-code has already been executed */
+        /// <summary>denotes if the user-EOF-code has already been executed</summary>
         private bool zzEOFDone;
 
-        /* user code: */
+        /// <summary>
+        /// user code:
+        /// </summary>
         private static readonly IDictionary<string, string> upperCaseVariantsAccepted
             = new Dictionary<string, string>()
             {
@@ -30798,10 +30801,10 @@ namespace Lucene.Net.Analysis.CharFilters
         private TextSegment outputSegment;
         private TextSegment entitySegment = new TextSegment(2);
 
-        /**
-         * Creates a new HTMLStripCharFilter over the provided TextReader.
-         * @param source SetReader to strip html tags from.
-         */
+        /// <summary>
+        /// Creates a new HTMLStripCharFilter over the provided TextReader.
+        /// </summary>
+        /// <param name="source"><see cref="TextReader"/> to strip html tags from.</param>
         public HTMLStripCharFilter(TextReader source)
                     : base(source)
         {
@@ -30809,13 +30812,12 @@ namespace Lucene.Net.Analysis.CharFilters
             this.zzReader = GetBufferedReader(source);
         }
 
-        /**
-         * Creates a new HTMLStripCharFilter over the provided TextReader
-         * with the specified start and end tags.
-         * @param source SetReader to strip html tags from.
-         * @param escapedTags Tags in this set (both start and end tags)
-         *  will not be filtered out.
-         */
+        /// <summary>
+        /// Creates a new <see cref="HTMLStripCharFilter"/> over the provided <see cref="TextReader"/>
+        /// with the specified start and end tags.
+        /// </summary>
+        /// <param name="source"><see cref="TextReader"/> to strip html tags from.</param>
+        /// <param name="escapedTags">Tags in this set (both start and end tags) will not be filtered out.</param>
         public HTMLStripCharFilter(TextReader source, ICollection<string> escapedTags)
                     : base(source)
         {
@@ -30910,54 +30912,63 @@ namespace Lucene.Net.Analysis.CharFilters
 
         private class TextSegment : OpenStringBuilder
         {
-            /** The position from which the next char will be read. */
+            /// <summary>
+            /// The position from which the next char will be read.
+            /// </summary>
             int pos = 0;
 
-            /** Wraps the given buffer and sets this.len to the given length. */
+            /// <summary>
+            /// Wraps the given <paramref name="buffer"/> and sets this.len to the given <paramref name="length"/>.
+            /// </summary>
             internal TextSegment(char[] buffer, int length) : base(buffer, length)
             { }
 
-            /** Allocates an internal buffer of the given size. */
+            /// <summary>
+            /// Allocates an internal buffer of the given size.
+            /// </summary>
             internal TextSegment(int size) : base(size)
             { }
 
-            /** Sets len = 0 and pos = 0. */
+            /// <summary>
+            /// Sets len = 0 and pos = 0.
+            /// </summary>
             internal void Clear()
             {
                 Reset();
                 Restart();
             }
 
-            /** Sets pos = 0 */
+            /// <summary>
+            /// Sets pos = 0
+            /// </summary>
             internal void Restart()
             {
                 pos = 0;
             }
 
-            /** Returns the next char in the segment. */
+            /// <summary>
+            /// Returns the next char in the segment.
+            /// </summary>
             internal int NextChar()
             {
                 Debug.Assert(!IsRead, "Attempting to read past the end of a segment.");
                 return m_buf[pos++];
             }
 
-            /** Returns true when all characters in the text segment have been read */
+            /// <summary>
+            /// Returns true when all characters in the text segment have been read
+            /// </summary>
             internal bool IsRead
             {
                 get { return pos >= m_len; }
             }
         }
 
-
-
-
-
-        /** 
-         * Unpacks the compressed character translation table.
-         *
-         * @param packed   the packed character translation table
-         * @return         the unpacked character translation table
-         */
+        /// <summary>
+        /// Unpacks the compressed character translation table.
+        /// </summary>
+        /// <param name="packed">the packed character translation table</param>
+        /// <returns>the unpacked character translation table</returns>
         private static char[] ZzUnpackCMap(string packed)
         {
             char[] map = new char[0x10000];
@@ -30972,14 +30983,11 @@ namespace Lucene.Net.Analysis.CharFilters
             return map;
         }
 
-
-        /**
-         * Refills the input buffer.
-         *
-         * @return      <code>false</code>, iff there was new input.
-         * 
-         * @exception   java.io.IOException  if any I/O-Error occurs
-         */
+        /// <summary>
+        /// Refills the input buffer.
+        /// </summary>
+        /// <returns><c>false</c>, iff there was new input.</returns>
+        /// <exception cref="IOException">if any I/O-Error occurs</exception>
         private bool ZzRefill()
         {
 
@@ -31035,9 +31043,9 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
 
-        /**
-         * Closes the input stream.
-         */
+        /// <summary>
+        /// Disposes the input stream.
+        /// </summary>
         private void YyClose()
         {
             zzAtEOF = true;            /* indicate end of file */
@@ -31047,19 +31055,17 @@ namespace Lucene.Net.Analysis.CharFilters
                 zzReader.Dispose();
         }
 
-
-        /**
-         * Resets the scanner to read from a new input stream.
-         * Does not close the old reader.
-         *
-         * All internal variables are reset, the old input stream 
-         * <b>cannot</b> be reused (internal buffer is discarded and lost).
-         * Lexical state is set to <tt>ZZ_INITIAL</tt>.
-         *
-         * Internal scan buffer is resized down to its initial length, if it has grown.
-         *
-         * @param reader   the new input stream 
-         */
+        /// <summary>
+        /// Resets the scanner to read from a new input stream.
+        /// Does not close the old reader.
+        /// <para/>
+        /// All internal variables are reset, the old input stream
+        /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+        /// Lexical state is set to <see cref="ZZ_INITIAL"/>.
+        /// <para/>
+        /// Internal scan buffer is resized down to its initial length, if it has grown.
+        /// </summary>
+        /// <param name="reader">the new input stream</param>
         private void YyReset(BufferedCharFilter reader)
         {
             zzReader = reader;
@@ -31075,75 +31081,66 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
 
-        /**
-         * Returns the current lexical state.
-         */
+        /// <summary>
+        /// Returns the current lexical state.
+        /// </summary>
         private int YyState
         {
             get { return zzLexicalState; }
         }
 
-
-        /**
-         * Enters a new lexical state
-         *
-         * @param newState the new lexical state
-         */
+        /// <summary>
+        /// Enters a new lexical state
+        /// </summary>
+        /// <param name="newState">the new lexical state</param>
         private void YyBegin(int newState)
         {
             zzLexicalState = newState;
         }
 
 
-        /**
-         * Returns the text matched by the current regular expression.
-         */
-        private string YyText
+        /// <summary>
+        /// Returns the text matched by the current regular expression.
+        /// </summary>
+        /// <returns>Returns the text matched by the current regular expression.</returns>
+        private string YyText()
         {
-            get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
+            return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
         }
 
-
-        /**
-         * Returns the character at position <tt>pos</tt> from the 
-         * matched text. 
-         * 
-         * It is equivalent to YyText[pos], but faster
-         *
-         * @param pos the position of the character to fetch. 
-         *            A value from 0 to YyLength()-1.
-         *
-         * @return the character at position pos
-         */
+        /// <summary>
+        /// Returns the character at position <tt>pos</tt> from the 
+        /// matched text. It is equivalent to YyText[pos], but faster
+        /// </summary>
+        /// <param name="pos">the position of the character to fetch. A value from 0 to YyLength()-1.</param>
+        /// <returns>the character at position pos</returns>
         private char YyCharAt(int pos)
         {
             return zzBuffer[zzStartRead + pos];
         }
 
 
-        /**
-         * Returns the length of the matched text region.
-         */
+        /// <summary>
+        /// Returns the length of the matched text region.
+        /// </summary>
         private int YyLength
         {
             get { return zzMarkedPos - zzStartRead; }
         }
 
-
-        /**
-         * Reports an error that occured while scanning.
-         *
-         * In a wellformed scanner (no or only correct usage of 
-         * YyPushBack(int) and a match-all fallback rule) this method 
-         * will only be called with things that "Can't Possibly Happen".
-         * If this method is called, something is seriously wrong
-         * (e.g. a JFlex bug producing a faulty scanner etc.).
-         *
-         * Usual syntax/scanner level error handling should be done
-         * in error fallback rules.
-         *
-         * @param   errorCode  the code of the errormessage to display
-         */
+        /// <summary>
+        /// Reports an error that occured while scanning.
+        /// <para/>
+        /// In a wellformed scanner (no or only correct usage of
+        /// YyPushBack(int) and a match-all fallback rule) this method 
+        /// will only be called with things that "Can't Possibly Happen".
+        /// If this method is called, something is seriously wrong
+        /// (e.g. a JFlex bug producing a faulty scanner etc.).
+        /// <para/>
+        /// Usual syntax/scanner level error handling should be done
+        /// in error fallback rules.
+        /// </summary>
+        /// <param name="errorCode">the code of the errormessage to display</param>
         private void ZzScanError(int errorCode)
         {
             string message;
@@ -31159,15 +31156,13 @@ namespace Lucene.Net.Analysis.CharFilters
             throw new Exception(message);
         }
 
-
-        /**
-         * Pushes the specified amount of characters back into the input stream.
-         *
-         * They will be read again by then next call of the scanning method
-         *
-         * @param number  the number of characters to be read again.
-         *                This number must not be greater than YyLength()!
-         */
+        /// <summary>
+        /// Pushes the specified amount of characters back into the input stream.
+        /// 
+        /// They will be read again by then next call of the scanning method
+        /// </summary>
+        /// <param name="number">the number of characters to be read again.
+        /// This number must not be greater than YyLength()!</param>
         private void YyPushBack(int number)
         {
             if (number > YyLength)
@@ -31177,10 +31172,10 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
 
-        /**
-         * Contains user EOF-code, which will be executed exactly once,
-         * when the end of file is reached
-         */
+        /// <summary>
+        /// Contains user EOF-code, which will be executed exactly once,
+        /// when the end of file is reached
+        /// </summary>
         private void ZzDoEOF()
         {
             if (!zzEOFDone)
@@ -31243,14 +31238,12 @@ namespace Lucene.Net.Analysis.CharFilters
             }
         }
 
-
-        /**
-         * Resumes scanning until the next regular expression is matched,
-         * the end of input is encountered or an I/O-Error occurs.
-         *
-         * @return      the next token
-         * @exception   java.io.IOException  if any I/O-Error occurs
-         */
+        /// <summary>
+        /// Resumes scanning until the next regular expression is matched,
+        /// the end of input is encountered or an I/O-Error occurs.
+        /// </summary>
+        /// <returns>the next token</returns>
+        /// <exception cref="IOException">if any I/O-Error occurs</exception>
         private int NextChar()
         {
             int zzInput;
@@ -31384,7 +31377,7 @@ namespace Lucene.Net.Analysis.CharFilters
                             inputSegment.Write(zzBuffer, zzStartRead, matchLength);
                             if (matchLength <= 7)
                             { // 0x10FFFF = 1114111: max 7 decimal chars
-                                string decimalCharRef = YyText;
+                                string decimalCharRef = YyText();
                                 int codePoint = 0;
                                 try
                                 {
@@ -31689,7 +31682,7 @@ namespace Lucene.Net.Analysis.CharFilters
                         {
                             if (inputSegment.Length > 2)
                             { // Chars between "<!" and "--" - this is not a comment
-                                inputSegment.Append(YyText);
+                                inputSegment.Append(YyText());
                             }
                             else
                             {
@@ -31835,7 +31828,7 @@ namespace Lucene.Net.Analysis.CharFilters
                         {
                             if (inputSegment.Length > 2)
                             { // Chars between "<!" and "[CDATA[" - this is not a CDATA section
-                                inputSegment.Append(YyText);
+                                inputSegment.Append(YyText());
                             }
                             else
                             {
@@ -31907,7 +31900,7 @@ namespace Lucene.Net.Analysis.CharFilters
                         { // Handle paired UTF-16 surrogates.
                             outputSegment = entitySegment;
                             outputSegment.Clear();
-                            string surrogatePair = YyText;
+                            string surrogatePair = YyText();
                             char highSurrogate = '\u0000';
                             try
                             {
@@ -31937,7 +31930,7 @@ namespace Lucene.Net.Analysis.CharFilters
                     case 103: break;
                     case 51:
                         { // Handle paired UTF-16 surrogates.
-                            string surrogatePair = YyText;
+                            string surrogatePair = YyText();
                             char highSurrogate = '\u0000';
                             char lowSurrogate = '\u0000';
                             try
@@ -31979,7 +31972,7 @@ namespace Lucene.Net.Analysis.CharFilters
                     case 104: break;
                     case 52:
                         { // Handle paired UTF-16 surrogates.
-                            string surrogatePair = YyText;
+                            string surrogatePair = YyText();
                             char highSurrogate = '\u0000';
                             try
                             { // High surrogates are in decimal range [55296, 56319]
@@ -32019,7 +32012,7 @@ namespace Lucene.Net.Analysis.CharFilters
                     case 105: break;
                     case 53:
                         { // Handle paired UTF-16 surrogates.
-                            string surrogatePair = YyText;
+                            string surrogatePair = YyText();
                             char highSurrogate = '\u0000';
                             try
                             { // High surrogates are in decimal range [55296, 56319]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
index 7a9ce24..53a01a9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilterFactory.cs
@@ -23,22 +23,23 @@ namespace Lucene.Net.Analysis.CharFilters
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="HTMLStripCharFilter"/>. 
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="HTMLStripCharFilter"/>. 
+    /// <code>
     /// &lt;fieldType name="text_html" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;charFilter class="solr.HTMLStripCharFilterFactory" escapedTags="a, title" /&gt;
     ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class HTMLStripCharFilterFactory : CharFilterFactory
     {
-        internal readonly ICollection<string> escapedTags;
-        internal static readonly Regex TAG_NAME_PATTERN = new Regex(@"[^\\s,]+", RegexOptions.Compiled);
+        private readonly ICollection<string> escapedTags;
+        private static readonly Regex TAG_NAME_PATTERN = new Regex(@"[^\\s,]+", RegexOptions.Compiled);
 
         /// <summary>
-        /// Creates a new HTMLStripCharFilterFactory </summary>
+        /// Creates a new <see cref="HTMLStripCharFilterFactory"/> </summary>
         public HTMLStripCharFilterFactory(IDictionary<string, string> args) : base(args)
         {
             escapedTags = GetSet(args, "escapedTags");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
index 05b7469..08ac354 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
@@ -26,8 +26,8 @@ namespace Lucene.Net.Analysis.CharFilters
 	 */
 
     /// <summary>
-    /// Simplistic <seealso cref="CharFilter"/> that applies the mappings
-    /// contained in a <seealso cref="NormalizeCharMap"/> to the character
+    /// Simplistic <see cref="CharFilter"/> that applies the mappings
+    /// contained in a <see cref="NormalizeCharMap"/> to the character
     /// stream, and correcting the resulting changes to the
     /// offsets.  Matching is greedy (longest pattern matching at
     /// a given point wins).  Replacement is allowed to be the
@@ -35,7 +35,6 @@ namespace Lucene.Net.Analysis.CharFilters
     /// </summary>
     public class MappingCharFilter : BaseCharFilter
     {
-
         private readonly Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
         private readonly FST<CharsRef> map;
         private readonly FST.BytesReader fstReader;
@@ -48,13 +47,14 @@ namespace Lucene.Net.Analysis.CharFilters
         private int inputOff;
 
         /// <summary>
-        /// LUCENENET support to buffer the reader.
+        /// LUCENENET specific support to buffer the reader.
         /// </summary>
-        private BufferedCharFilter _input;
+        private readonly BufferedCharFilter _input;
 
         /// <summary>
-        /// Default constructor that takes a <seealso cref="TextReader"/>. </summary>
-        public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
+        /// Default constructor that takes a <see cref="TextReader"/>. </summary>
+        public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) 
+            : base(@in)
         {
             //LUCENENET support to reset the reader.
             _input = GetBufferedReader(@in);
@@ -76,10 +76,10 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
         /// <summary>
-        /// LUCENENET: Copied this method from the WordlistLoader class - this class requires readers
-        /// with a Reset() method (which .NET readers don't support). So, we use the BufferedCharFilter 
+        /// LUCENENET: Copied this method from the <see cref="WordlistLoader"/> class - this class requires readers
+        /// with a Reset() method (which .NET readers don't support). So, we use the <see cref="BufferedCharFilter"/> 
         /// (which is similar to Java BufferedReader) as a wrapper for whatever reader the user passes 
-        /// (unless it is already a BufferedCharFilter).
+        /// (unless it is already a <see cref="BufferedCharFilter"/>).
         /// </summary>
         /// <param name="reader"></param>
         /// <returns></returns>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
index 84878a5..dd12acf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilterFactory.cs
@@ -24,25 +24,25 @@ namespace Lucene.Net.Analysis.CharFilters
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="MappingCharFilter"/>. 
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="MappingCharFilter"/>. 
+    /// <code>
     /// &lt;fieldType name="text_map" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"/&gt;
     ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// 
     /// @since Solr 1.4
     /// </summary>
     public class MappingCharFilterFactory : CharFilterFactory, IResourceLoaderAware, IMultiTermAwareComponent
     {
-
         protected internal NormalizeCharMap m_normMap;
         private readonly string mapping;
 
         /// <summary>
-        /// Creates a new MappingCharFilterFactory </summary>
+        /// Creates a new <see cref="MappingCharFilterFactory"/> </summary>
         public MappingCharFilterFactory(IDictionary<string, string> args) : base(args)
         {
             mapping = Get(args, "mapping");
@@ -92,10 +92,9 @@ namespace Lucene.Net.Analysis.CharFilters
         }
 
         // "source" => "target"
-        //internal static Pattern p = Pattern.compile("\"(.*)\"\\s*=>\\s*\"(.*)\"\\s*$");
-        internal static Regex p = new Regex(@"\""(.*)\""\\s*=>\\s*\""(.*)\""\\s*$", RegexOptions.Compiled);
+        private static Regex p = new Regex(@"\""(.*)\""\\s*=>\\s*\""(.*)\""\\s*$", RegexOptions.Compiled);
 
-        protected internal virtual void ParseRules(IList<string> rules, NormalizeCharMap.Builder builder)
+        protected virtual void ParseRules(IList<string> rules, NormalizeCharMap.Builder builder)
         {
             foreach (string rule in rules)
             {
@@ -108,7 +107,7 @@ namespace Lucene.Net.Analysis.CharFilters
             }
         }
 
-        internal char[] @out = new char[256];
+        private char[] @out = new char[256];
 
         protected internal virtual string ParseString(string s)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7fdbd66d/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
index de0c0d0..bcb031a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
@@ -27,13 +27,12 @@ namespace Lucene.Net.Analysis.CharFilters
     // TODO: save/load?
 
     /// <summary>
-    /// Holds a map of String input to String output, to be used
-    /// with <seealso cref="Builder"/>.  Use the <seealso cref="MappingCharFilter"/>
+    /// Holds a map of <see cref="string"/> input to <see cref="string"/> output, to be used
+    /// with <see cref="Builder"/>.  Use the <see cref="MappingCharFilter"/>
     /// to create this.
     /// </summary>
     public class NormalizeCharMap
     {
-
         internal readonly FST<CharsRef> map;
         internal readonly IDictionary<char?, FST.Arc<CharsRef>> cachedRootArcs = new Dictionary<char?, FST.Arc<CharsRef>>();
 
@@ -82,8 +81,7 @@ namespace Lucene.Net.Analysis.CharFilters
         /// </summary>
         public class Builder
         {
-
-            internal readonly IDictionary<string, string> pendingPairs = new SortedDictionary<string, string>();
+            private readonly IDictionary<string, string> pendingPairs = new SortedDictionary<string, string>();
 
             /// <summary>
             /// Records a replacement to be applied to the input
@@ -115,7 +113,6 @@ namespace Lucene.Net.Analysis.CharFilters
             /// </summary>
             public virtual NormalizeCharMap Build()
             {
-
                 FST<CharsRef> map;
                 try
                 {


[2/6] lucenenet git commit: Lucene.Net.Analysis.Bg refactor: member accessibility and documentation comments

Posted by ni...@apache.org.
Lucene.Net.Analysis.Bg refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1b0bca68
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1b0bca68
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1b0bca68

Branch: refs/heads/api-work
Commit: 1b0bca68b88838741866ad7ab782206ccb66518b
Parents: 2878664
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 20:16:24 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 20:16:24 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Bg/BulgarianAnalyzer.cs            | 24 +++++++++-----------
 .../Analysis/Bg/BulgarianStemFilter.cs          |  6 ++---
 .../Analysis/Bg/BulgarianStemFilterFactory.cs   |  9 ++++----
 .../Analysis/Bg/BulgarianStemmer.cs             |  4 ++--
 4 files changed, 21 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
index efa9eca..cab90fa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
@@ -26,14 +26,12 @@ namespace Lucene.Net.Analysis.Bg
 	 */
 
     /// <summary>
-    /// <seealso cref="Analyzer"/> for Bulgarian.
+    /// <see cref="Analyzer"/> for Bulgarian.
     /// <para>
     /// This analyzer implements light-stemming as specified by: <i> Searching
     /// Strategies for the Bulgarian Language </i>
     /// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
     /// </para>
-    /// <para>
-    /// </para>
     /// </summary>
     public sealed class BulgarianAnalyzer : StopwordAnalyzerBase
     {
@@ -86,7 +84,7 @@ namespace Lucene.Net.Analysis.Bg
 
         /// <summary>
         /// Builds an analyzer with the default stop words:
-        /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+        /// <see cref="DEFAULT_STOPWORD_FILE"/>.
         /// </summary>
         public BulgarianAnalyzer(LuceneVersion matchVersion)
               : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
@@ -103,8 +101,8 @@ namespace Lucene.Net.Analysis.Bg
 
         /// <summary>
         /// Builds an analyzer with the given stop words and a stem exclusion set.
-        /// If a stem exclusion set is provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> 
-        /// before <seealso cref="BulgarianStemFilter"/>.
+        /// If a stem exclusion set is provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> 
+        /// before <see cref="BulgarianStemFilter"/>.
         /// </summary>
         public BulgarianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
               : base(matchVersion, stopwords)
@@ -114,15 +112,15 @@ namespace Lucene.Net.Analysis.Bg
 
         /// <summary>
         /// Creates a
-        /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+        /// <see cref="Analyzer.TokenStreamComponents"/>
+        /// which tokenizes all the text in the provided <see cref="TextReader"/>.
         /// </summary>
         /// <returns> A
-        ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-        ///         built from an <seealso cref="StandardTokenizer"/> filtered with
-        ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
-        ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
-        ///         provided and <seealso cref="BulgarianStemFilter"/>. </returns>
+        ///         <see cref="Analyzer.TokenStreamComponents"/>
+        ///         built from an <see cref="StandardTokenizer"/> filtered with
+        ///         <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>, 
+        ///         <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+        ///         provided and <see cref="BulgarianStemFilter"/>. </returns>
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             Tokenizer source = new StandardTokenizer(m_matchVersion, reader);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
index 42dff08..beeef3b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
@@ -20,12 +20,12 @@ namespace Lucene.Net.Analysis.Bg
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> that applies <seealso cref="BulgarianStemmer"/> to stem Bulgarian
+    /// A <see cref="TokenFilter"/> that applies <see cref="BulgarianStemmer"/> to stem Bulgarian
     /// words.
     /// <para>
     /// To prevent terms from being stemmed use an instance of
-    /// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
-    /// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+    /// <see cref="Miscellaneous.SetKeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
+    /// the <see cref="KeywordAttribute"/> before this <see cref="TokenStream"/>.
     /// </para>
     /// </summary>
     public sealed class BulgarianStemFilter : TokenFilter

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
index 1ba70d9..ab47af6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Bg
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="BulgarianStemFilter"/>.
-    /// <pre class="prettyprint">
+    /// Factory for <see cref="BulgarianStemFilter"/>.
+    /// <code>
     /// &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
     ///   &lt;analyzer&gt;
     ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
     ///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
     ///     &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
     ///   &lt;/analyzer&gt;
-    /// &lt;/fieldType&gt;</pre>
+    /// &lt;/fieldType&gt;
+    /// </code>
     /// </summary>
     public class BulgarianStemFilterFactory : TokenFilterFactory
     {
 
         /// <summary>
-        /// Creates a new BulgarianStemFilterFactory </summary>
+        /// Creates a new <see cref="BulgarianStemFilterFactory"/> </summary>
         public BulgarianStemFilterFactory(IDictionary<string, string> args)
               : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1b0bca68/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
index 0b8c339..3ff4017 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
@@ -23,9 +23,9 @@ namespace Lucene.Net.Analysis.Bg
     /// Light Stemmer for Bulgarian.
     /// <para>
     /// Implements the algorithm described in:  
-    /// <i>
+    /// <c>
     /// Searching Strategies for the Bulgarian Language
-    /// </i>
+    /// </c>
     /// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
     /// </para>
     /// </summary>