You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/03 17:51:12 UTC
[06/11] lucenenet git commit: Lucene.Net.Analysis.Miscellaneous
refactor: member accessibility and documentation comments
Lucene.Net.Analysis.Miscellaneous refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/46b02fbc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/46b02fbc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/46b02fbc
Branch: refs/heads/api-work
Commit: 46b02fbc101d21e58fe19a8b3f7446c05537d5ba
Parents: e9ed184
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Feb 3 22:29:58 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 22:29:58 2017 +0700
----------------------------------------------------------------------
.../Miscellaneous/ASCIIFoldingFilter.cs | 40 ++--
.../Miscellaneous/ASCIIFoldingFilterFactory.cs | 2 +-
.../Miscellaneous/CapitalizationFilter.cs | 15 +-
.../CapitalizationFilterFactory.cs | 20 +-
.../Miscellaneous/CodepointCountFilter.cs | 6 +-
.../CodepointCountFilterFactory.cs | 13 +-
.../Analysis/Miscellaneous/EmptyTokenStream.cs | 24 ++-
.../Miscellaneous/HyphenatedWordsFilter.cs | 48 ++++-
.../HyphenatedWordsFilterFactory.cs | 6 +-
.../Analysis/Miscellaneous/KeepWordFilter.cs | 6 +-
.../Miscellaneous/KeepWordFilterFactory.cs | 2 +-
.../Miscellaneous/KeywordMarkerFilter.cs | 14 +-
.../Miscellaneous/KeywordMarkerFilterFactory.cs | 2 +-
.../Miscellaneous/KeywordRepeatFilter.cs | 6 +-
.../Miscellaneous/KeywordRepeatFilterFactory.cs | 3 +-
.../Analysis/Miscellaneous/LengthFilter.cs | 5 +-
.../Miscellaneous/LengthFilterFactory.cs | 12 +-
.../Miscellaneous/LimitTokenCountAnalyzer.cs | 9 +-
.../Miscellaneous/LimitTokenCountFilter.cs | 25 +--
.../LimitTokenCountFilterFactory.cs | 13 +-
.../Miscellaneous/LimitTokenPositionFilter.cs | 19 +-
.../LimitTokenPositionFilterFactory.cs | 13 +-
.../Lucene47WordDelimiterFilter.cs | 87 ++++----
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 32 ++-
.../Miscellaneous/PatternKeywordMarkerFilter.cs | 7 +-
.../Miscellaneous/PerFieldAnalyzerWrapper.cs | 22 +--
.../PrefixAndSuffixAwareTokenFilter.cs | 18 +-
.../Miscellaneous/PrefixAwareTokenFilter.cs | 8 +-
.../RemoveDuplicatesTokenFilter.cs | 35 +++-
.../RemoveDuplicatesTokenFilterFactory.cs | 3 +-
.../Miscellaneous/ScandinavianFoldingFilter.cs | 20 +-
.../ScandinavianFoldingFilterFactory.cs | 6 +-
.../ScandinavianNormalizationFilter.cs | 8 +-
.../ScandinavianNormalizationFilterFactory.cs | 5 +-
.../Miscellaneous/SetKeywordMarkerFilter.cs | 8 +-
.../Miscellaneous/SingleTokenTokenStream.cs | 8 +-
.../Miscellaneous/StemmerOverrideFilter.cs | 44 ++---
.../StemmerOverrideFilterFactory.cs | 2 +-
.../Analysis/Miscellaneous/TrimFilter.cs | 4 +-
.../Analysis/Miscellaneous/TrimFilterFactory.cs | 10 +-
.../Miscellaneous/TruncateTokenFilter.cs | 5 +-
.../Miscellaneous/TruncateTokenFilterFactory.cs | 8 +-
.../Miscellaneous/WordDelimiterFilter.cs | 198 +++++++++----------
.../Miscellaneous/WordDelimiterFilterFactory.cs | 7 +-
.../Miscellaneous/WordDelimiterIterator.cs | 38 ++--
src/Lucene.Net.Core/Analysis/TokenFilter.cs | 2 +-
src/Lucene.Net.Core/Analysis/TokenStream.cs | 18 +-
47 files changed, 477 insertions(+), 429 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
index 76bb80a..582a461 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
@@ -25,32 +25,32 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// This class converts alphabetic, numeric, and symbolic Unicode characters
/// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
/// block) into their ASCII equivalents, if one exists.
- ///
+ /// <para/>
/// Characters from the following Unicode blocks are converted; however, only
/// those characters with reasonable ASCII alternatives are converted:
///
/// <ul>
- /// <li>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a>
- /// <li>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a>
- /// <li>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a>
- /// <li>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a>
- /// <li>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a>
- /// <li>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a>
- /// <li>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a>
- /// <li>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a>
- /// <li>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a>
- /// <li>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a>
- /// <li>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a>
- /// <li>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a>
- /// <li>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a>
- /// <li>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a>
- /// <li>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a>
- /// <li>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a>
+ /// <item>C1 Controls and Latin-1 Supplement: <a href="http://www.unicode.org/charts/PDF/U0080.pdf">http://www.unicode.org/charts/PDF/U0080.pdf</a></item>
+ /// <item>Latin Extended-A: <a href="http://www.unicode.org/charts/PDF/U0100.pdf">http://www.unicode.org/charts/PDF/U0100.pdf</a></item>
+ /// <item>Latin Extended-B: <a href="http://www.unicode.org/charts/PDF/U0180.pdf">http://www.unicode.org/charts/PDF/U0180.pdf</a></item>
+ /// <item>Latin Extended Additional: <a href="http://www.unicode.org/charts/PDF/U1E00.pdf">http://www.unicode.org/charts/PDF/U1E00.pdf</a></item>
+ /// <item>Latin Extended-C: <a href="http://www.unicode.org/charts/PDF/U2C60.pdf">http://www.unicode.org/charts/PDF/U2C60.pdf</a></item>
+ /// <item>Latin Extended-D: <a href="http://www.unicode.org/charts/PDF/UA720.pdf">http://www.unicode.org/charts/PDF/UA720.pdf</a></item>
+ /// <item>IPA Extensions: <a href="http://www.unicode.org/charts/PDF/U0250.pdf">http://www.unicode.org/charts/PDF/U0250.pdf</a></item>
+ /// <item>Phonetic Extensions: <a href="http://www.unicode.org/charts/PDF/U1D00.pdf">http://www.unicode.org/charts/PDF/U1D00.pdf</a></item>
+ /// <item>Phonetic Extensions Supplement: <a href="http://www.unicode.org/charts/PDF/U1D80.pdf">http://www.unicode.org/charts/PDF/U1D80.pdf</a></item>
+ /// <item>General Punctuation: <a href="http://www.unicode.org/charts/PDF/U2000.pdf">http://www.unicode.org/charts/PDF/U2000.pdf</a></item>
+ /// <item>Superscripts and Subscripts: <a href="http://www.unicode.org/charts/PDF/U2070.pdf">http://www.unicode.org/charts/PDF/U2070.pdf</a></item>
+ /// <item>Enclosed Alphanumerics: <a href="http://www.unicode.org/charts/PDF/U2460.pdf">http://www.unicode.org/charts/PDF/U2460.pdf</a></item>
+ /// <item>Dingbats: <a href="http://www.unicode.org/charts/PDF/U2700.pdf">http://www.unicode.org/charts/PDF/U2700.pdf</a></item>
+ /// <item>Supplemental Punctuation: <a href="http://www.unicode.org/charts/PDF/U2E00.pdf">http://www.unicode.org/charts/PDF/U2E00.pdf</a></item>
+ /// <item>Alphabetic Presentation Forms: <a href="http://www.unicode.org/charts/PDF/UFB00.pdf">http://www.unicode.org/charts/PDF/UFB00.pdf</a></item>
+ /// <item>Halfwidth and Fullwidth Forms: <a href="http://www.unicode.org/charts/PDF/UFF00.pdf">http://www.unicode.org/charts/PDF/UFF00.pdf</a></item>
/// </ul>
- ///
+ /// <para/>
/// See: <a href="http://en.wikipedia.org/wiki/Latin_characters_in_Unicode">http://en.wikipedia.org/wiki/Latin_characters_in_Unicode</a>
- ///
- /// For example, 'à' will be replaced by 'a'.
+ /// <para/>
+ /// For example, '&agrave;' will be replaced by 'a'.
/// </summary>
public sealed class ASCIIFoldingFilter : TokenFilter
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
index 74a0d33..c1ef517 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -35,7 +35,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly bool preserveOriginal;
/// <summary>
- /// Creates a new ASCIIFoldingFilterFactory </summary>
+ /// Creates a new <see cref="ASCIIFoldingFilterFactory"/> </summary>
public ASCIIFoldingFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index a4b99bc..1148aca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -26,7 +26,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// A filter to apply normal capitalization rules to Tokens. It will make the first letter
/// capital and the rest lower case.
- /// <p/>
+ /// <para/>
/// This filter is particularly useful to build nice looking facet parameters. This filter
/// is not appropriate if you intend to use a prefix query.
/// </summary>
@@ -47,10 +47,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly ICharTermAttribute termAtt;
/// <summary>
- /// Creates a CapitalizationFilter with the default parameters.
+ /// Creates a <see cref="CapitalizationFilter"/> with the default parameters.
/// <para>
- /// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
- /// CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
+ /// Calls <see cref="CapitalizationFilter.CapitalizationFilter(TokenStream, bool, CharArraySet, bool, ICollection{char[]}, int, int, int)">
+ /// CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
+ /// </see>
/// </para>
/// </summary>
public CapitalizationFilter(TokenStream @in)
@@ -59,21 +60,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Creates a CapitalizationFilter with the specified parameters. </summary>
+ /// Creates a <see cref="CapitalizationFilter"/> with the specified parameters. </summary>
/// <param name="in"> input tokenstream </param>
/// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
/// <param name="keep"> a keep word list. Each word that should be kept separated by whitespace. </param>
/// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
/// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
/// <param name="minWordLength"> how long the word needs to be to get capitalization applied. If the
- /// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
+ /// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
/// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
/// assumed to be correct. </param>
/// <param name="maxTokenLength"> ??? </param>
public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
: base(@in)
{
- // LUCENENET: The guard clauses were copied here from the version of Lucene.
+ // LUCENENET NOTE: The guard clauses were copied here from a later version of Lucene.
// Apparently, the tests were not ported from 4.8.0 because they expected this and the
// original tests did not. Adding them anyway because there is no downside to this.
if (minWordLength < 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
index 236d6da..88f6bb0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -23,19 +23,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Factory for <see cref="CapitalizationFilter"/>.
- /// <p/>
- /// The factory takes parameters:<br/>
- /// "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
- /// "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
- /// "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.<br/>
- /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
+ /// <para/>
+ /// The factory takes parameters:<para/>
+ /// "onlyFirstWord" - should each word be capitalized or all of the words?<para/>
+ /// "keep" - a keep word list. Each word that should be kept separated by whitespace.<para/>
+ /// "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.<para/>
+ /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<para/>
/// "okPrefix" - do not change word capitalization if a word begins with something in this list.
/// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
- /// "Mckinley"<br/>
+ /// "Mckinley"<para/>
/// "minWordLength" - how long the word needs to be to get capitalization applied. If the
- /// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
+ /// minWordLength is 3, "and" > "And" but "or" stays "or"<para/>
/// "maxWordCount" - if the token contains more then maxWordCount words, the capitalization is
- /// assumed to be correct.<br/>
+ /// assumed to be correct.<para/>
///
/// <code>
/// <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
@@ -71,7 +71,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
/// <summary>
- /// Creates a new CapitalizationFilterFactory </summary>
+ /// Creates a new <see cref="CapitalizationFilterFactory"/> </summary>
public CapitalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
index 1c12925..a5f2085 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -38,8 +38,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="CodepointCountFilter"/>. This will filter out tokens whose
- /// <see cref="CharTermAttribute"/> is either too short (<see cref="Character#CodePointCount(char[], int, int)"/>
- /// < min) or too long (<see cref="Character#codePointCount(char[], int, int)"/> > max). </summary>
+ /// <see cref="CharTermAttribute"/> is either too short (<see cref="Character.CodePointCount(char[], int, int)"/>
+ /// < min) or too long (<see cref="Character.CodePointCount(char[], int, int)"/> > max). </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <see cref="TokenStream"/> to consume </param>
/// <param name="min"> the minimum length </param>
@@ -47,7 +47,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
public CodepointCountFilter(LuceneVersion version, TokenStream @in, int min, int max)
: base(version, @in)
{
- // LUCENENET: The guard clauses were copied here from the version of Lucene.
+ // LUCENENET: The guard clauses were copied here from a later version of Lucene.
// Apparently, the tests were not ported from 4.8.0 because they expected this and the
// original tests did not. Adding them anyway because there is no downside to this.
if (min < 0)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
index 4163aec..149cc13 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -32,14 +32,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class CodepointCountFilterFactory : TokenFilterFactory
{
- internal readonly int min;
- internal readonly int max;
+ private readonly int min;
+ private readonly int max;
public const string MIN_KEY = "min";
public const string MAX_KEY = "max";
/// <summary>
- /// Creates a new CodepointCountFilterFactory </summary>
- public CodepointCountFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="CodepointCountFilterFactory"/> </summary>
+ public CodepointCountFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
min = RequireInt(args, MIN_KEY);
max = RequireInt(args, MAX_KEY);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
index ef84806..baa5f80 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -1,7 +1,6 @@
\ufeffnamespace Lucene.Net.Analysis.Miscellaneous
{
-
- /*
+ /*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -18,15 +17,14 @@
* limitations under the License.
*/
- /// <summary>
- /// An always exhausted token stream.
- /// </summary>
- public sealed class EmptyTokenStream : TokenStream
- {
-
- public override bool IncrementToken()
- {
- return false;
- }
- }
+ /// <summary>
+ /// An always exhausted token stream.
+ /// </summary>
+ public sealed class EmptyTokenStream : TokenStream
+ {
+ public override bool IncrementToken()
+ {
+ return false;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
index c5da204..e12e123 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -1,10 +1,8 @@
-\ufeffusing System;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using System.Text;
-using Lucene.Net.Analysis.TokenAttributes;
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -21,6 +19,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
/// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
@@ -48,11 +47,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldtype>
/// </code>
- ///
/// </summary>
public sealed class HyphenatedWordsFilter : TokenFilter
{
-
private readonly ICharTermAttribute termAttribute;
private readonly IOffsetAttribute offsetAttribute;
@@ -62,9 +59,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private int lastEndOffset = 0;
/// <summary>
- /// Creates a new HyphenatedWordsFilter
+ /// Creates a new <see cref="HyphenatedWordsFilter"/>
/// </summary>
- /// <param name="in"> TokenStream that will be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> that will be filtered </param>
public HyphenatedWordsFilter(TokenStream @in)
: base(@in)
{
@@ -73,8 +70,28 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate <see cref="Lucene.Net.Util.Attribute"/>s with the attributes of the next
+ /// token.
+ /// <para/>
+ /// The producer must make no assumptions about the attributes after the method
+ /// has been returned: the caller may arbitrarily change it. If the producer
+ /// needs to preserve the state for subsequent calls, it can use
+ /// <see cref="Lucene.Net.Util.AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
+ /// this method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// <see cref="Lucene.Net.Util.AttributeSource.AddAttribute{T}"/> and <see cref="Lucene.Net.Util.AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.Attribute"/>s that this stream uses should be
+ /// retrieved during instantiation.
+ /// <para/>
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// <see cref="IncrementToken()"/>.
/// </summary>
+ /// <returns> false for end of stream; true otherwise </returns>
public override bool IncrementToken()
{
while (!exhausted && m_input.IncrementToken())
@@ -122,8 +139,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
index 6c4d375..19707ad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -32,10 +32,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class HyphenatedWordsFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new HyphenatedWordsFilterFactory </summary>
- public HyphenatedWordsFilterFactory(IDictionary<string, string> args) : base(args)
+ /// Creates a new <see cref="HyphenatedWordsFilterFactory"/> </summary>
+ public HyphenatedWordsFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index 0ff278c..3aa0978 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -23,8 +23,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A TokenFilter that only keeps tokens with text contained in the
- /// required words. This filter behaves like the inverse of StopFilter.
+ /// A <see cref="TokenFilter"/> that only keeps tokens with text contained in the
+ /// required words. This filter behaves like the inverse of <see cref="StopFilter"/>.
///
/// @since solr 1.3
/// </summary>
@@ -44,7 +44,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="KeepWordFilter"/>.
- /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
+ /// <para><c>NOTE</c>: The words set passed to this constructor will be directly
/// used by this filter and should not be modified.
/// </para>
/// </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
index 39f61bf..4806fbd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private CharArraySet words;
/// <summary>
- /// Creates a new KeepWordFilterFactory </summary>
+ /// Creates a new <see cref="KeepWordFilterFactory"/> </summary>
public KeepWordFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
index ab8a884..83adbda 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,29 +18,29 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
/// Marks terms as keywords via the <see cref="KeywordAttribute"/>.
/// </summary>
- /// <seealso cref= KeywordAttribute </seealso>
+ /// <seealso cref="KeywordAttribute"/>
public abstract class KeywordMarkerFilter : TokenFilter
{
-
private readonly IKeywordAttribute keywordAttr;
/// <summary>
/// Creates a new <see cref="KeywordMarkerFilter"/> </summary>
/// <param name="in"> the input stream </param>
- protected internal KeywordMarkerFilter(TokenStream @in)
+ protected KeywordMarkerFilter(TokenStream @in)
: base(@in)
{
keywordAttr = AddAttribute<IKeywordAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
- if (Keyword)
+ if (IsKeyword)
{
keywordAttr.IsKeyword = true;
}
@@ -53,7 +52,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
}
- protected internal abstract bool Keyword { get; }
-
+ protected abstract bool IsKeyword { get; } // LUCENENET TODO: Change to IsKeyword() ?
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
index 0070b74..d5054e1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
@@ -42,7 +42,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private CharArraySet protectedWords;
/// <summary>
- /// Creates a new KeywordMarkerFilterFactory </summary>
+ /// Creates a new <see cref="KeywordMarkerFilterFactory"/> </summary>
public KeywordMarkerFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
index 4cfe3e8..13aa99b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,15 +18,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/// <summary>
 /// This TokenFilter emits each incoming token twice, once as keyword and once non-keyword, in other words once with
- /// <see cref="KeywordAttribute#setKeyword(boolean)"/> set to <code>true</code> and once set to <code>false</code>.
+ /// <see cref="KeywordAttribute.IsKeyword"/> set to <c>true</c> and once set to <c>false</c>.
/// This is useful if used with a stem filter that respects the <see cref="KeywordAttribute"/> to index the stemmed and the
/// un-stemmed version of a term into the same field.
/// </summary>
public sealed class KeywordRepeatFilter : TokenFilter
{
-
private readonly IKeywordAttribute keywordAttribute;
private readonly IPositionIncrementAttribute posIncAttr;
private State state;
@@ -67,5 +66,4 @@ namespace Lucene.Net.Analysis.Miscellaneous
state = null;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
index c34561a..e88fe02 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -29,9 +29,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new KeywordRepeatFilterFactory </summary>
+ /// Creates a new <see cref="KeywordRepeatFilterFactory"/> </summary>
public KeywordRepeatFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index ab19c3a..aeb9434 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -30,7 +30,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class LengthFilter : FilteringTokenFilter
{
-
private readonly int min;
private readonly int max;
@@ -55,8 +54,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="LengthFilter"/>. This will filter out tokens whose
- /// <see cref="CharTermAttribute"/> is either too short (<see cref="CharTermAttribute#length()"/>
- /// < min) or too long (<see cref="CharTermAttribute#length()"/> > max). </summary>
+ /// <see cref="CharTermAttribute"/> is either too short (<see cref="ICharTermAttribute.Length"/>
+ /// < min) or too long (<see cref="ICharTermAttribute.Length"/> > max). </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <see cref="TokenStream"/> to consume </param>
/// <param name="min"> the minimum length </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
index f206b4b..aaed57d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -32,14 +32,14 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class LengthFilterFactory : TokenFilterFactory
{
- internal readonly int min;
- internal readonly int max;
- internal readonly bool enablePositionIncrements;
+ private readonly int min;
+ private readonly int max;
+ private readonly bool enablePositionIncrements;
public const string MIN_KEY = "min";
public const string MAX_KEY = "max";
/// <summary>
- /// Creates a new LengthFilterFactory </summary>
+ /// Creates a new <see cref="LengthFilterFactory"/> </summary>
public LengthFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
index 3bafb19..51361b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
@@ -18,9 +18,9 @@
*/
/// <summary>
- /// This Analyzer limits the number of tokens while indexing. It is
- /// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>. </summary>
- /// <seealso cref= LimitTokenCountFilter </seealso>
+ /// This <see cref="Analyzer"/> limits the number of tokens while indexing. It is
+ /// a replacement for the maximum field length setting inside <see cref="Index.IndexWriter"/>. </summary>
+ /// <seealso cref="LimitTokenCountFilter"/>
public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
{
private readonly Analyzer @delegate;
@@ -31,11 +31,12 @@
/// Build an analyzer that limits the maximum number of tokens per field.
/// This analyzer will not consume any tokens beyond the maxTokenCount limit
/// </summary>
- /// <seealso cref= #LimitTokenCountAnalyzer(Analyzer,int,boolean) </seealso>
+ /// <seealso cref="LimitTokenCountAnalyzer(Analyzer,int,bool)"/>
public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount)
: this(@delegate, maxTokenCount, false)
{
}
+
/// <summary>
/// Build an analyzer that limits the maximum number of tokens per field. </summary>
/// <param name="delegate"> the analyzer to wrap </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
index 9e4a37c..d7ec875 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
@@ -18,23 +18,22 @@
*/
/// <summary>
- /// This TokenFilter limits the number of tokens while indexing. It is
- /// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>.
+ /// This <see cref="TokenFilter"/> limits the number of tokens while indexing. It is
+ /// a replacement for the maximum field length setting inside <see cref="Index.IndexWriter"/>.
/// <para>
- /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
- /// once the limit has been reached, which can result in {@code reset()} being
- /// called prior to {@code incrementToken()} returning {@code false}. For most
- /// {@code TokenStream} implementations this should be acceptable, and faster
- /// then consuming the full stream. If you are wrapping a {@code TokenStream}
+ /// By default, this filter ignores any tokens in the wrapped <see cref="TokenStream"/>
+ /// once the limit has been reached, which can result in <see cref="Reset"/> being
+ /// called prior to <see cref="IncrementToken"/> returning <c>false</c>. For most
+ /// <see cref="TokenStream"/> implementations this should be acceptable, and faster
+ /// than consuming the full stream. If you are wrapping a <see cref="TokenStream"/>
/// which requires that the full stream of tokens be exhausted in order to
/// function properly, use the
- /// <see cref="#LimitTokenCountFilter(TokenStream,int,boolean) consumeAllTokens"/>
+ /// <see cref="LimitTokenCountFilter.LimitTokenCountFilter(TokenStream,int,bool)"/> consumeAllTokens
/// option.
/// </para>
/// </summary>
public sealed class LimitTokenCountFilter : TokenFilter
{
-
private readonly int maxTokenCount;
private readonly bool consumeAllTokens;
private int tokenCount = 0;
@@ -42,9 +41,11 @@
/// <summary>
/// Build a filter that only accepts tokens up to a maximum number.
- /// This filter will not consume any tokens beyond the maxTokenCount limit
+ /// This filter will not consume any tokens beyond the <paramref name="maxTokenCount"/> limit
/// </summary>
- /// <seealso cref= #LimitTokenCountFilter(TokenStream,int,boolean) </seealso>
+ /// <param name="in"> the stream to wrap </param>
+ /// <param name="maxTokenCount"> max number of tokens to produce </param>
+ /// <seealso cref="LimitTokenCountFilter(TokenStream,int,bool)"/>
public LimitTokenCountFilter(TokenStream @in, int maxTokenCount)
: this(@in, maxTokenCount, false)
{
@@ -54,7 +55,7 @@
 /// Build a filter that limits the maximum number of tokens per field. </summary>
/// <param name="in"> the stream to wrap </param>
/// <param name="maxTokenCount"> max number of tokens to produce </param>
- /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
+ /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if <paramref name="maxTokenCount"/> is reached. </param>
public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens)
: base(@in)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
index 25b980d..f64981e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,20 +30,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// <para>
- /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.
+ /// The <see cref="consumeAllTokens"/> property is optional and defaults to <c>false</c>.
 /// See <see cref="LimitTokenCountFilter"/> for an explanation of its use.
/// </para>
/// </summary>
public class LimitTokenCountFilterFactory : TokenFilterFactory
{
-
public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
- internal readonly int maxTokenCount;
- internal readonly bool consumeAllTokens;
+ private readonly int maxTokenCount;
+ private readonly bool consumeAllTokens;
/// <summary>
- /// Creates a new LimitTokenCountFilterFactory </summary>
+ /// Creates a new <see cref="LimitTokenCountFilterFactory"/> </summary>
public LimitTokenCountFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
index 008ff97..40c5a34 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
@@ -20,23 +20,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// This TokenFilter limits its emitted tokens to those with positions that
+ /// This <see cref="TokenFilter"/> limits its emitted tokens to those with positions that
/// are not greater than the configured limit.
/// <para>
- /// By default, this filter ignores any tokens in the wrapped {@code TokenStream}
- /// once the limit has been exceeded, which can result in {@code reset()} being
- /// called prior to {@code incrementToken()} returning {@code false}. For most
- /// {@code TokenStream} implementations this should be acceptable, and faster
- /// then consuming the full stream. If you are wrapping a {@code TokenStream}
+ /// By default, this filter ignores any tokens in the wrapped <see cref="TokenStream"/>
+ /// once the limit has been exceeded, which can result in <see cref="Reset"/> being
+ /// called prior to <see cref="IncrementToken"/> returning <c>false</c>. For most
+ /// <see cref="TokenStream"/> implementations this should be acceptable, and faster
+ /// than consuming the full stream. If you are wrapping a <see cref="TokenStream"/>
/// which requires that the full stream of tokens be exhausted in order to
/// function properly, use the
- /// <see cref="#LimitTokenPositionFilter(TokenStream,int,boolean) consumeAllTokens"/>
+ /// <see cref="LimitTokenPositionFilter(TokenStream,int,bool)"/> consumeAllTokens
/// option.
/// </para>
/// </summary>
public sealed class LimitTokenPositionFilter : TokenFilter
{
-
private readonly int maxTokenPosition;
private readonly bool consumeAllTokens;
private int tokenPosition = 0;
@@ -45,12 +44,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Build a filter that only accepts tokens up to and including the given maximum position.
- /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
+ /// This filter will not consume any tokens with position greater than the <paramref name="maxTokenPosition"/> limit.
/// </summary>
/// <param name="in"> the stream to wrap </param>
/// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
/// </param>
- /// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
+ /// <seealso cref="LimitTokenPositionFilter(TokenStream,int,bool)"/>
public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition)
: this(@in, maxTokenPosition, false)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
index e2d7692..6bc1fcd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,20 +30,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// <para>
- /// The {@code consumeAllTokens} property is optional and defaults to {@code false}.
+ /// The <see cref="consumeAllTokens"/> property is optional and defaults to <c>false</c>.
/// See <see cref="LimitTokenPositionFilter"/> for an explanation of its use.
/// </para>
/// </summary>
public class LimitTokenPositionFilterFactory : TokenFilterFactory
{
-
public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
- internal readonly int maxTokenPosition;
- internal readonly bool consumeAllTokens;
+ private readonly int maxTokenPosition;
+ private readonly bool consumeAllTokens;
/// <summary>
- /// Creates a new LimitTokenPositionFilterFactory </summary>
+ /// Creates a new <see cref="LimitTokenPositionFilterFactory"/> </summary>
public LimitTokenPositionFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index 15bf32a..070ad85 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -29,13 +29,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
[Obsolete]
public sealed class Lucene47WordDelimiterFilter : TokenFilter
{
- private void InitializeInstanceFields()
- {
- concat = new WordDelimiterConcatenation(this);
- concatAll = new WordDelimiterConcatenation(this);
- }
-
-
public const int LOWER = 0x01;
public const int UPPER = 0x02;
public const int DIGIT = 0x04;
@@ -110,7 +103,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// If not null is the set of tokens to protect from being delimited
///
/// </summary>
- internal readonly CharArraySet protWords;
+ private readonly CharArraySet protWords;
private readonly int flags;
@@ -149,9 +142,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private bool hasOutputFollowingOriginal = false;
/// <summary>
- /// Creates a new WordDelimiterFilter
+ /// Creates a new <see cref="Lucene47WordDelimiterFilter"/>
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
@@ -162,18 +155,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
offsetAttribute = AddAttribute<IOffsetAttribute>();
posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
typeAttribute = AddAttribute<ITypeAttribute>();
+ concat = new WordDelimiterConcatenation(this);
+ concatAll = new WordDelimiterConcatenation(this);
- InitializeInstanceFields();
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE));
}
/// <summary>
- /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+ /// Creates a new <see cref="Lucene47WordDelimiterFilter"/> using <see cref="WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE"/>
/// as its charTypeTable
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords)
@@ -237,7 +231,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE)
{
- if (!concat.Empty)
+ if (!concat.IsEmpty)
{
if (FlushConcatenation(concat))
{
@@ -245,7 +239,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
}
- if (!concatAll.Empty)
+ if (!concatAll.IsEmpty)
{
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount)
@@ -272,7 +266,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
int wordType = iterator.Type;
// do we already have queued up incompatible concatenations?
- if (!concat.Empty && (concat.type & wordType) == 0)
+ if (!concat.IsEmpty && (concat.type & wordType) == 0)
{
if (FlushConcatenation(concat))
{
@@ -285,7 +279,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// add subwords depending upon options
if (ShouldConcatenate(wordType))
{
- if (concat.Empty)
+ if (concat.IsEmpty)
{
concat.type = wordType;
}
@@ -311,8 +305,21 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -348,10 +355,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+ /// Flushes the given <see cref="WordDelimiterConcatenation"/> by either writing its concat and then clearing, or just clearing.
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
- /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that will be flushed </param>
+ /// <returns> <c>true</c> if the concatenation was written before it was cleared, <c>false</c> otherwise </returns>
private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
{
lastConcatCount = concatenation.subwordCount;
@@ -368,7 +375,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether to concatenate a word or number if the current word is the given type
/// </summary>
/// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
- /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns>
private bool ShouldConcatenate(int wordType)
{
return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType));
@@ -378,7 +385,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether a word/number part should be generated for a word of the given type
/// </summary>
/// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
- /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns>
private bool ShouldGenerateParts(int wordType)
{
return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType));
@@ -390,7 +397,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
private void Concatenate(WordDelimiterConcatenation concatenation)
{
- if (concatenation.Empty)
+ if (concatenation.IsEmpty)
{
concatenation.startOffset = savedStartOffset + iterator.current;
}
@@ -401,7 +408,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Generates a word/number part, updating the appropriate attributes
/// </summary>
- /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+ /// <param name="isSingleWord"> <c>true</c> if the generation is occurring from a single word, <c>false</c> otherwise </param>
private void GeneratePart(bool isSingleWord)
{
ClearAttributes();
@@ -460,41 +467,41 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Checks if the given word type includes <see cref="#ALPHA"/>
+ /// Checks if the given word type includes <see cref="ALPHA"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
- internal static bool IsAlpha(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="ALPHA"/>, <c>false</c> otherwise </returns>
+ private static bool IsAlpha(int type)
{
return (type & ALPHA) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#DIGIT"/>
+ /// Checks if the given word type includes <see cref="DIGIT"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
- internal static bool IsDigit(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="DIGIT"/>, <c>false</c> otherwise </returns>
+ private static bool IsDigit(int type)
{
return (type & DIGIT) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#SUBWORD_DELIM"/>
+ /// Checks if the given word type includes <see cref="SUBWORD_DELIM"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
- internal static bool IsSubwordDelim(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="SUBWORD_DELIM"/>, <c>false</c> otherwise </returns>
+ private static bool IsSubwordDelim(int type)
{
return (type & SUBWORD_DELIM) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#UPPER"/>
+ /// Checks if the given word type includes <see cref="UPPER"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
- internal static bool IsUpper(int type)
+ /// <returns> <c>true</c> if the type contains <see cref="UPPER"/>, <c>false</c> otherwise </returns>
+ private static bool IsUpper(int type)
{
return (type & UPPER) != 0;
}
@@ -503,7 +510,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether the given flag is set
/// </summary>
/// <param name="flag"> Flag to see if set </param>
- /// <returns> {@code true} if flag is set </returns>
+ /// <returns> <c>true</c> if flag is set </returns>
private bool Has(int flag)
{
return (flags & flag) != 0;
@@ -573,8 +580,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the concatenation is empty
/// </summary>
- /// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
- internal bool Empty
+ /// <returns> <c>true</c> if the concatenation is empty, <c>false</c> otherwise </returns>
+ internal bool IsEmpty
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index e33b446..a01625b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -27,9 +27,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
- /// <see cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <see cref="Pattern"/>
- /// (with behaviour identical to <see cref="String#split(String)"/>),
+ /// Efficient Lucene analyzer/tokenizer that preferably operates on a <see cref="string"/> rather than a
+ /// <see cref="TextReader"/>, that can flexibly separate text into terms via a regular expression <see cref="Regex"/>
+ /// (with behaviour similar to <see cref="string.Split(string)"/>),
/// and that combines the functionality of
/// <see cref="LetterTokenizer"/>,
/// <see cref="LowerCaseTokenizer"/>,
@@ -39,9 +39,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <para>
/// If you are unsure how exactly a regular expression should look like, consider
/// prototyping by simply trying various expressions on some test texts via
- /// <see cref="String#split(String)"/>. Once you are satisfied, give that regex to
- /// PatternAnalyzer. Also see <a target="_blank"
- /// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+ /// <see cref="string.Split(string)"/>. Once you are satisfied, give that regex to
+ /// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
+ /// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
/// <para>
/// This class can be considerably faster than the "normal" Lucene tokenizers.
@@ -51,7 +51,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <pre>
/// PatternAnalyzer pat = ...
/// TokenStream tokenStream = new SnowballFilter(
- /// pat.tokenStream("content", "James is running round in the woods"),
+ /// pat.GetTokenStream("content", "James is running round in the woods"),
/// "English"));
/// </code>
/// </para>
@@ -60,13 +60,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
public sealed class PatternAnalyzer : Analyzer
{
-
/// <summary>
- /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+ /// <c>"\\W+"</c>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
public static readonly Regex NON_WORD_PATTERN = new Regex("\\W+", RegexOptions.Compiled);
/// <summary>
- /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+ /// <c>"\\s+"</c>; Divides text at whitespace (char.IsWhiteSpace(c)) </summary>
public static readonly Regex WHITESPACE_PATTERN = new Regex("\\s+", RegexOptions.Compiled);
private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
@@ -124,7 +123,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
LuceneVersion.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
/// <summary>
- /// A lower-casing word analyzer with <b>extended </b> English stop words
+ /// A lower-casing word analyzer with <b>extended</b> English stop words
/// (can be shared freely across threads without harm); global per class
/// loader. The stop words are borrowed from
/// http://thomas.loc.gov/home/stopwords.html, see
@@ -220,8 +219,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Creates a token stream that tokenizes all the text in the given SetReader;
- /// This implementation forwards to <code>tokenStream(String, TextReader, String)</code> and is
- /// less efficient than <code>tokenStream(String, TextReader, String)</code>.
+ /// This implementation forwards to <see cref="Analyzer.TokenStream(string, TextReader)"/> and is
+ /// less efficient than <see cref="Analyzer.TokenStream(string, TextReader)"/>.
/// </summary>
/// <param name="fieldName">
/// the name of the field to tokenize (currently ignored). </param>
@@ -586,13 +585,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
/// <summary>
- /// A StringReader that exposes it's contained string for fast direct access.
- /// Might make sense to generalize this to CharSequence and make it public?
+ /// A <see cref="StringReader"/> that exposes its contained string for fast direct access.
+ /// Might make sense to generalize this to ICharSequence and make it public?
/// </summary>
internal sealed class FastStringReader : StringReader
{
-
- internal readonly string s;
+ private readonly string s;
internal FastStringReader(string s)
: base(s)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 200a934..f9c8898 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -34,10 +34,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Create a new <see cref="PatternKeywordMarkerFilter"/>, that marks the current
/// token as a keyword if the token's term buffer matches the provided
- /// <see cref="Pattern"/> via the <see cref="KeywordAttribute"/>.
+ /// <see cref="Regex"/> via the <see cref="KeywordAttribute"/>.
/// </summary>
/// <param name="in">
- /// TokenStream to filter </param>
+ /// <see cref="TokenStream"/> to filter </param>
/// <param name="pattern">
/// the pattern to apply to the incoming term buffer
/// </param>
@@ -50,7 +50,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.pattern = pattern;
}
- protected internal override bool Keyword
+ protected override bool IsKeyword
{
get
{
@@ -58,6 +58,5 @@ namespace Lucene.Net.Analysis.Miscellaneous
return matcher.Success;
}
}
-
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
index 32e9fa0..862f2a2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -2,7 +2,6 @@
namespace Lucene.Net.Analysis.Miscellaneous
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,31 +19,26 @@ namespace Lucene.Net.Analysis.Miscellaneous
* limitations under the License.
*/
-
-
/// <summary>
/// This analyzer is used to facilitate scenarios where different
/// fields require different analysis techniques. Use the Map
- /// argument in <see cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
+ /// argument in <see cref="PerFieldAnalyzerWrapper(Analyzer, IDictionary{string, Analyzer})"/>
/// to add non-default analyzers for fields.
///
/// <para>Example usage:
///
/// <code>
- /// {@code
- /// Map<String,Analyzer> analyzerPerField = new HashMap<>();
- /// analyzerPerField.put("firstname", new KeywordAnalyzer());
- /// analyzerPerField.put("lastname", new KeywordAnalyzer());
+ /// IDictionary<string, Analyzer> analyzerPerField = new Dictionary<string, Analyzer>();
+ /// analyzerPerField["firstname"] = new KeywordAnalyzer();
+ /// analyzerPerField["lastname"] = new KeywordAnalyzer();
///
/// PerFieldAnalyzerWrapper aWrapper =
/// new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
- /// }
/// </code>
- ///
/// </para>
- /// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
- /// and "lastname", for which KeywordAnalyzer will be used.
- ///
+ /// <para>
+ /// In this example, <see cref="Standard.StandardAnalyzer"/> will be used for all fields except "firstname"
+ /// and "lastname", for which <see cref="Core.KeywordAnalyzer"/> will be used.
/// </para>
/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
/// and query parsing.
@@ -71,7 +65,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="defaultAnalyzer"> Any fields not specifically
/// defined to use a different analyzer will use the one provided here. </param>
- /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be
+ /// <param name="fieldAnalyzers"> a <see cref="IDictionary{TKey, TValue}"/> (String field name to the Analyzer) to be
/// used for those fields </param>
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers)
: base(PER_FIELD_REUSE_STRATEGY)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
index f968659..2479b33 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -19,16 +19,17 @@
/// <summary>
/// Links two <see cref="PrefixAwareTokenFilter"/>.
- /// <p/>
- /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- /// the ones located in org.apache.lucene.analysis.tokenattributes.
+ /// <para/>
+ /// <b>NOTE:</b> This filter might not behave correctly if used with custom
+ /// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
+ /// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
public class PrefixAndSuffixAwareTokenFilter : TokenStream
{
-
private readonly PrefixAwareTokenFilter suffix;
- public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+ public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
+ : base(suffix)
{
prefix = new PrefixAwareTokenFilterAnonymousInnerClassHelper(this, prefix, input);
this.suffix = new PrefixAwareTokenFilterAnonymousInnerClassHelper2(this, prefix, suffix);
@@ -38,7 +39,8 @@
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
- public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input) : base(prefix, input)
+ public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input)
+ : base(prefix, input)
{
this.outerInstance = outerInstance;
}
@@ -53,7 +55,8 @@
{
private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
- public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+ public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix)
+ : base(prefix, suffix)
{
this.outerInstance = outerInstance;
}
@@ -86,7 +89,6 @@
suffix.Reset();
}
-
public override void Dispose()
{
suffix.Dispose();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
index 652aec0..cef54d0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -25,13 +25,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// to be used when updating the token values in the second stream based on that token.
///
/// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
- /// <p/>
- /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- /// the ones located in org.apache.lucene.analysis.tokenattributes.
+ /// <para/>
+ /// <b>NOTE:</b> This filter might not behave correctly if used with custom
+ /// <see cref="Lucene.Net.Util.IAttribute"/>s, i.e. <see cref="Lucene.Net.Util.IAttribute"/>s other than
+ /// the ones located in Lucene.Net.Analysis.TokenAttributes.
/// </summary>
public class PrefixAwareTokenFilter : TokenStream
{
-
private TokenStream prefix;
private TokenStream suffix;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
index 8f3fb20..d93b7a8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
@@ -23,11 +23,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
+ /// A <see cref="TokenFilter"/> which filters out <see cref="Token"/>s at the same position and Term text as the previous token in the stream.
/// </summary>
public sealed class RemoveDuplicatesTokenFilter : TokenFilter
{
-
private readonly ICharTermAttribute termAttribute;
private readonly IPositionIncrementAttribute posIncAttribute;
@@ -49,8 +48,28 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate <see cref="Lucene.Net.Util.IAttribute"/>s with the attributes of the next
+ /// token.
+ /// <para/>
+ /// The producer must make no assumptions about the attributes after the method
+ /// has been returned: the caller may arbitrarily change it. If the producer
+ /// needs to preserve the state for subsequent calls, it can use
+ /// <see cref="AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
+ /// This method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// <see cref="AttributeSource.AddAttribute{T}"/> and <see cref="AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.IAttribute"/>s that this stream uses should be
+ /// retrieved during instantiation.
+ /// <para/>
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// <see cref="IncrementToken()"/>.
/// </summary>
+ /// <returns> false for end of stream; true otherwise </returns>
public override sealed bool IncrementToken()
{
while (m_input.IncrementToken())
@@ -80,7 +99,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
public override void Reset()
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
index 1554866..1aedae4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
@@ -32,9 +32,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class RemoveDuplicatesTokenFilterFactory : TokenFilterFactory
{
-
/// <summary>
- /// Creates a new RemoveDuplicatesTokenFilterFactory </summary>
+ /// Creates a new <see cref="RemoveDuplicatesTokenFilterFactory"/> </summary>
public RemoveDuplicatesTokenFilterFactory(IDictionary<string, string> args) : base(args)
{
if (args.Count > 0)