You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 20:15:56 UTC
[4/6] lucenenet git commit: Lucene.Net.Analysis.CommonGrams refactor:
member accessibility and documentation comments
Lucene.Net.Analysis.CommonGrams refactor: member accessibility and documentation comments
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/487927c0
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/487927c0
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/487927c0
Branch: refs/heads/api-work
Commit: 487927c003fd5d42e4b72d642278683ca0d31aec
Parents: 3e97f31
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 23:43:22 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Feb 3 01:13:43 2017 +0700
----------------------------------------------------------------------
.../Analysis/CommonGrams/CommonGramsFilter.cs | 49 +++++++++------
.../CommonGrams/CommonGramsFilterFactory.cs | 9 +--
.../CommonGrams/CommonGramsQueryFilter.cs | 63 ++++++++++++--------
.../CommonGramsQueryFilterFactory.cs | 12 ++--
src/Lucene.Net.Core/Analysis/TokenFilter.cs | 19 ++++--
src/Lucene.Net.Core/Analysis/TokenStream.cs | 14 ++---
6 files changed, 99 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
index fcfe42d..e7578be 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -31,15 +31,14 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Construct bigrams for frequently occurring terms while indexing. Single terms
/// are still indexed too, with bigrams overlaid. This is achieved through the
- /// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type
- /// of <seealso cref="#GRAM_TYPE"/> Example:
- /// <ul>
- /// <li>input:"the quick brown fox"</li>
- /// <li>output:|"the","the-quick"|"brown"|"fox"|</li>
- /// <li>"the-quick" has a position increment of 0 so it is in the same position
- /// as "the" "the-quick" has a term.type() of "gram"</li>
- ///
- /// </ul>
+ /// use of <see cref="PositionIncrementAttribute.PositionIncrement"/>. Bigrams have a type
+ /// of <see cref="GRAM_TYPE"/>. Example:
+ /// <list type="bullet">
+ /// <item>input:"the quick brown fox"</item>
+ /// <item>output:|"the","the-quick"|"brown"|"fox"|</item>
+ /// <item>"the-quick" has a position increment of 0, so it is in the same position
+ /// as "the"; "the-quick" has a term.type() of "gram"</item>
+ /// </list>
/// </summary>
/*
@@ -47,7 +46,6 @@ namespace Lucene.Net.Analysis.CommonGrams
*/
public sealed class CommonGramsFilter : TokenFilter
{
-
public const string GRAM_TYPE = "gram";
private const char SEPARATOR = '_';
@@ -71,7 +69,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// bigrams with position increment 0 type=gram where one or both of the words
/// in a potential bigram are in the set of common words .
/// </summary>
- /// <param name="input"> TokenStream input in filter chain </param>
+ /// <param name="input"> <see cref="TokenStream"/> input in filter chain </param>
/// <param name="commonWords"> The set of common words. </param>
public CommonGramsFilter(LuceneVersion matchVersion, TokenStream input, CharArraySet commonWords)
: base(input)
@@ -89,11 +87,11 @@ namespace Lucene.Net.Analysis.CommonGrams
/// output the token. If the token and/or the following token are in the list
/// of common words also output a bigram with position increment 0 and
/// type="gram"
- ///
+ /// <para/>
/// TODO:Consider adding an option to not emit unigram stopwords
- /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be
+ /// as in CDL XTF BigramStopFilter, <see cref="CommonGramsQueryFilter"/> would need to be
/// changed to work with this.
- ///
+ /// <para/>
/// TODO: Consider optimizing for the case of three
/// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of",
/// "of-the", "the-year" but with proper management of positions we could
@@ -119,7 +117,7 @@ namespace Lucene.Net.Analysis.CommonGrams
* When valid, the buffer always contains at least the separator.
* If its empty, there is nothing before this stopword.
*/
- if (lastWasCommon || (Common && buffer.Length > 0))
+ if (lastWasCommon || (IsCommon && buffer.Length > 0))
{
savedState = CaptureState();
GramToken();
@@ -131,8 +129,21 @@ namespace Lucene.Net.Analysis.CommonGrams
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -146,8 +157,8 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Determines if the current token is a common term
/// </summary>
- /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns>
- private bool Common
+ /// <returns> <c>true</c> if the current token is a common term, <c>false</c> otherwise </returns>
+ private bool IsCommon
{
get
{
@@ -164,7 +175,7 @@ namespace Lucene.Net.Analysis.CommonGrams
buffer.Append(termAttribute.Buffer, 0, termAttribute.Length);
buffer.Append(SEPARATOR);
lastStartOffset = offsetAttribute.StartOffset;
- lastWasCommon = Common;
+ lastWasCommon = IsCommon;
}
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
index f63a71f..333ac68 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -23,14 +23,15 @@ namespace Lucene.Net.Analysis.CommonGrams
*/
/// <summary>
- /// Constructs a <seealso cref="CommonGramsFilter"/>.
- /// <pre class="prettyprint">
+ /// Constructs a <see cref="CommonGramsFilter"/>.
+ /// <code>
/// <fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
@@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.CommonGrams
private readonly bool ignoreCase;
/// <summary>
- /// Creates a new CommonGramsFilterFactory </summary>
+ /// Creates a new <see cref="CommonGramsFilterFactory"/> </summary>
public CommonGramsFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
index 366621d..32039ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
@@ -18,28 +18,26 @@ namespace Lucene.Net.Analysis.CommonGrams
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
/// <summary>
- /// Wrap a CommonGramsFilter optimizing phrase queries by only returning single
+ /// Wrap a <see cref="CommonGramsFilter"/> optimizing phrase queries by only returning single
/// words when they are not a member of a bigram.
- ///
+ /// <para/>
/// Example:
- /// <ul>
- /// <li>query input to CommonGramsFilter: "the rain in spain falls mainly"
- /// <li>output of CommomGramsFilter/input to CommonGramsQueryFilter:
- /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"
- /// <li>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain",
- /// "falls", "mainly"
- /// </ul>
+ /// <list type="bullet">
+ /// <item>query input to CommonGramsFilter: "the rain in spain falls mainly"</item>
+ /// <item>output of CommonGramsFilter/input to CommonGramsQueryFilter:
+ /// |"the", "the-rain"|"rain", "rain-in"|"in", "in-spain"|"spain"|"falls"|"mainly"</item>
+ /// <item>output of CommonGramsQueryFilter: "the-rain", "rain-in", "in-spain",
+ /// "falls", "mainly"</item>
+ /// </list>
/// </summary>
-
- /*
- * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
- * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
- */
+ /// <remarks>
+ /// See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
+ /// http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
+ /// </remarks>
public sealed class CommonGramsQueryFilter : TokenFilter
{
-
private readonly ITypeAttribute typeAttribute;
private readonly IPositionIncrementAttribute posIncAttribute;
@@ -59,8 +57,21 @@ namespace Lucene.Net.Analysis.CommonGrams
}
/// <summary>
- /// {@inheritDoc}
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
base.Reset();
@@ -72,10 +83,10 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Output bigrams whenever possible to optimize queries. Only output unigrams
/// when they are not a member of a bigram. Example:
- /// <ul>
- /// <li>input: "the rain in spain falls mainly"
- /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
- /// </ul>
+ /// <list type="bullet">
+ /// <item>input: "the rain in spain falls mainly"</item>
+ /// <item>output: "the-rain", "rain-in", "in-spain", "falls", "mainly"</item>
+ /// </list>
/// </summary>
public override bool IncrementToken()
{
@@ -83,13 +94,13 @@ namespace Lucene.Net.Analysis.CommonGrams
{
State current = CaptureState();
- if (previous != null && !GramType)
+ if (previous != null && !IsGramType)
{
RestoreState(previous);
previous = current;
previousType = typeAttribute.Type;
- if (GramType)
+ if (IsGramType)
{
posIncAttribute.PositionIncrement = 1;
}
@@ -109,7 +120,7 @@ namespace Lucene.Net.Analysis.CommonGrams
RestoreState(previous);
previous = null;
- if (GramType)
+ if (IsGramType)
{
posIncAttribute.PositionIncrement = 1;
}
@@ -121,8 +132,8 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
/// Convenience method to check if the current type is a gram type
/// </summary>
- /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns>
- public bool GramType
+ /// <returns> <c>true</c> if the current type is a gram type, <c>false</c> otherwise </returns>
+ public bool IsGramType
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
index f797390..1e067e9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
@@ -21,28 +21,28 @@ namespace Lucene.Net.Analysis.CommonGrams
/// <summary>
- /// Construct <seealso cref="CommonGramsQueryFilter"/>.
+ /// Construct <see cref="CommonGramsQueryFilter"/>.
///
- /// <pre class="prettyprint">
+ /// <code>
/// <fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
/// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
/// <filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/>
/// </analyzer>
- /// </fieldType></pre>
+ /// </fieldType>
+ /// </code>
/// </summary>
public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory
{
-
/// <summary>
- /// Creates a new CommonGramsQueryFilterFactory </summary>
+ /// Creates a new <see cref="CommonGramsQueryFilterFactory"/> </summary>
public CommonGramsQueryFilterFactory(IDictionary<string, string> args)
: base(args)
{
}
/// <summary>
- /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
+ /// Create a <see cref="CommonGramsFilter"/> and wrap it with a <see cref="CommonGramsQueryFilter"/>
/// </summary>
public override TokenStream Create(TokenStream input)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
index d6a96fb..b082d6a 100644
--- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
@@ -61,12 +61,21 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// {@inheritDoc}
- /// <p>
- /// <b>NOTE:</b>
- /// The default implementation chains the call to the input TokenStream, so
- /// be sure to call <code>super.reset()</code> when overriding this method.
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
+ /// Resets this stream to a clean state. Stateful implementations must implement
+ /// this method so that they can be reused, just as if they had been created fresh.
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
+ /// <remarks>
+ /// <b>NOTE:</b>
+ /// The default implementation chains the call to the input <see cref="TokenStream"/>, so
+ /// be sure to call <c>base.Reset()</c> when overriding this method.
+ /// </remarks>
public override void Reset()
{
m_input.Reset();
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/487927c0/src/Lucene.Net.Core/Analysis/TokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs
index 90bf3f2..1e104e9 100644
--- a/src/Lucene.Net.Core/Analysis/TokenStream.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs
@@ -183,15 +183,15 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// this method is called by a consumer before it begins consumption using
- /// <seealso cref="#IncrementToken()"/>.
- /// <p>
+ /// This method is called by a consumer before it begins consumption using
+ /// <see cref="IncrementToken()"/>.
+ /// <para/>
/// Resets this stream to a clean state. Stateful implementations must implement
/// this method so that they can be reused, just as if they had been created fresh.
- /// <p>
- /// If you override this method, always call {@code super.reset()}, otherwise
- /// some internal state will not be correctly reset (e.g., <seealso cref="Tokenizer"/> will
- /// throw <seealso cref="IllegalStateException"/> on further usage).
+ /// <para/>
+ /// If you override this method, always call <c>base.Reset()</c>, otherwise
+ /// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
+ /// throw <see cref="InvalidOperationException"/> on further usage).
/// </summary>
public virtual void Reset()
{