You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/03 17:51:11 UTC
[05/11] lucenenet git commit: Lucene.Net.Analysis.Miscellaneous
refactor: member accessibility and documentation comments
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
index 51b115a..53cebfe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
@@ -23,30 +23,30 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
/// It also discriminates against use of double vowels aa, ae, ao, oe and oo, leaving just the first one.
- /// <p/>
+ /// <para/>
/// It is a semantically more destructive solution than <see cref="ScandinavianNormalizationFilter"/> but
/// can in addition help with matching raksmorgas as räksmörgås.
- /// <p/>
+ /// <para/>
/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej == blabarsyltetoj
/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas == raksmorgas
- /// <p/>
+ /// <para/>
/// Background:
/// Swedish åäö are in fact the same letters as Norwegian and Danish åæø and thus interchangeable
/// when used between these languages. They are however folded differently when people type
/// them on a keyboard lacking these characters.
- /// <p/>
+ /// <para/>
/// In that situation almost all Swedish people use a, a, o instead of å, ä, ö.
- /// <p/>
+ /// <para/>
/// Norwegians and Danes on the other hand usually type aa, ae and oe instead of å, æ and ø.
/// Some do however use a, a, o, oo, ao and sometimes permutations of everything above.
- /// <p/>
+ /// <para/>
/// This filter solves that mismatch problem, but might also cause new ones.
- /// <p/> </summary>
- /// <seealso cref= ScandinavianNormalizationFilter </seealso>
+ /// </summary>
+ /// <seealso cref="ScandinavianNormalizationFilter"/>
public sealed class ScandinavianFoldingFilter : TokenFilter
{
-
- public ScandinavianFoldingFilter(TokenStream input) : base(input)
+ public ScandinavianFoldingFilter(TokenStream input)
+ : base(input)
{
charTermAttribute = AddAttribute<ICharTermAttribute>();
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
index c6930b2..97e60a7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
@@ -32,8 +32,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class ScandinavianFoldingFilterFactory : TokenFilterFactory
{
-
- public ScandinavianFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+ /// <summary>
+ /// Creates a new <see cref="ScandinavianFoldingFilterFactory"/> </summary>
+ public ScandinavianFoldingFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
if (args.Count > 0)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
index 5ad937b..9a75541 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
@@ -23,15 +23,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// This filter normalizes use of the interchangeable Scandinavian characters æÆäÄöÖøØ
/// and folded variants (aa, ao, ae, oe and oo) by transforming them to åÅæÆøØ.
- /// <p/>
+ /// <para/>
/// It's a semantically less destructive solution than <see cref="ScandinavianFoldingFilter"/>,
/// most useful when a person with a Norwegian or Danish keyboard queries a Swedish index
/// and vice versa. This filter does <b>not</b> perform the common Swedish folds of ä and å to a nor ö to o.
- /// <p/>
+ /// <para/>
/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej but not blabarsyltetoj
/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas but not raksmorgas
- /// <p/> </summary>
- /// <seealso cref= ScandinavianFoldingFilter </seealso>
+ /// </summary>
+ /// <seealso cref="ScandinavianFoldingFilter"/>
public sealed class ScandinavianNormalizationFilter : TokenFilter
{
public ScandinavianNormalizationFilter(TokenStream input)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
index e5a5832..1bc9dae 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
@@ -21,7 +21,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Factory for <see cref="org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter"/>.
+ /// Factory for <see cref="ScandinavianNormalizationFilter"/>.
/// <code>
/// <fieldType name="text_scandnorm" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
@@ -32,7 +32,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class ScandinavianNormalizationFilterFactory : TokenFilterFactory
{
-
+ /// <summary>
+ /// Creates a new <see cref="ScandinavianNormalizationFilterFactory"/> </summary>
public ScandinavianNormalizationFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
index f4adbfe..769de5e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
@@ -23,7 +23,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Marks terms as keywords via the <see cref="KeywordAttribute"/>. Each token
/// contained in the provided set is marked as a keyword by setting
- /// <see cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+ /// <see cref="KeywordAttribute.IsKeyword"/> to <c>true</c>.
/// </summary>
public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
{
@@ -31,12 +31,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly CharArraySet keywordSet;
/// <summary>
- /// Create a new KeywordSetMarkerFilter, that marks the current token as a
+ /// Create a new <see cref="SetKeywordMarkerFilter"/>, that marks the current token as a
/// keyword if the tokens term buffer is contained in the given set via the
/// <see cref="KeywordAttribute"/>.
/// </summary>
/// <param name="in">
- /// TokenStream to filter </param>
+ /// <see cref="TokenStream"/> to filter </param>
/// <param name="keywordSet">
/// the keywords set to lookup the current termbuffer </param>
public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet)
@@ -46,7 +46,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
termAtt = AddAttribute<ICharTermAttribute>();
}
- protected internal override bool Keyword
+ protected override bool IsKeyword
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
index f2c00ce..cf8b0b9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SingleTokenTokenStream.cs
@@ -31,9 +31,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
private Token singleToken;
private readonly ICharTermAttribute tokenAtt;
- public SingleTokenTokenStream(Token token) : base(Token.TOKEN_ATTRIBUTE_FACTORY)
+ public SingleTokenTokenStream(Token token)
+ : base(Token.TOKEN_ATTRIBUTE_FACTORY)
{
-
Debug.Assert(token != null);
this.singleToken = (Token)token.Clone();
@@ -41,7 +41,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
Debug.Assert(tokenAtt is Token);
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (exhausted)
{
@@ -61,7 +61,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
exhausted = false;
}
- public Token Token
+ public Token Token // LUCENENET TODO: Change to GetToken() and SetToken() (conversion)
{
get { return (Token)singleToken.Clone(); }
set { this.singleToken = (Token)value.Clone(); }
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
index 0e09209..d0f6618 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilter.cs
@@ -1,9 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using System.IO;
-using Lucene.Net.Analysis.TokenAttributes;
+\ufeffusing Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -39,8 +38,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly CharsRef spare = new CharsRef();
/// <summary>
- /// Create a new StemmerOverrideFilter, performing dictionary-based stemming
- /// with the provided <code>dictionary</code>.
+ /// Create a new <see cref="StemmerOverrideFilter"/>, performing dictionary-based stemming
+ /// with the provided dictionary (<paramref name="stemmerOverrideMap"/>).
/// <para>
/// Any dictionary-stemmed terms will be marked with <see cref="KeywordAttribute"/>
/// so that they will not be stemmed with stemmers down the chain.
@@ -94,8 +93,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
// TODO maybe we can generalize this and reuse this map somehow?
public sealed class StemmerOverrideMap
{
- internal readonly FST<BytesRef> fst;
- internal readonly bool ignoreCase;
+ private readonly FST<BytesRef> fst;
+ private readonly bool ignoreCase;
/// <summary>
/// Creates a new <see cref="StemmerOverrideMap"/> </summary>
@@ -108,9 +107,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Returns a <see cref="BytesReader"/> to pass to the <see cref="#get(char[], int, FST.Arc, FST.BytesReader)"/> method.
+ /// Returns a <see cref="FST.BytesReader"/> to pass to the <see cref="Get(char[], int, FST.Arc{BytesRef}, FST.BytesReader)"/> method.
/// </summary>
- public FST.BytesReader BytesReader
+ public FST.BytesReader BytesReader // LUCENENET TODO: Change to GetBytesReader()
{
get
{
@@ -150,24 +149,24 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
return matchOutput;
}
-
}
+
/// <summary>
/// This builder builds an <see cref="FST"/> for the <see cref="StemmerOverrideFilter"/>
/// </summary>
public class Builder
{
- internal readonly BytesRefHash hash = new BytesRefHash();
- internal readonly BytesRef spare = new BytesRef();
- internal readonly List<string> outputValues = new List<string>();
- internal readonly bool ignoreCase;
- internal readonly CharsRef charsSpare = new CharsRef();
+ private readonly BytesRefHash hash = new BytesRefHash();
+ private readonly BytesRef spare = new BytesRef();
+ private readonly List<string> outputValues = new List<string>();
+ private readonly bool ignoreCase;
+ private readonly CharsRef charsSpare = new CharsRef();
/// <summary>
- /// Creates a new <see cref="Builder"/> with ignoreCase set to <code>false</code>
+ /// Creates a new <see cref="Builder"/> with <see cref="ignoreCase"/> set to <c>false</c>
/// </summary>
public Builder()
- : this(false)
+ : this(false)
{
}
@@ -184,7 +183,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="input"> the input char sequence </param>
/// <param name="output"> the stemmer override output char sequence </param>
- /// <returns> <code>false</code> iff the input has already been added to this builder otherwise <code>true</code>. </returns>
+ /// <returns> <c>false</c> if the input has already been added to this builder otherwise <c>true</c>. </returns>
public virtual bool Add(string input, string output)
{
int length = input.Length;
@@ -212,13 +211,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Returns an <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
- /// <returns> an <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
- /// <exception cref="IOException"> if an <see cref="IOException"/> occurs; </exception>
+ /// Returns a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </summary>
+ /// <returns> a <see cref="StemmerOverrideMap"/> to be used with the <see cref="StemmerOverrideFilter"/> </returns>
+ /// <exception cref="System.IO.IOException"> if an <see cref="System.IO.IOException"/> occurs; </exception>
public virtual StemmerOverrideMap Build()
{
ByteSequenceOutputs outputs = ByteSequenceOutputs.Singleton;
- Lucene.Net.Util.Fst.Builder<BytesRef> builder = new Lucene.Net.Util.Fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
+ Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
IntsRef intsSpare = new IntsRef();
int size = hash.Count;
@@ -231,7 +230,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
return new StemmerOverrideMap(builder.Finish(), ignoreCase);
}
-
}
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
index e0c9323..c7d0aad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly bool ignoreCase;
/// <summary>
- /// Creates a new StemmerOverrideFilterFactory </summary>
+ /// Creates a new <see cref="StemmerOverrideFilterFactory"/> </summary>
public StemmerOverrideFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
index 98539c7..197946f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs
@@ -29,7 +29,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class TrimFilter : TokenFilter
{
- internal readonly bool updateOffsets;
+ private readonly bool updateOffsets;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
@@ -53,7 +53,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Create a new <see cref="TrimFilter"/> on top of <code>in</code>. </summary>
+ /// Create a new <see cref="TrimFilter"/> on top of <paramref name="in"/>. </summary>
public TrimFilter(LuceneVersion version, TokenStream @in)
#pragma warning disable 612, 618
: this(version, @in, false)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
index d091842..77a1d1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilterFactory.cs
@@ -1,5 +1,5 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Miscellaneous
{
@@ -30,13 +30,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </analyzer>
/// </fieldType></code>
/// </summary>
- /// <seealso cref= TrimFilter </seealso>
+ /// <seealso cref="TrimFilter"/>
public class TrimFilterFactory : TokenFilterFactory
{
- protected internal readonly bool m_updateOffsets;
+ protected readonly bool m_updateOffsets;
/// <summary>
- /// Creates a new TrimFilterFactory </summary>
+ /// Creates a new <see cref="TrimFilterFactory"/> </summary>
public TrimFilterFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
index 182bc9c..cd58541 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilter.cs
@@ -33,7 +33,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly int length;
- public TruncateTokenFilter(TokenStream input, int length) : base(input)
+ public TruncateTokenFilter(TokenStream input, int length)
+ : base(input)
{
if (length < 1)
{
@@ -44,7 +45,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.keywordAttr = AddAttribute<IKeywordAttribute>();
}
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
if (m_input.IncrementToken())
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
index 2b738ec..1aeb5d3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
@@ -21,7 +21,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Factory for <see cref="org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter"/>. The following type is recommended for "<i>diacritics-insensitive search</i>" for Turkish.
+ /// Factory for <see cref="TruncateTokenFilter"/>. The following type is recommended for "<i>diacritics-insensitive search</i>" for Turkish.
/// <code>
/// <fieldType name="text_tr_ascii_f5" class="solr.TextField" positionIncrementGap="100">
/// <analyzer>
@@ -37,11 +37,13 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public class TruncateTokenFilterFactory : TokenFilterFactory
{
-
public const string PREFIX_LENGTH_KEY = "prefixLength";
private readonly sbyte prefixLength;
- public TruncateTokenFilterFactory(IDictionary<string, string> args) : base(args)
+ /// <summary>
+ /// Creates a new <see cref="TruncateTokenFilterFactory"/> </summary>
+ public TruncateTokenFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
prefixLength = sbyte.Parse(Get(args, PREFIX_LENGTH_KEY, "5"));
if (prefixLength < 1)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index 3c639d2..c3cc5a5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -25,63 +25,57 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// Splits words into subwords and performs optional transformations on subword
- /// groups. Words are split into subwords with the following rules:
- /// <ul>
- /// <li>split on intra-word delimiters (by default, all non alpha-numeric
- /// characters): <code>"Wi-Fi"</code> → <code>"Wi", "Fi"</code></li>
- /// <li>split on case transitions: <code>"PowerShot"</code> →
- /// <code>"Power", "Shot"</code></li>
- /// <li>split on letter-number transitions: <code>"SD500"</code> →
- /// <code>"SD", "500"</code></li>
- /// <li>leading and trailing intra-word delimiters on each subword are ignored:
- /// <code>"//hello---there, 'dude'"</code> →
- /// <code>"hello", "there", "dude"</code></li>
- /// <li>trailing "'s" are removed for each subword: <code>"O'Neil's"</code>
- /// → <code>"O", "Neil"</code>
- /// <ul>
- /// <li>Note: this step isn't performed in a separate filter because of possible
- /// subword combinations.</li>
- /// </ul>
- /// </li>
- /// </ul>
- ///
- /// The <b>combinations</b> parameter affects how subwords are combined:
- /// <ul>
- /// <li>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
- /// → <code>0:"Power", 1:"Shot"</code> (0 and 1 are the token positions)</li>
- /// <li>combinations="1" means that in addition to the subwords, maximum runs of
- /// non-numeric subwords are catenated and produced at the same position of the
- /// last subword in the run:
- /// <ul>
- /// <li><code>"PowerShot"</code> →
- /// <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
- /// <li><code>"A's+B's&C's"</code> -gt; <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
- /// </li>
- /// <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> →
- /// <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
- /// </li>
- /// </ul>
- /// </li>
- /// </ul>
- /// One use for <see cref="WordDelimiterFilter"/> is to help match words with different
- /// subword delimiters. For example, if the source text contained "wi-fi" one may
- /// want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
- /// is to specify combinations="1" in the analyzer used for indexing, and
- /// combinations="0" (the default) in the analyzer used for querying. Given that
- /// the current <see cref="StandardTokenizer"/> immediately removes many intra-word
- /// delimiters, it is recommended that this filter be used after a tokenizer that
- /// does not do this (such as <see cref="WhitespaceTokenizer"/>).
- /// </summary>
- public sealed class WordDelimiterFilter : TokenFilter
+ /// Splits words into subwords and performs optional transformations on subword
+ /// groups. Words are split into subwords with the following rules:
+ /// <list type="bullet">
+ /// <item>split on intra-word delimiters (by default, all non alpha-numeric
+ /// characters): <c>"Wi-Fi"</c> → <c>"Wi", "Fi"</c></item>
+ /// <item>split on case transitions: <c>"PowerShot"</c> →
+ /// <c>"Power", "Shot"</c></item>
+ /// <item>split on letter-number transitions: <c>"SD500"</c> →
+ /// <c>"SD", "500"</c></item>
+ /// <item>leading and trailing intra-word delimiters on each subword are ignored:
+ /// <c>"//hello---there, 'dude'"</c> →
+ /// <c>"hello", "there", "dude"</c></item>
+ /// <item>trailing "'s" are removed for each subword: <c>"O'Neil's"</c>
+ /// → <c>"O", "Neil"</c>
+ /// <ul>
+ /// <item>Note: this step isn't performed in a separate filter because of possible
+ /// subword combinations.</item>
+ /// </ul>
+ /// </item>
+ /// </list>
+ /// <para/>
+ /// The <b>combinations</b> parameter affects how subwords are combined:
+ /// <list type="bullet">
+ /// <item>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
+ /// → <c>0:"Power", 1:"Shot"</c> (0 and 1 are the token positions)</item>
+ /// <item>combinations="1" means that in addition to the subwords, maximum runs of
+ /// non-numeric subwords are catenated and produced at the same position of the
+ /// last subword in the run:
+ /// <ul>
+ /// <item><c>"PowerShot"</c> →
+ /// <c>0:"Power", 1:"Shot" 1:"PowerShot"</c></item>
+ /// <item><c>"A's+B's&C's"</c> → <c>0:"A", 1:"B", 2:"C", 2:"ABC"</c>
+ /// </item>
+ /// <item><c>"Super-Duper-XL500-42-AutoCoder!"</c> →
+ /// <c>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</c>
+ /// </item>
+ /// </ul>
+ /// </item>
+ /// </list>
+ /// <para/>
+ /// One use for <see cref="WordDelimiterFilter"/> is to help match words with different
+ /// subword delimiters. For example, if the source text contained "wi-fi" one may
+ /// want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
+ /// is to specify combinations="1" in the analyzer used for indexing, and
+ /// combinations="0" (the default) in the analyzer used for querying. Given that
+ /// the current <see cref="Standard.StandardTokenizer"/> immediately removes many intra-word
+ /// delimiters, it is recommended that this filter be used after a tokenizer that
+ /// does not do this (such as <see cref="Core.WhitespaceTokenizer"/>).
+ /// </summary>
+ public sealed class WordDelimiterFilter : TokenFilter
{
- private void InitializeInstanceFields()
- {
- concat = new WordDelimiterConcatenation(this);
- concatAll = new WordDelimiterConcatenation(this);
- sorter = new OffsetSorter(this);
- }
-
// LUCENENET: Added as a replacement for null in Java
internal const int NOT_SET = 0x00;
public const int LOWER = 0x01;
@@ -161,7 +155,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// If not null is the set of tokens to protect from being delimited
///
/// </summary>
- internal readonly CharArraySet protWords;
+ private readonly CharArraySet protWords;
private readonly int flags;
@@ -202,14 +196,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Creates a new WordDelimiterFilter
/// </summary>
+ /// <param name="matchVersion"> lucene compatibility version </param>
/// <param name="in"> TokenStream to be filtered </param>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, byte[] charTypeTable, int configurationFlags, CharArraySet protWords)
- : base(@in)
+ : base(@in)
{
- InitializeInstanceFields();
+ this.termAttribute = AddAttribute<ICharTermAttribute>();
+ this.offsetAttribute = AddAttribute<IOffsetAttribute>();
+ this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
+ this.typeAttribute = AddAttribute<ITypeAttribute>();
+ concat = new WordDelimiterConcatenation(this);
+ concatAll = new WordDelimiterConcatenation(this);
+ sorter = new OffsetSorter(this);
+
if (!matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
{
throw new System.ArgumentException("This class only works with Lucene 4.8+. To emulate the old (broken) behavior of WordDelimiterFilter, use Lucene47WordDelimiterFilter");
@@ -217,22 +219,18 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.flags = configurationFlags;
this.protWords = protWords;
this.iterator = new WordDelimiterIterator(charTypeTable, Has(SPLIT_ON_CASE_CHANGE), Has(SPLIT_ON_NUMERICS), Has(STEM_ENGLISH_POSSESSIVE));
-
- this.termAttribute = AddAttribute<ICharTermAttribute>();
- this.offsetAttribute = AddAttribute<IOffsetAttribute>();
- this.posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
- this.typeAttribute = AddAttribute<ITypeAttribute>();
}
/// <summary>
- /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+ /// Creates a new WordDelimiterFilter using <see cref="WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE"/>
/// as its charTypeTable
/// </summary>
- /// <param name="in"> TokenStream to be filtered </param>
+ /// <param name="matchVersion"> lucene compatibility version </param>
+ /// <param name="in"> <see cref="TokenStream"/> to be filtered </param>
/// <param name="configurationFlags"> Flags configuring the filter </param>
/// <param name="protWords"> If not null is the set of tokens to protect from being delimited </param>
public WordDelimiterFilter(LuceneVersion matchVersion, TokenStream @in, int configurationFlags, CharArraySet protWords)
- : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+ : this(matchVersion, @in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
{
}
@@ -295,22 +293,22 @@ namespace Lucene.Net.Analysis.Miscellaneous
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE)
{
- if (!concat.Empty)
+ if (!concat.IsEmpty)
{
if (FlushConcatenation(concat))
{
- buffer();
+ Buffer();
continue;
}
}
- if (!concatAll.Empty)
+ if (!concatAll.IsEmpty)
{
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount)
{
concatAll.WriteAndClear();
- buffer();
+ Buffer();
continue;
}
concatAll.Clear();
@@ -351,12 +349,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
int wordType = iterator.Type;
// do we already have queued up incompatible concatenations?
- if (!concat.Empty && (concat.type & wordType) == 0)
+ if (!concat.IsEmpty && (concat.type & wordType) == 0)
{
if (FlushConcatenation(concat))
{
hasOutputToken = false;
- buffer();
+ Buffer();
continue;
}
hasOutputToken = false;
@@ -365,7 +363,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
// add subwords depending upon options
if (ShouldConcatenate(wordType))
{
- if (concat.Empty)
+ if (concat.IsEmpty)
{
concat.type = wordType;
}
@@ -382,7 +380,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
if (ShouldGenerateParts(wordType))
{
GeneratePart(false);
- buffer();
+ Buffer();
}
iterator.Next();
@@ -409,7 +407,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
private int bufferedPos = 0;
private bool first;
- internal class OffsetSorter : InPlaceMergeSorter
+ internal class OffsetSorter : InPlaceMergeSorter // LUCENENET NOTE: Changed from private to internal because exposed by internal member
{
private readonly WordDelimiterFilter outerInstance;
@@ -420,12 +418,9 @@ namespace Lucene.Net.Analysis.Miscellaneous
protected override int Compare(int i, int j)
{
-
- //int cmp = int.Compare(outerInstance.startOff[i], outerInstance.startOff[j]);
int cmp = outerInstance.startOff[i].CompareTo(outerInstance.startOff[j]);
if (cmp == 0)
{
- //cmp = int.compare(outerInstance.posInc[j], outerInstance.posInc[i]);
cmp = outerInstance.posInc[j].CompareTo(outerInstance.posInc[i]);
}
return cmp;
@@ -454,7 +449,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
get { return this.sorter; }
}
- private void buffer()
+ private void Buffer()
{
if (bufferedLen == buffered.Length)
{
@@ -493,10 +488,10 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+ /// Flushes the given <see cref="WordDelimiterConcatenation"/> by either writing its concat and then clearing, or just clearing.
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
- /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> that will be flushed </param>
+ /// <returns> <c>true</c> if the concatenation was written before it was cleared, <c>false</c> otherwise </returns>
private bool FlushConcatenation(WordDelimiterConcatenation concatenation)
{
lastConcatCount = concatenation.subwordCount;
@@ -513,7 +508,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether to concatenate a word or number if the current word is the given type
/// </summary>
/// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
- /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if concatenation should occur, <c>false</c> otherwise </returns>
private bool ShouldConcatenate(int wordType)
{
return (Has(CATENATE_WORDS) && IsAlpha(wordType)) || (Has(CATENATE_NUMBERS) && IsDigit(wordType));
@@ -523,19 +518,19 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether a word/number part should be generated for a word of the given type
/// </summary>
/// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
- /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if a word/number part should be generated, <c>false</c> otherwise </returns>
private bool ShouldGenerateParts(int wordType)
{
return (Has(GENERATE_WORD_PARTS) && IsAlpha(wordType)) || (Has(GENERATE_NUMBER_PARTS) && IsDigit(wordType));
}
/// <summary>
- /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+ /// Concatenates the saved buffer to the given <see cref="WordDelimiterConcatenation"/>
/// </summary>
- /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+ /// <param name="concatenation"> <see cref="WordDelimiterConcatenation"/> to concatenate the buffer to </param>
private void Concatenate(WordDelimiterConcatenation concatenation)
{
- if (concatenation.Empty)
+ if (concatenation.IsEmpty)
{
concatenation.startOffset = savedStartOffset + iterator.current;
}
@@ -546,7 +541,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Generates a word/number part, updating the appropriate attributes
/// </summary>
- /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+ /// <param name="isSingleWord"> <c>true</c> if the generation is occurring from a single word, <c>false</c> otherwise </param>
private void GeneratePart(bool isSingleWord)
{
ClearAttributes();
@@ -605,40 +600,40 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Checks if the given word type includes <see cref="#ALPHA"/>
+ /// Checks if the given word type includes <see cref="ALPHA"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="ALPHA"/>, <c>false</c> otherwise </returns>
internal static bool IsAlpha(int type)
{
return (type & ALPHA) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#DIGIT"/>
+ /// Checks if the given word type includes <see cref="DIGIT"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="DIGIT"/>, <c>false</c> otherwise </returns>
internal static bool IsDigit(int type)
{
return (type & DIGIT) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#SUBWORD_DELIM"/>
+ /// Checks if the given word type includes <see cref="SUBWORD_DELIM"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="SUBWORD_DELIM"/>, <c>false</c> otherwise </returns>
internal static bool IsSubwordDelim(int type)
{
return (type & SUBWORD_DELIM) != 0;
}
/// <summary>
- /// Checks if the given word type includes <see cref="#UPPER"/>
+ /// Checks if the given word type includes <see cref="UPPER"/>
/// </summary>
/// <param name="type"> Word type to check </param>
- /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the type contains <see cref="UPPER"/>, <c>false</c> otherwise </returns>
internal static bool IsUpper(int type)
{
return (type & UPPER) != 0;
@@ -648,7 +643,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines whether the given flag is set
/// </summary>
/// <param name="flag"> Flag to see if set </param>
- /// <returns> {@code true} if flag is set </returns>
+ /// <returns> <c>true</c> if flag is set </returns>
private bool Has(int flag)
{
return (flags & flag) != 0;
@@ -668,7 +663,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.outerInstance = outerInstance;
}
- internal readonly StringBuilder buffer = new StringBuilder();
+ private readonly StringBuilder buffer = new StringBuilder();
internal int startOffset;
internal int endOffset;
internal int type;
@@ -698,7 +693,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
char[] termbuffer = outerInstance.termAttribute.Buffer;
- //buffer.GetChars(0, buffer.Length, termbuffer, 0);
buffer.CopyTo(0, termbuffer, 0, buffer.Length);
outerInstance.termAttribute.Length = buffer.Length;
@@ -718,8 +712,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the concatenation is empty
/// </summary>
- /// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
- internal bool Empty
+ /// <returns> <c>true</c> if the concatenation is empty, <c>false</c> otherwise </returns>
+ internal bool IsEmpty
{
get
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
index a0cc42d..827172e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -48,11 +48,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
private readonly string wordFiles;
private readonly string types;
private readonly int flags;
- internal byte[] typeTable = null;
+ private byte[] typeTable = null;
private CharArraySet protectedWords = null;
/// <summary>
- /// Creates a new WordDelimiterFilterFactory </summary>
+ /// Creates a new <see cref="WordDelimiterFilterFactory"/> </summary>
public WordDelimiterFilterFactory(IDictionary<string, string> args)
: base(args)
{
@@ -138,7 +138,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
// source => type
- //private static Pattern typePattern = Pattern.compile("(.*)\\s*=>\\s*(.*)\\s*$");
private static Regex typePattern = new Regex("(.*)\\s*=>\\s*(.*)\\s*$", RegexOptions.Compiled);
// parses a list of MappingCharFilter style rules into a custom byte[] type table
@@ -147,8 +146,6 @@ namespace Lucene.Net.Analysis.Miscellaneous
IDictionary<char, byte> typeMap = new SortedDictionary<char, byte>();
foreach (string rule in rules)
{
- //Matcher m = typePattern.matcher(rule);
- //if (!m.find())
Match m = typePattern.Match(rule);
if (!m.Success)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
index f507cf2..ee19be7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterIterator.cs
@@ -20,12 +20,11 @@ namespace Lucene.Net.Analysis.Miscellaneous
*/
/// <summary>
- /// A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules.
+ /// A BreakIterator-like API for iterating over subwords in text, according to <see cref="WordDelimiterFilter"/> rules.
/// @lucene.internal
/// </summary>
public sealed class WordDelimiterIterator
{
-
/// <summary>
/// Indicates the end of iteration </summary>
public const int DONE = -1;
@@ -33,14 +32,14 @@ namespace Lucene.Net.Analysis.Miscellaneous
public static readonly byte[] DEFAULT_WORD_DELIM_TABLE;
internal char[] text;
- internal int length;
+ private int length;
/// <summary>
/// start position of text, excluding leading delimiters </summary>
- internal int startBounds;
+ private int startBounds;
/// <summary>
/// end position of text, excluding trailing delimiters </summary>
- internal int endBounds;
+ private int endBounds;
/// <summary>
/// Beginning of subword </summary>
@@ -49,27 +48,27 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// End of subword </summary>
internal int end;
- /* does this string end with a possessive such as 's */
+ /// <summary>does this string end with a possessive such as 's</summary>
private bool hasFinalPossessive = false;
/// <summary>
/// If false, causes case changes to be ignored (subwords will only be generated
/// given SUBWORD_DELIM tokens). (Defaults to true)
/// </summary>
- internal readonly bool splitOnCaseChange;
+ private readonly bool splitOnCaseChange;
/// <summary>
/// If false, causes numeric changes to be ignored (subwords will only be generated
/// given SUBWORD_DELIM tokens). (Defaults to true)
/// </summary>
- internal readonly bool splitOnNumerics;
+ private readonly bool splitOnNumerics;
/// <summary>
/// If true, causes trailing "'s" to be removed for each subword. (Defaults to true)
/// <p/>
/// "O'Neil's" => "O", "Neil"
/// </summary>
- internal readonly bool stemEnglishPossessive;
+ private readonly bool stemEnglishPossessive;
private readonly byte[] charTypeTable;
@@ -107,7 +106,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
}
/// <summary>
- /// Create a new WordDelimiterIterator operating with the supplied rules.
+ /// Create a new <see cref="WordDelimiterIterator"/> operating with the supplied rules.
/// </summary>
/// <param name="charTypeTable"> table containing character types </param>
/// <param name="splitOnCaseChange"> if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) </param>
@@ -124,7 +123,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Advance to the next subword in the string.
/// </summary>
- /// <returns> index of the next subword, or <see cref="#DONE"/> if all subwords have been returned </returns>
+ /// <returns> index of the next subword, or <see cref="DONE"/> if all subwords have been returned </returns>
internal int Next()
{
current = end;
@@ -175,7 +174,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// This currently uses the type of the first character in the subword.
/// </summary>
/// <returns> type of the current word </returns>
- internal int Type
+ internal int Type // LUCENENET TODO: Change to GetType()
{
get
{
@@ -218,7 +217,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
/// <param name="lastType"> Last subword type </param>
/// <param name="type"> Current subword type </param>
- /// <returns> {@code true} if the transition indicates a break, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the transition indicates a break, <c>false</c> otherwise </returns>
private bool IsBreak(int lastType, int type)
{
if ((type & lastType) != 0)
@@ -248,8 +247,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <summary>
/// Determines if the current word contains only one subword. Note, it could be potentially surrounded by delimiters
/// </summary>
- /// <returns> {@code true} if the current word contains only one subword, {@code false} otherwise </returns>
- internal bool SingleWord
+ /// <returns> <c>true</c> if the current word contains only one subword, <c>false</c> otherwise </returns>
+ internal bool SingleWord // LUCENENET TODO: Change to IsSingleWord()
{
get
{
@@ -290,10 +289,15 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// Determines if the text at the given position indicates an English possessive which should be removed
/// </summary>
/// <param name="pos"> Position in the text to check if it indicates an English possessive </param>
- /// <returns> {@code true} if the text at the position indicates an English posessive, {@code false} otherwise </returns>
+ /// <returns> <c>true</c> if the text at the position indicates an English possessive, <c>false</c> otherwise </returns>
private bool EndsWithPossessive(int pos)
{
- return (stemEnglishPossessive && pos > 2 && text[pos - 2] == '\'' && (text[pos - 1] == 's' || text[pos - 1] == 'S') && WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])) && (pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]))));
+ return (stemEnglishPossessive &&
+ pos > 2 &&
+ text[pos - 2] == '\'' &&
+ (text[pos - 1] == 's' || text[pos - 1] == 'S') &&
+ WordDelimiterFilter.IsAlpha(CharType(text[pos - 3])) &&
+ (pos == endBounds || WordDelimiterFilter.IsSubwordDelim(CharType(text[pos]))));
}
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Core/Analysis/TokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenFilter.cs b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
index b082d6a..9b5957e 100644
--- a/src/Lucene.Net.Core/Analysis/TokenFilter.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenFilter.cs
@@ -69,7 +69,7 @@ namespace Lucene.Net.Analysis
/// <para/>
/// If you override this method, always call <c>base.Reset()</c>, otherwise
/// some internal state will not be correctly reset (e.g., <see cref="Tokenizer"/> will
- /// throw <see cref="InvalidOperationException"/> on further usage).
+ /// throw <see cref="System.InvalidOperationException"/> on further usage).
/// </summary>
/// <remarks>
/// <b>NOTE:</b>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/46b02fbc/src/Lucene.Net.Core/Analysis/TokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TokenStream.cs b/src/Lucene.Net.Core/Analysis/TokenStream.cs
index 1e104e9..d2c34c9 100644
--- a/src/Lucene.Net.Core/Analysis/TokenStream.cs
+++ b/src/Lucene.Net.Core/Analysis/TokenStream.cs
@@ -129,26 +129,26 @@ namespace Lucene.Net.Analysis
}
/// <summary>
- /// Consumers (i.e., <seealso cref="IndexWriter"/>) use this method to advance the stream to
+ /// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
/// the next token. Implementing classes must implement this method and update
- /// the appropriate <seealso cref="AttributeImpl"/>s with the attributes of the next
+ /// the appropriate <see cref="Lucene.Net.Util.IAttribute"/>s with the attributes of the next
/// token.
- /// <P>
+ /// <para/>
/// The producer must make no assumptions about the attributes after the method
/// has been returned: the caller may arbitrarily change it. If the producer
/// needs to preserve the state for subsequent calls, it can use
- /// <seealso cref="#captureState"/> to create a copy of the current attribute state.
- /// <p>
+ /// <see cref="AttributeSource.CaptureState"/> to create a copy of the current attribute state.
+ /// <para/>
/// this method is called for every token of a document, so an efficient
/// implementation is crucial for good performance. To avoid calls to
- /// <seealso cref="#addAttribute(Class)"/> and <seealso cref="#getAttribute(Class)"/>,
- /// references to all <seealso cref="AttributeImpl"/>s that this stream uses should be
+ /// <see cref="AttributeSource.AddAttribute{T}"/> and <see cref="AttributeSource.GetAttribute{T}"/>,
+ /// references to all <see cref="Lucene.Net.Util.IAttribute"/>s that this stream uses should be
/// retrieved during instantiation.
- /// <p>
+ /// <para/>
/// To ensure that filters and consumers know which attributes are available,
/// the attributes must be added during instantiation. Filters and consumers
/// are not required to check for availability of attributes in
- /// <seealso cref="#IncrementToken()"/>.
+ /// <see cref="IncrementToken()"/>.
/// </summary>
/// <returns> false for end of stream; true otherwise </returns>
public abstract bool IncrementToken();