You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/04 20:32:23 UTC
[04/39] lucenenet git commit: Lucene.Net.Analysis.Standard refactor:
member accessibility and documentation comments
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
index e3d58e5..3593baa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/Std40/UAX29URLEmailTokenizerImpl40.cs
@@ -22,32 +22,32 @@ namespace Lucene.Net.Analysis.Standard.Std40
*/
/// <summary>
- /// This class implements UAX29URLEmailTokenizer using Unicode 6.1.0.
+ /// This class implements <see cref="UAX29URLEmailTokenizer"/> using Unicode 6.1.0.
/// @deprecated This class is only for exact backwards compatibility
/// </summary>
[Obsolete("This class is only for exact backwards compatibility")]
public sealed class UAX29URLEmailTokenizerImpl40 : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0001\x00C1\x0008\x00BF\x0002\x00C1\x0002\x00BF\x0001\x00C1\x0013\x00BF\x0001\x00C2\x0001\x00BE\x0001\x00B9\x0001\x00C2" +
"\x0001\x00B2\x0001\x00B0\x0001\x00B5\x0002\x00B3\x0002\x00C2\x0001\x00B4\x0001\x00A4\x0001\x0089\x0001\x00B8\x0001\x00A5" +
@@ -201,14 +201,14 @@ namespace Lucene.Net.Analysis.Standard.Std40
"\x000B\x0000\x0038\x007F\x0002\x007D\x001F\x0088\x0003\x0000\x0006\x0088\x0002\x0000\x0006\x0088\x0002\x0000\x0006\x0088" +
"\x0002\x0000\x0003\x0088\x001C\x0000\x0003\x007D\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -255,9 +255,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -502,9 +502,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -3906,9 +3906,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -3951,73 +3951,77 @@ namespace Lucene.Net.Analysis.Standard.Std40
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true &lt;=&gt; the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true &lt;=&gt; the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -4037,32 +4041,30 @@ namespace Lucene.Net.Analysis.Standard.Std40
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl40(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -4078,13 +4080,11 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -4140,9 +4140,9 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -4155,18 +4155,17 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -4182,75 +4181,73 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a wellformed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the errormessage to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -4267,14 +4264,15 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -4284,13 +4282,12 @@ namespace Lucene.Net.Analysis.Standard.Std40
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index 502b98c..65aecc2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -23,21 +23,19 @@ namespace Lucene.Net.Analysis.Standard
*/
/// <summary>
- /// Filters <see cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
+ /// Filters <see cref="UAX29URLEmailTokenizer"/>
/// with <see cref="StandardFilter"/>,
/// <see cref="LowerCaseFilter"/> and
/// <see cref="StopFilter"/>, using a list of
/// English stop words.
///
- /// <a name="version"/>
/// <para>
- /// You must specify the required <see cref="org.apache.lucene.util.Version"/>
- /// compatibility when creating UAX29URLEmailAnalyzer
+ /// You must specify the required <see cref="LuceneVersion"/>
+ /// compatibility when creating <see cref="UAX29URLEmailAnalyzer"/>
/// </para>
/// </summary>
public sealed class UAX29URLEmailAnalyzer : StopwordAnalyzerBase
{
-
/// <summary>
/// Default maximum allowed token length </summary>
public const int DEFAULT_MAX_TOKEN_LENGTH = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
@@ -52,8 +50,7 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the given stop words. </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
/// <param name="stopWords"> stop words </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
: base(matchVersion, stopWords)
@@ -61,10 +58,9 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Builds an analyzer with the default stop words ({@link
- /// #STOP_WORDS_SET}). </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
+ /// Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET"/>).
+ /// </summary>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, STOP_WORDS_SET)
{
@@ -72,10 +68,9 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader, org.apache.lucene.util.Version) </seealso>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopwords"> TextReader to read stop words from </param>
+ /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+ /// <param name="matchVersion"> Lucene version to match - See <see cref="UAX29URLEmailAnalyzer"/> </param>
+ /// <param name="stopwords"> <see cref="TextReader"/> to read stop words from </param>
public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
: this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
{
@@ -93,7 +88,6 @@ namespace Lucene.Net.Analysis.Standard
get { return maxTokenLength; }
}
-
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(m_matchVersion, reader);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
index 2c91236..83659e2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -1,11 +1,10 @@
-\ufeffusing Lucene.Net.Analysis.Standard;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Util;
-using System.IO;
-using Lucene.Net.Analysis.Standard.Std31;
+\ufeffusing Lucene.Net.Analysis.Standard.Std31;
using Lucene.Net.Analysis.Standard.Std34;
using Lucene.Net.Analysis.Standard.Std36;
using Lucene.Net.Analysis.Standard.Std40;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Util;
+using System.IO;
namespace Lucene.Net.Analysis.Standard
{
@@ -31,26 +30,25 @@ namespace Lucene.Net.Analysis.Standard
/// algorithm, as specified in `
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// URLs and email addresses are also tokenized according to the relevant RFCs.
- /// <p/>
+ /// <para/>
/// Tokens produced are of the following types:
- /// <ul>
- /// <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- /// <li><NUM>: A number</li>
- /// <li><URL>: A URL</li>
- /// <li><EMAIL>: An email address</li>
- /// <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- /// Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- /// <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- /// <li><HIRAGANA>: A single hiragana character</li>
- /// </ul>
- /// <a name="version"/>
+ /// <list type="bullet">
+ /// <item>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</item>
+ /// <item>&lt;NUM&gt;: A number</item>
+ /// <item>&lt;URL&gt;: A URL</item>
+ /// <item>&lt;EMAIL&gt;: An email address</item>
+ /// <item>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
+ /// Asian languages, including Thai, Lao, Myanmar, and Khmer</item>
+ /// <item>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</item>
+ /// <item>&lt;HIRAGANA&gt;: A single hiragana character</item>
+ /// </list>
/// <para>You must specify the required <see cref="LuceneVersion"/>
- /// compatibility when creating UAX29URLEmailTokenizer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- /// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// </ul>
+ /// compatibility when creating <see cref="UAX29URLEmailTokenizer"/>:
+ /// <list type="bullet">
+ /// <item> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// from their combining characters. If you use a previous version number,
+ /// you get the exact broken behavior for backwards compatibility.</item>
+ /// </list>
/// </para>
/// </summary>
public sealed class UAX29URLEmailTokenizer : Tokenizer
@@ -71,7 +69,17 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
/// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = new string[] { StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA], StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL], "<URL>", "<EMAIL>" };
+ public static readonly string[] TOKEN_TYPES = new string[] {
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA],
+ StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL],
+ "<URL>",
+ "<EMAIL>"
+ };
private int skippedPositions;
@@ -99,9 +107,10 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>
- /// Creates a new instance of the UAX29URLEmailTokenizer. Attaches
- /// the <code>input</code> to the newly created JFlex scanner.
+ /// Creates a new instance of the <see cref="UAX29URLEmailTokenizer"/>. Attaches
+ /// the <paramref name="input"/> to the newly created JFlex scanner.
/// </summary>
+ /// <param name="matchVersion"> Lucene compatibility version </param>
/// <param name="input"> The input reader </param>
public UAX29URLEmailTokenizer(LuceneVersion matchVersion, TextReader input)
: base(input)
@@ -110,7 +119,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// Creates a new UAX29URLEmailTokenizer with a given <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
+ /// Creates a new <see cref="UAX29URLEmailTokenizer"/> with a given <see cref="AttributeSource.AttributeFactory"/>
/// </summary>
public UAX29URLEmailTokenizer(LuceneVersion matchVersion, AttributeFactory factory, TextReader input)
: base(factory, input)
@@ -119,7 +128,7 @@ namespace Lucene.Net.Analysis.Standard
}
/// <summary>
- /// LUCENENET: This method was added in .NET to prevent having to repeat code in the constructors.
+ /// LUCENENET specific: This method was added in .NET to prevent having to repeat code in the constructors.
/// </summary>
/// <param name="matchVersion"></param>
private void Init(LuceneVersion matchVersion)
@@ -165,7 +174,7 @@ namespace Lucene.Net.Analysis.Standard
private IPositionIncrementAttribute posIncrAtt;
private ITypeAttribute typeAtt;
- public override bool IncrementToken()
+ public override sealed bool IncrementToken()
{
ClearAttributes();
skippedPositions = 0;
@@ -197,7 +206,7 @@ namespace Lucene.Net.Analysis.Standard
}
}
- public override void End()
+ public override sealed void End()
{
base.End();
// set final offset
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
index dc902f8..976f4c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
@@ -1,8 +1,7 @@
-\ufeffusing Lucene.Net.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
using System.Collections.Generic;
using System.IO;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
-using System;
namespace Lucene.Net.Analysis.Standard
{
@@ -37,7 +36,7 @@ namespace Lucene.Net.Analysis.Standard
private readonly int maxTokenLength;
/// <summary>
- /// Creates a new UAX29URLEmailTokenizerFactory </summary>
+ /// Creates a new <see cref="UAX29URLEmailTokenizerFactory"/> </summary>
public UAX29URLEmailTokenizerFactory(IDictionary<string, string> args)
: base(args)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab69b431/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
index b45186e..dbf05a7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerImpl.cs
@@ -26,44 +26,44 @@ namespace Lucene.Net.Analysis.Standard
/// algorithm, as specified in
/// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
/// URLs and email addresses are also tokenized according to the relevant RFCs.
- /// <p/>
+ /// <para/>
/// Tokens produced are of the following types:
- /// <ul>
- /// <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- /// <li><NUM>: A number</li>
- /// <li><URL>: A URL</li>
- /// <li><EMAIL>: An email address</li>
- /// <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- /// Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- /// <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- /// <li><HIRAGANA>: A single hiragana character</li>
- /// <li><KATAKANA>: A sequence of katakana characters</li>
- /// <li><HANGUL>: A sequence of Hangul characters</li>
- /// </ul>
+ /// <list type="bullet">
+ /// <item>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</item>
+ /// <item>&lt;NUM&gt;: A number</item>
+ /// <item>&lt;URL&gt;: A URL</item>
+ /// <item>&lt;EMAIL&gt;: An email address</item>
+ /// <item>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
+ /// Asian languages, including Thai, Lao, Myanmar, and Khmer</item>
+ /// <item>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</item>
+ /// <item>&lt;HIRAGANA&gt;: A single hiragana character</item>
+ /// <item>&lt;KATAKANA&gt;: A sequence of katakana characters</item>
+ /// <item>&lt;HANGUL&gt;: A sequence of Hangul characters</item>
+ /// </list>
/// </summary>
public sealed class UAX29URLEmailTokenizerImpl : IStandardTokenizerInterface
{
- /** This character denotes the end of file */
+ /// <summary>This character denotes the end of file</summary>
public static readonly int YYEOF = -1;
- /** initial size of the lookahead buffer */
+ /// <summary>initial size of the lookahead buffer</summary>
private static readonly int ZZ_BUFFERSIZE = 4096;
- /** lexical states */
+ /// <summary>lexical states</summary>
public const int YYINITIAL = 0;
public const int AVOID_BAD_URL = 2;
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
+ /// <summary>
+ /// ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ /// ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ /// at the beginning of a line
+ /// l is of the form l = 2*k, k a non negative integer
+ /// </summary>
private static readonly int[] ZZ_LEXSTATE = { 0, 0, 1, 1 };
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private const string ZZ_CMAP_PACKED =
"\x0001\x00C6\x0008\x00C4\x0002\x00C6\x0002\x00C4\x0001\x00C6\x0013\x00C4\x0001\x00C7\x0001\x008D\x0001\x00BF\x0001\x00C7" +
"\x0001\x00B9\x0001\x00B7\x0001\x008C\x0002\x00BA\x0002\x00C7\x0001\x00BB\x0001\x00AB\x0001\x0090\x0001\x00BE\x0001\x00AD" +
@@ -219,14 +219,14 @@ namespace Lucene.Net.Analysis.Standard
"\x0002\x0000\x0003\x008F\x001C\x0000\x0003\x007F\x0004\x0000";
- /**
- * Translates characters to character classes
- */
+ /// <summary>
+ /// Translates characters to character classes
+ /// </summary>
private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
- /**
- * Translates DFA states to action switch labels.
- */
+ /// <summary>
+ /// Translates DFA states to action switch labels.
+ /// </summary>
private static readonly int[] ZZ_ACTION = ZzUnpackAction();
private const string ZZ_ACTION_PACKED_0 =
@@ -292,9 +292,9 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Translates a state to a row index in the transition table
- */
+ /// <summary>
+ /// Translates a state to a row index in the transition table
+ /// </summary>
private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
private const string ZZ_ROWMAP_PACKED_0 =
@@ -710,9 +710,9 @@ namespace Lucene.Net.Analysis.Standard
return j;
}
- /**
- * The transition table of the DFA
- */
+ /// <summary>
+ /// The transition table of the DFA
+ /// </summary>
private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
private const string ZZ_TRANS_PACKED_0 =
@@ -8998,9 +8998,9 @@ namespace Lucene.Net.Analysis.Standard
"Error: pushback value was too large"
};
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
+ /// <summary>
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state <c>aState</c>
+ /// </summary>
private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
private const string ZZ_ATTRIBUTE_PACKED_0 =
@@ -9056,73 +9056,77 @@ namespace Lucene.Net.Analysis.Standard
return j;
}
- /** the input device */
+ /// <summary>the input device</summary>
private TextReader zzReader;
- /** the current state of the DFA */
+ /// <summary>the current state of the DFA</summary>
private int zzState;
- /** the current lexical state */
+ /// <summary>the current lexical state</summary>
private int zzLexicalState = YYINITIAL;
- /** this buffer contains the current text to be matched and is
- the source of the YyText() string */
+ /// <summary>
+ /// this buffer contains the current text to be matched and is
+ /// the source of the YyText string
+ /// </summary>
private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
- /** the textposition at the last accepting state */
+ /// <summary>the textposition at the last accepting state</summary>
private int zzMarkedPos;
- /** the current text position in the buffer */
+ /// <summary>the current text position in the buffer</summary>
private int zzCurrentPos;
- /** startRead marks the beginning of the YyText() string in the buffer */
+ /// <summary>startRead marks the beginning of the YyText string in the buffer</summary>
private int zzStartRead;
- /** endRead marks the last character in the buffer, that has been read
- from input */
+ /// <summary>
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ /// </summary>
private int zzEndRead;
- /** number of newlines encountered up to the start of the matched text */
+ /// <summary>number of newlines encountered up to the start of the matched text</summary>
private int yyline;
- /** the number of characters up to the start of the matched text */
+ /// <summary>the number of characters up to the start of the matched text</summary>
private int yychar;
#pragma warning disable 169, 414
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
+ /// <summary>
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ /// </summary>
private int yycolumn;
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
+ /// <summary>
+ /// zzAtBOL == true &lt;=&gt; the scanner is currently at the beginning of a line
+ /// </summary>
private bool zzAtBOL = true;
- /** zzAtEOF == true <=> the scanner is at the EOF */
+ /// <summary>zzAtEOF == true &lt;=&gt; the scanner is at the EOF</summary>
private bool zzAtEOF;
- /** denotes if the user-EOF-code has already been executed */
+ /// <summary>denotes if the user-EOF-code has already been executed</summary>
private bool zzEOFDone;
#pragma warning restore 169, 414
/* user code: */
- /** Alphanumeric sequences */
+ /// <summary>Alphanumeric sequences</summary>
public static readonly int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
- /** Numbers */
+ /// <summary>Numbers</summary>
public static readonly int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
+ /// <summary>
+ /// Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ /// scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ /// together as a single token rather than broken up, because the logic
+ /// required to break them at word boundaries is too complex for UAX#29.
+ /// <para/>
+ /// See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ /// </summary>
public static readonly int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
public static readonly int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
@@ -9142,32 +9146,30 @@ namespace Lucene.Net.Analysis.Standard
get { return yychar; }
}
- /**
- * Fills CharTermAttribute with the current token text.
- */
+ /// <summary>
+ /// Fills ICharTermAttribute with the current token text.
+ /// </summary>
public void GetText(ICharTermAttribute t)
{
t.CopyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
}
- /**
- * Creates a new scanner
- *
- * @param in the TextReader to read input from.
- */
+ /// <summary>
+ /// Creates a new scanner
+ /// </summary>
+ /// <param name="in">the TextReader to read input from.</param>
public UAX29URLEmailTokenizerImpl(TextReader @in)
{
this.zzReader = @in;
}
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
+ /// <summary>
+ /// Unpacks the compressed character translation table.
+ /// </summary>
+ /// <param name="packed">the packed character translation table</param>
+ /// <returns>the unpacked character translation table</returns>
private static char[] ZzUnpackCMap(string packed)
{
char[] map = new char[0x10000];
@@ -9183,13 +9185,11 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Refills the input buffer.
+ /// </summary>
+ /// <returns><c>false</c>, iff there was new input.</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
private bool ZzRefill()
{
@@ -9245,9 +9245,9 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Closes the input stream.
- */
+ /// <summary>
+ /// Disposes the input stream.
+ /// </summary>
public void YyClose()
{
zzAtEOF = true; /* indicate end of file */
@@ -9260,18 +9260,17 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
+ /// <summary>
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ /// <para/>
+ /// All internal variables are reset, the old input stream
+ /// <b>cannot</b> be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to <see cref="YYINITIAL"/>.
+ /// <para/>
+ /// Internal scan buffer is resized down to its initial length, if it has grown.
+ /// </summary>
+ /// <param name="reader">the new input stream </param>
public void YyReset(TextReader reader)
{
zzReader = reader;
@@ -9287,75 +9286,73 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Returns the current lexical state.
- */
+ /// <summary>
+ /// Returns the current lexical state.
+ /// </summary>
public int YyState
{
get { return zzLexicalState; }
}
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
+ /// <summary>
+ /// Enters a new lexical state
+ /// </summary>
+ /// <param name="newState">the new lexical state</param>
public void YyBegin(int newState)
{
zzLexicalState = newState;
}
- /**
- * Returns the text matched by the current regular expression.
- */
+ /// <summary>
+ /// Returns the text matched by the current regular expression.
+ /// </summary>
public string YyText
{
get { return new string(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); }
}
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to YyText().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to YyLength()-1.
- *
- * @return the character at position pos
- */
+ /// <summary>
+ /// Returns the character at position <paramref name="pos"/> from the
+ /// matched text.
+ /// <para/>
+ /// It is equivalent to YyText[pos], but faster
+ /// </summary>
+ /// <param name="pos">
+ /// the position of the character to fetch.
+ /// A value from 0 to YyLength-1.
+ /// </param>
+ /// <returns>the character at position pos</returns>
public char YyCharAt(int pos)
{
return zzBuffer[zzStartRead + pos];
}
- /**
- * Returns the length of the matched text region.
- */
+ /// <summary>
+ /// Returns the length of the matched text region.
+ /// </summary>
public int YyLength
{
get { return zzMarkedPos - zzStartRead; }
}
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * YyPushBack(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
+ /// <summary>
+ /// Reports an error that occurred while scanning.
+ /// <para/>
+ /// In a well-formed scanner (no or only correct usage of
+ /// YyPushBack(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ /// <para/>
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ /// </summary>
+ /// <param name="errorCode">the code of the error message to display</param>
private void ZzScanError(int errorCode)
{
string message;
@@ -9372,14 +9369,15 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than YyLength()!
- */
+ /// <summary>
+ /// Pushes the specified amount of characters back into the input stream.
+ /// <para/>
+ /// They will be read again by the next call of the scanning method
+ /// </summary>
+ /// <param name="number">
+ /// the number of characters to be read again.
+ /// This number must not be greater than YyLength!
+ /// </param>
public void YyPushBack(int number)
{
if (number > YyLength)
@@ -9389,13 +9387,12 @@ namespace Lucene.Net.Analysis.Standard
}
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
+ /// <summary>
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ /// </summary>
+ /// <returns>the next token</returns>
+ /// <exception cref="IOException">if any I/O-Error occurs</exception>
public int GetNextToken()
{
int zzInput;