You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/12 11:44:27 UTC
[Lucene.Net] svn commit: r1201243 [2/8] - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk: src/core/
src/core/Analysis/ src/core/Analysis/Standard/
src/core/Analysis/Tokenattributes/ src/core/Document/ src/core/Index/
src/core/QueryParser/ src/core/Search/...
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardAnalyzer.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardAnalyzer.cs Sat Nov 12 10:44:21 2011
@@ -17,6 +17,7 @@
using System;
using System.Collections;
+using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Util;
using Version = Lucene.Net.Util.Version;
@@ -37,106 +38,28 @@ namespace Lucene.Net.Analysis.Standard
/// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
/// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
/// </list>
- ///
/// </summary>
- /// <version> $Id: StandardAnalyzer.java 829134 2009-10-23 17:18:53Z mikemccand $
- /// </version>
public class StandardAnalyzer : Analyzer
{
- private System.Collections.Hashtable stopSet;
+ private ISet<string> stopSet;
/// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
- /// This is false by default to support backward compatibility.
- ///
- /// </summary>
- /// <deprecated> this should be removed in the next release (3.0).
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </deprecated>
- [Obsolete("this should be removed in the next release (3.0).")]
- private bool replaceInvalidAcronym = defaultReplaceInvalidAcronym;
-
- private static bool defaultReplaceInvalidAcronym;
- private bool enableStopPositionIncrements;
-
- // @deprecated
- [Obsolete]
- private bool useDefaultStopPositionIncrements;
-
- /// <summary> </summary>
- /// <returns> true if new instances of StandardTokenizer will
- /// replace mischaracterized acronyms
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </returns>
- /// <deprecated> This will be removed (hardwired to true) in 3.0
- /// </deprecated>
- [Obsolete("This will be removed (hardwired to true) in 3.0")]
- public static bool GetDefaultReplaceInvalidAcronym()
- {
- return defaultReplaceInvalidAcronym;
- }
-
- /// <summary> </summary>
- /// <param name="replaceInvalidAcronym">Set to true to have new
- /// instances of StandardTokenizer replace mischaracterized
- /// acronyms by default. Set to false to preserve the
- /// previous (before 2.4) buggy behavior. Alternatively,
- /// set the system property
- /// Lucene.Net.Analysis.Standard.StandardAnalyzer.replaceInvalidAcronym
- /// to false.
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </param>
- /// <deprecated> This will be removed (hardwired to true) in 3.0
- /// </deprecated>
- [Obsolete("This will be removed (hardwired to true) in 3.0")]
- public static void SetDefaultReplaceInvalidAcronym(bool replaceInvalidAcronym)
- {
- defaultReplaceInvalidAcronym = replaceInvalidAcronym;
- }
-
-
- /// <summary>An array containing some common English words that are usually not
- /// useful for searching.
+ /// See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">https://issues.apache.org/jira/browse/LUCENE-1068</a>
/// </summary>
- /// <deprecated> Use <see cref="STOP_WORDS_SET" /> instead
- /// </deprecated>
- [Obsolete("Use STOP_WORDS_SET instead ")]
- public static readonly System.String[] STOP_WORDS;
-
+ private bool replaceInvalidAcronym, enableStopPositionIncrements;
+
/// <summary>An unmodifiable set containing some common English words that are usually not
/// useful for searching.
/// </summary>
- public static readonly System.Collections.Hashtable STOP_WORDS_SET;
-
- /// <summary>Builds an analyzer with the default stop words
- /// (<see cref="STOP_WORDS_SET" />).
- /// </summary>
- /// <deprecated> Use <see cref="StandardAnalyzer(Version)" /> instead.
- /// </deprecated>
- [Obsolete("Use StandardAnalyzer(Version) instead")]
- public StandardAnalyzer():this(Version.LUCENE_24, STOP_WORDS_SET)
- {
- }
+ public static readonly ISet<string> STOP_WORDS_SET;
+ private Version matchVersion;
/// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />).
/// </summary>
- /// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> />
- ///
- /// </param>
- public StandardAnalyzer(Version matchVersion):this(matchVersion, STOP_WORDS_SET)
- {
- }
-
- /// <summary>Builds an analyzer with the given stop words.</summary>
- /// <deprecated> Use <see cref="StandardAnalyzer(Version, Hashtable)" />
- /// instead
- /// </deprecated>
- [Obsolete("Use StandardAnalyzer(Version, Set) instead")]
- public StandardAnalyzer(System.Collections.Hashtable stopWords):this(Version.LUCENE_24, stopWords)
- {
- }
+ /// <param name="matchVersion">Lucene version to match see <see cref="Version">above</see></param>
+ public StandardAnalyzer(Version matchVersion)
+ : this(matchVersion, STOP_WORDS_SET)
+ { }
/// <summary>Builds an analyzer with the given stop words.</summary>
/// <param name="matchVersion">Lucene version to match See <see cref="Version">above</see> />
@@ -144,31 +67,15 @@ namespace Lucene.Net.Analysis.Standard
/// </param>
/// <param name="stopWords">stop words
/// </param>
- public StandardAnalyzer(Version matchVersion, System.Collections.Hashtable stopWords)
+ public StandardAnalyzer(Version matchVersion, ISet<string> stopWords)
{
stopSet = stopWords;
- Init(matchVersion);
- }
-
- /// <summary>Builds an analyzer with the given stop words.</summary>
- /// <deprecated> Use <see cref="StandardAnalyzer(Version, Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StandardAnalyzer(Version, Set) instead")]
- public StandardAnalyzer(System.String[] stopWords):this(Version.LUCENE_24, StopFilter.MakeStopSet(stopWords))
- {
- }
-
- /// <summary>Builds an analyzer with the stop words from the given file.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
- /// </seealso>
- /// <deprecated> Use <see cref="StandardAnalyzer(Version, System.IO.FileInfo)" />
- /// instead
- /// </deprecated>
- [Obsolete("Use StandardAnalyzer(Version, File) instead")]
- public StandardAnalyzer(System.IO.FileInfo stopwords):this(Version.LUCENE_24, stopwords)
- {
+ SetOverridesTokenStreamMethod(typeof(StandardAnalyzer));
+ enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
+ this.matchVersion = matchVersion;
}
-
+
/// <summary>Builds an analyzer with the stop words from the given file.</summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
/// </seealso>
@@ -178,19 +85,7 @@ namespace Lucene.Net.Analysis.Standard
/// <param name="stopwords">File to read stop words from
/// </param>
public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords)
- {
- stopSet = WordlistLoader.GetWordSet(stopwords);
- Init(matchVersion);
- }
-
- /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
- /// </seealso>
- /// <deprecated> Use <see cref="StandardAnalyzer(Version, System.IO.TextReader)" />
- /// instead
- /// </deprecated>
- [Obsolete("Use StandardAnalyzer(Version, Reader) instead")]
- public StandardAnalyzer(System.IO.TextReader stopwords):this(Version.LUCENE_24, stopwords)
+ : this (matchVersion, WordlistLoader.GetWordSet(stopwords))
{
}
@@ -203,125 +98,19 @@ namespace Lucene.Net.Analysis.Standard
/// <param name="stopwords">Reader to read stop words from
/// </param>
public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
- {
- stopSet = WordlistLoader.GetWordSet(stopwords);
- Init(matchVersion);
- }
-
- /// <summary> </summary>
- /// <param name="replaceInvalidAcronym">Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- /// </deprecated>
- [Obsolete("Remove in 3.X and make true the only valid value")]
- public StandardAnalyzer(bool replaceInvalidAcronym):this(Version.LUCENE_24, STOP_WORDS_SET)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- useDefaultStopPositionIncrements = true;
- }
-
- /// <param name="stopwords">The stopwords to use
- /// </param>
- /// <param name="replaceInvalidAcronym">Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- /// </deprecated>
- [Obsolete("Remove in 3.X and make true the only valid value")]
- public StandardAnalyzer(System.IO.TextReader stopwords, bool replaceInvalidAcronym):this(Version.LUCENE_24, stopwords)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /// <param name="stopwords">The stopwords to use
- /// </param>
- /// <param name="replaceInvalidAcronym">Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- /// </deprecated>
- [Obsolete("Remove in 3.X and make true the only valid value")]
- public StandardAnalyzer(System.IO.FileInfo stopwords, bool replaceInvalidAcronym):this(Version.LUCENE_24, stopwords)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /// <summary> </summary>
- /// <param name="stopwords">The stopwords to use
- /// </param>
- /// <param name="replaceInvalidAcronym">Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- /// </deprecated>
- [Obsolete("Remove in 3.X and make true the only valid value")]
- public StandardAnalyzer(System.String[] stopwords, bool replaceInvalidAcronym):this(Version.LUCENE_24, StopFilter.MakeStopSet(stopwords))
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- /// <param name="stopwords">The stopwords to use
- /// </param>
- /// <param name="replaceInvalidAcronym">Set to true if this analyzer should replace mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- /// </deprecated>
- [Obsolete("Remove in 3.X and make true the only valid value")]
- public StandardAnalyzer(System.Collections.Hashtable stopwords, bool replaceInvalidAcronym):this(Version.LUCENE_24, stopwords)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
-
- private void Init(Version matchVersion)
- {
- SetOverridesTokenStreamMethod(typeof(StandardAnalyzer));
- if (matchVersion.OnOrAfter(Version.LUCENE_29))
- {
- enableStopPositionIncrements = true;
- }
- else
- {
- useDefaultStopPositionIncrements = true;
- }
- if (matchVersion.OnOrAfter(Version.LUCENE_24))
- {
- replaceInvalidAcronym = defaultReplaceInvalidAcronym;
- }
- else
- {
- replaceInvalidAcronym = false;
- }
- }
+ : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
+ { }
/// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />
///, a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
- StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
+ StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
tokenStream.SetMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
- if (useDefaultStopPositionIncrements)
- {
- result = new StopFilter(result, stopSet);
- }
- else
- {
- result = new StopFilter(enableStopPositionIncrements, result, stopSet);
- }
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
return result;
}
@@ -353,9 +142,6 @@ namespace Lucene.Net.Analysis.Standard
return maxTokenLength;
}
- /// <deprecated> Use <see cref="TokenStream" /> instead
- /// </deprecated>
- [Obsolete("Use TokenStream instead")]
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
if (overridesTokenStreamMethod)
@@ -370,17 +156,11 @@ namespace Lucene.Net.Analysis.Standard
{
streams = new SavedStreams();
SetPreviousTokenStream(streams);
- streams.tokenStream = new StandardTokenizer(reader);
+ streams.tokenStream = new StandardTokenizer(matchVersion, reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
- if (useDefaultStopPositionIncrements)
- {
- streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
- }
- else
- {
- streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
- }
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
+ streams.filteredTokenStream, stopSet);
}
else
{
@@ -392,43 +172,8 @@ namespace Lucene.Net.Analysis.Standard
return streams.filteredTokenStream;
}
-
- /// <summary> </summary>
- /// <returns> true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </returns>
- /// <deprecated> This will be removed (hardwired to true) in 3.0
- /// </deprecated>
- [Obsolete("This will be removed (hardwired to true) in 3.0")]
- public virtual bool IsReplaceInvalidAcronym()
- {
- return replaceInvalidAcronym;
- }
-
- /// <summary> </summary>
- /// <param name="replaceInvalidAcronym">Set to true if this Analyzer is replacing mischaracterized acronyms in the StandardTokenizer
- ///
- /// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </param>
- /// <deprecated> This will be removed (hardwired to true) in 3.0
- /// </deprecated>
- [Obsolete("This will be removed (hardwired to true) in 3.0")]
- public virtual void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- }
static StandardAnalyzer()
{
- // Default to true (fixed the bug), unless the system prop is set
- {
- System.String v = SupportClass.AppSettings.Get("Lucene.Net.Analysis.Standard.StandardAnalyzer.replaceInvalidAcronym", "true");
- if (v == null || v.Equals("true"))
- defaultReplaceInvalidAcronym = true;
- else
- defaultReplaceInvalidAcronym = false;
- }
- STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
}
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardFilter.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardFilter.cs Sat Nov 12 10:44:21 2011
@@ -35,8 +35,8 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>Construct filtering <i>in</i>. </summary>
public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
{
- termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
- typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
+ termAtt = AddAttribute<TermAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
}
private static readonly System.String APOSTROPHE_TYPE;
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizer.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizer.cs Sat Nov 12 10:44:21 2011
@@ -56,7 +56,7 @@ namespace Lucene.Net.Analysis.Standard
/// </list>
/// </summary>
- public class StandardTokenizer:Tokenizer
+ public sealed class StandardTokenizer:Tokenizer
{
private void InitBlock()
{
@@ -65,18 +65,17 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>A private instance of the JFlex-constructed scanner </summary>
private StandardTokenizerImpl scanner;
- public const int ALPHANUM = 0;
+ public const int ALPHANUM = 0;
public const int APOSTROPHE = 1;
- public const int ACRONYM = 2;
- public const int COMPANY = 3;
- public const int EMAIL = 4;
- public const int HOST = 5;
- public const int NUM = 6;
- public const int CJ = 7;
+ public const int ACRONYM = 2;
+ public const int COMPANY = 3;
+ public const int EMAIL = 4;
+ public const int HOST = 5;
+ public const int NUM = 6;
+ public const int CJ = 7;
/// <deprecated> this solves a bug where HOSTs that end with '.' are identified
- /// as ACRONYMs. It is deprecated and will be removed in the next
- /// release.
+ /// as ACRONYMs.
/// </deprecated>
[Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. It is deprecated and will be removed in the next release.")]
public const int ACRONYM_DEP = 8;
@@ -84,20 +83,6 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>String token types that correspond to token type int constants </summary>
public static readonly System.String[] TOKEN_TYPES = new System.String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
- /// <deprecated> Please use <see cref="TOKEN_TYPES" /> instead
- /// </deprecated>
- [Obsolete("Please use TOKEN_TYPES instead")]
- public static readonly System.String[] tokenImage = TOKEN_TYPES;
-
- /// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
- /// This is false by default to support backward compatibility.
- /// <p/>
- /// See http://issues.apache.org/jira/browse/LUCENE-1068
- ///
- /// </summary>
- /// <deprecated> this should be removed in the next release (3.0).
- /// </deprecated>
- [Obsolete("this should be removed in the next release (3.0).")]
private bool replaceInvalidAcronym;
private int maxTokenLength;
@@ -105,47 +90,17 @@ namespace Lucene.Net.Analysis.Standard
/// <summary>Set the max allowed token length. Any token longer
/// than this is skipped.
/// </summary>
- public virtual void SetMaxTokenLength(int length)
+ public void SetMaxTokenLength(int length)
{
this.maxTokenLength = length;
}
/// <seealso cref="SetMaxTokenLength">
/// </seealso>
- public virtual int GetMaxTokenLength()
+ public int GetMaxTokenLength()
{
return maxTokenLength;
}
-
- /// <summary> Creates a new instance of the <see cref="StandardTokenizer" />. Attaches the
- /// <c>input</c> to a newly created JFlex scanner.
- /// </summary>
- /// <deprecated> Use <see cref="StandardTokenizer(Version, System.IO.TextReader)" /> instead
- /// </deprecated>
- [Obsolete("Use StandardTokenizer(Version, Reader) instead")]
- public StandardTokenizer(System.IO.TextReader input):this(Version.LUCENE_24, input)
- {
- }
-
- /// <summary> Creates a new instance of the <see cref="Lucene.Net.Analysis.Standard.StandardTokenizer" />. Attaches
- /// the <c>input</c> to the newly created JFlex scanner.
- ///
- /// </summary>
- /// <param name="input">The input reader
- /// </param>
- /// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms with HOST.
- ///
- /// See http://issues.apache.org/jira/browse/LUCENE-1068
- /// </param>
- /// <deprecated> Use <see cref="StandardTokenizer(Version, System.IO.TextReader)" /> instead
- /// </deprecated>
- [Obsolete("Use StandardTokenizer(Version, Reader) instead")]
- public StandardTokenizer(System.IO.TextReader input, bool replaceInvalidAcronym):base()
- {
- InitBlock();
- this.scanner = new StandardTokenizerImpl(input);
- Init(input, replaceInvalidAcronym);
- }
/// <summary> Creates a new instance of the
/// <see cref="Lucene.Net.Analysis.Standard.StandardTokenizer" />. Attaches
@@ -163,20 +118,7 @@ namespace Lucene.Net.Analysis.Standard
this.scanner = new StandardTokenizerImpl(input);
Init(input, matchVersion);
}
-
- /// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />. </summary>
- /// <deprecated> Use
- /// <see cref="StandardTokenizer(Version, AttributeSource, System.IO.TextReader)" />
- /// instead
- /// </deprecated>
- [Obsolete("Use StandardTokenizer(Version, AttributeSource, Reader) instead")]
- public StandardTokenizer(AttributeSource source, System.IO.TextReader input, bool replaceInvalidAcronym):base(source)
- {
- InitBlock();
- this.scanner = new StandardTokenizerImpl(input);
- Init(input, replaceInvalidAcronym);
- }
-
+
/// <summary> Creates a new StandardTokenizer with a given <see cref="AttributeSource" />.</summary>
public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input):base(source)
{
@@ -185,19 +127,6 @@ namespace Lucene.Net.Analysis.Standard
Init(input, matchVersion);
}
- /// <summary> Creates a new StandardTokenizer with a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" /> </summary>
- /// <deprecated> Use
- /// <see cref="StandardTokenizer(Version, AttributeSource.AttributeFactory, System.IO.TextReader)" />
- /// instead
- /// </deprecated>
- [Obsolete("Use StandardTokenizer(Version, Lucene.Net.Util.AttributeSource.AttributeFactory, Reader) instead")]
- public StandardTokenizer(AttributeFactory factory, System.IO.TextReader input, bool replaceInvalidAcronym):base(factory)
- {
- InitBlock();
- this.scanner = new StandardTokenizerImpl(input);
- Init(input, replaceInvalidAcronym);
- }
-
/// <summary> Creates a new StandardTokenizer with a given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />
/// </summary>
@@ -208,26 +137,21 @@ namespace Lucene.Net.Analysis.Standard
Init(input, matchVersion);
}
- private void Init(System.IO.TextReader input, bool replaceInvalidAcronym)
- {
- this.replaceInvalidAcronym = replaceInvalidAcronym;
- this.input = input;
- termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
- offsetAtt = (OffsetAttribute) AddAttribute(typeof(OffsetAttribute));
- posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
- typeAtt = (TypeAttribute) AddAttribute(typeof(TypeAttribute));
- }
-
private void Init(System.IO.TextReader input, Version matchVersion)
{
if (matchVersion.OnOrAfter(Version.LUCENE_24))
{
- Init(input, true);
+ replaceInvalidAcronym = true;
}
else
{
- Init(input, false);
+ replaceInvalidAcronym = false;
}
+ this.input = input;
+ termAtt = AddAttribute<TermAttribute>();
+ offsetAtt = AddAttribute<OffsetAttribute>();
+ posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
}
// this tokenizer generates three attributes:
@@ -297,25 +221,6 @@ namespace Lucene.Net.Analysis.Standard
offsetAtt.SetOffset(finalOffset, finalOffset);
}
- /// <deprecated> Will be removed in Lucene 3.0. This method is final, as it should
- /// not be overridden. Delegates to the backwards compatibility layer.
- /// </deprecated>
- [Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. ")]
- public override Token Next(Token reusableToken)
- {
- return base.Next(reusableToken);
- }
-
- /// <deprecated> Will be removed in Lucene 3.0. This method is final, as it should
- /// not be overridden. Delegates to the backwards compatibility layer.
- /// </deprecated>
- [Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. ")]
- public override Token Next()
- {
- return base.Next();
- }
-
-
public override void Reset(System.IO.TextReader reader)
{
base.Reset(reader);
@@ -331,20 +236,20 @@ namespace Lucene.Net.Analysis.Standard
/// <deprecated> Remove in 3.X and make true the only valid value
/// </deprecated>
[Obsolete("Remove in 3.X and make true the only valid value")]
- public virtual bool IsReplaceInvalidAcronym()
+ public bool IsReplaceInvalidAcronym()
{
return replaceInvalidAcronym;
}
- /// <summary> </summary>
- /// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms as HOST.
- /// </param>
- /// <deprecated> Remove in 3.X and make true the only valid value
- ///
+
+ /// <summary>
+ /// Remove in 3.X and make true the only valid value
/// See https://issues.apache.org/jira/browse/LUCENE-1068
- /// </deprecated>
+ /// </summary>
+ /// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms as HOST.
+ /// </param>
[Obsolete("Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")]
- public virtual void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
+ public void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
{
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.cs Sat Nov 12 10:44:21 2011
@@ -18,12 +18,13 @@
/*
-
-NOTE: if you change this file and need to regenerate the tokenizer,
-remember to use JRE 1.4 when running jflex (before Lucene 3.0).
-This grammar now uses constructs (eg :digit:) whose meaning can
-vary according to the JRE used to run jflex. See
-https://issues.apache.org/jira/browse/LUCENE-1126 for details
+ NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
+ the tokenizer, only use Java 1.4 !!!
+ This grammar currently uses constructs (eg :digit:, :letter:) whose
+ meaning can vary according to the JRE used to run jflex. See
+ https://issues.apache.org/jira/browse/LUCENE-1126 for details.
+ For current backwards compatibility it is needed to support
+ only Java 1.4 - this will change in Lucene 3.1.
*/
using System;
@@ -246,10 +247,9 @@ namespace Lucene.Net.Analysis.Standard
public static readonly int NUM;
public static readonly int CJ;
/// <deprecated> this solves a bug where HOSTs that end with '.' are identified
- /// as ACRONYMs. It is deprecated and will be removed in the next
- /// release.
+ /// as ACRONYMs.
/// </deprecated>
- [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. It is deprecated and will be removed in the next release.")]
+ [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")]
public static readonly int ACRONYM_DEP;
public static readonly System.String[] TOKEN_TYPES;
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.jflex?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.jflex (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/Standard/StandardTokenizerImpl.jflex Sat Nov 12 10:44:21 2011
@@ -1,156 +1,156 @@
-package org.apache.lucene.analysis.standard;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-
-NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
- the tokenizer, remember to use JRE 1.4 to run jflex (before
- Lucene 3.0). This grammar now uses constructs (eg :digit:,
- :letter:) whose meaning can vary according to the JRE used to
- run jflex. See
- https://issues.apache.org/jira/browse/LUCENE-1126 for details.
-
-*/
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
-%%
-
-%class StandardTokenizerImpl
-%unicode
-%integer
-%function getNextToken
-%pack
-%char
-
-%{
-
-public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
-public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
-public static final int ACRONYM = StandardTokenizer.ACRONYM;
-public static final int COMPANY = StandardTokenizer.COMPANY;
-public static final int EMAIL = StandardTokenizer.EMAIL;
-public static final int HOST = StandardTokenizer.HOST;
-public static final int NUM = StandardTokenizer.NUM;
-public static final int CJ = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- * as ACRONYMs. It is deprecated and will be removed in the next
- * release.
- */
-public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
-
-public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
-
-public final int yychar()
-{
- return yychar;
-}
-
-/**
- * Resets the Tokenizer to a new Reader.
- */
-final void reset(java.io.Reader r) {
- // reset to default buffer size, if buffer has grown
- if (zzBuffer.length > ZZ_BUFFERSIZE) {
- zzBuffer = new char[ZZ_BUFFERSIZE];
- }
- yyreset(r);
-}
-
-/**
- * Fills Lucene token with the current token text.
- */
-final void getText(Token t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-}
-
-/**
- * Fills TermAttribute with the current token text.
- */
-final void getText(TermAttribute t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-}
-
-%}
-
-THAI = [\u0E00-\u0E59]
-
-// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
-ALPHANUM = ({LETTER}|{THAI}|[:digit:])+
-
-// internal apostrophes: O'Reilly, you're, O'Reilly's
-// use a post-filter to remove possessives
-APOSTROPHE = {ALPHA} ("'" {ALPHA})+
-
-// acronyms: U.S.A., I.B.M., etc.
-// use a post-filter to remove dots
-ACRONYM = {LETTER} "." ({LETTER} ".")+
-
-ACRONYM_DEP = {ALPHANUM} "." ({ALPHANUM} ".")+
-
-// company names like AT&T and Excite@Home.
-COMPANY = {ALPHA} ("&"|"@") {ALPHA}
-
-// email addresses
-EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
-
-// hostname
-HOST = {ALPHANUM} ((".") {ALPHANUM})+
-
-// floating point, serial, model numbers, ip addresses, etc.
-// every other segment must have at least one digit
-NUM = ({ALPHANUM} {P} {HAS_DIGIT}
- | {HAS_DIGIT} {P} {ALPHANUM}
- | {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
- | {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
- | {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
- | {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
-
-// punctuation
-P = ("_"|"-"|"/"|"."|",")
-
-// at least one digit
-HAS_DIGIT = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
-
-ALPHA = ({LETTER})+
-
-// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
-LETTER = !(![:letter:]|{CJ})
-
-// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
-CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
-
-WHITESPACE = \r\n | [ \r\n\t\f]
-
-%%
-
-{ALPHANUM} { return ALPHANUM; }
-{APOSTROPHE} { return APOSTROPHE; }
-{ACRONYM} { return ACRONYM; }
-{COMPANY} { return COMPANY; }
-{EMAIL} { return EMAIL; }
-{HOST} { return HOST; }
-{NUM} { return NUM; }
-{CJ} { return CJ; }
-{ACRONYM_DEP} { return ACRONYM_DEP; }
-
-/** Ignore the rest */
-. | {WHITESPACE} { /* ignore */ }
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+
+WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
+ the tokenizer, only use Java 1.4 !!!
+ This grammar currently uses constructs (eg :digit:, :letter:) whose
+ meaning can vary according to the JRE used to run jflex. See
+ https://issues.apache.org/jira/browse/LUCENE-1126 for details.
+ For current backwards compatibility it is needed to support
+ only Java 1.4 - this will change in Lucene 3.1.
+
+*/
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+%%
+
+%class StandardTokenizerImpl
+%unicode
+%integer
+%function getNextToken
+%pack
+%char
+
+%{
+
+public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
+public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
+public static final int ACRONYM = StandardTokenizer.ACRONYM;
+public static final int COMPANY = StandardTokenizer.COMPANY;
+public static final int EMAIL = StandardTokenizer.EMAIL;
+public static final int HOST = StandardTokenizer.HOST;
+public static final int NUM = StandardTokenizer.NUM;
+public static final int CJ = StandardTokenizer.CJ;
+/**
+ * @deprecated this solves a bug where HOSTs that end with '.' are identified
+ * as ACRONYMs.
+ */
+public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
+
+public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
+
+public final int yychar()
+{
+ return yychar;
+}
+
+/**
+ * Resets the Tokenizer to a new Reader.
+ */
+final void reset(java.io.Reader r) {
+ // reset to default buffer size, if buffer has grown
+ if (zzBuffer.length > ZZ_BUFFERSIZE) {
+ zzBuffer = new char[ZZ_BUFFERSIZE];
+ }
+ yyreset(r);
+}
+
+/**
+ * Fills Lucene token with the current token text.
+ */
+final void getText(Token t) {
+ t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+}
+
+/**
+ * Fills TermAttribute with the current token text.
+ */
+final void getText(TermAttribute t) {
+ t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+}
+
+%}
+
+THAI = [\u0E00-\u0E59]
+
+// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
+ALPHANUM = ({LETTER}|{THAI}|[:digit:])+
+
+// internal apostrophes: O'Reilly, you're, O'Reilly's
+// use a post-filter to remove possessives
+APOSTROPHE = {ALPHA} ("'" {ALPHA})+
+
+// acronyms: U.S.A., I.B.M., etc.
+// use a post-filter to remove dots
+ACRONYM = {LETTER} "." ({LETTER} ".")+
+
+ACRONYM_DEP = {ALPHANUM} "." ({ALPHANUM} ".")+
+
+// company names like AT&T and Excite@Home.
+COMPANY = {ALPHA} ("&"|"@") {ALPHA}
+
+// email addresses
+EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
+
+// hostname
+HOST = {ALPHANUM} ((".") {ALPHANUM})+
+
+// floating point, serial, model numbers, ip addresses, etc.
+// every other segment must have at least one digit
+NUM = ({ALPHANUM} {P} {HAS_DIGIT}
+ | {HAS_DIGIT} {P} {ALPHANUM}
+ | {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
+ | {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
+ | {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
+ | {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
+
+// punctuation
+P = ("_"|"-"|"/"|"."|",")
+
+// at least one digit
+HAS_DIGIT = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
+
+ALPHA = ({LETTER})+
+
+// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
+LETTER = !(![:letter:]|{CJ})
+
+// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
+CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
+
+WHITESPACE = \r\n | [ \r\n\t\f]
+
+%%
+
+{ALPHANUM} { return ALPHANUM; }
+{APOSTROPHE} { return APOSTROPHE; }
+{ACRONYM} { return ACRONYM; }
+{COMPANY} { return COMPANY; }
+{EMAIL} { return EMAIL; }
+{HOST} { return HOST; }
+{NUM} { return NUM; }
+{CJ} { return CJ; }
+{ACRONYM_DEP} { return ACRONYM_DEP; }
+
+/** Ignore the rest */
+. | {WHITESPACE} { /* ignore */ }
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopAnalyzer.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopAnalyzer.cs Sat Nov 12 10:44:21 2011
@@ -16,7 +16,7 @@
*/
using System;
-
+using System.Collections.Generic;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis
@@ -36,157 +36,28 @@ namespace Lucene.Net.Analysis
public sealed class StopAnalyzer:Analyzer
{
- private System.Collections.Hashtable stopWords;
- // @deprecated
- [Obsolete]
- private bool useDefaultStopPositionIncrement;
+ private ISet<string> stopWords;
private bool enablePositionIncrements;
-
- /// <summary>An array containing some common English words that are not usually useful
- /// for searching.
- /// </summary>
- /// <deprecated> Use <see cref="ENGLISH_STOP_WORDS_SET" /> instead
- /// </deprecated>
- [Obsolete("Use ENGLISH_STOP_WORDS_SET instead ")]
- public static readonly System.String[] ENGLISH_STOP_WORDS = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
-
+
/// <summary>An unmodifiable set containing some common English words that are not usually useful
/// for searching.
/// </summary>
- public static System.Collections.Hashtable ENGLISH_STOP_WORDS_SET;
-
- /// <summary>Builds an analyzer which removes words in
- /// ENGLISH_STOP_WORDS.
- /// </summary>
- /// <deprecated> Use <see cref="StopAnalyzer(Version)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version) instead")]
- public StopAnalyzer()
- {
- stopWords = ENGLISH_STOP_WORDS_SET;
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
+ public static ISet<string> ENGLISH_STOP_WORDS_SET;
/// <summary> Builds an analyzer which removes words in ENGLISH_STOP_WORDS.</summary>
public StopAnalyzer(Version matchVersion)
{
stopWords = ENGLISH_STOP_WORDS_SET;
- useDefaultStopPositionIncrement = false;
enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
}
-
- /// <summary>Builds an analyzer which removes words in
- /// ENGLISH_STOP_WORDS.
- /// </summary>
- /// <param name="enablePositionIncrements">
- /// See <see cref="StopFilter.SetEnablePositionIncrements" />
- /// </param>
- /// <deprecated> Use <see cref="StopAnalyzer(Version)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version) instead")]
- public StopAnalyzer(bool enablePositionIncrements)
- {
- stopWords = ENGLISH_STOP_WORDS_SET;
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
- }
-
- /// <summary>Builds an analyzer with the stop words from the given set.</summary>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.Collections.Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, System.Collections.Hashtable) instead")]
- public StopAnalyzer(System.Collections.Hashtable stopWords)
- {
- this.stopWords = stopWords;
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
+
/// <summary>Builds an analyzer with the stop words from the given set.</summary>
- public StopAnalyzer(Version matchVersion, System.Collections.Hashtable stopWords)
+ public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
{
this.stopWords = stopWords;
- useDefaultStopPositionIncrement = false;
enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
}
- /// <summary>Builds an analyzer with the stop words from the given set.</summary>
- /// <param name="stopWords">Set of stop words
- /// </param>
- /// <param name="enablePositionIncrements">
- /// See <see cref="StopFilter.SetEnablePositionIncrements" />
- /// </param>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.Collections.Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, System.Collections.Hashtable) instead")]
- public StopAnalyzer(System.Collections.Hashtable stopWords, bool enablePositionIncrements)
- {
- this.stopWords = stopWords;
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
- }
-
- /// <summary>Builds an analyzer which removes words in the provided array.</summary>
- /// <deprecated> Use <see cref="StopAnalyzer(System.Collections.Hashtable, bool)" /> instead
- /// </deprecated>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.Collections.Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(System.Collections.Hashtable, boolean) or StopAnalyzer(Version, System.Collections.Hashtable) instead ")]
- public StopAnalyzer(System.String[] stopWords)
- {
- this.stopWords = StopFilter.MakeStopSet(stopWords);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /// <summary>Builds an analyzer which removes words in the provided array.</summary>
- /// <param name="stopWords">Array of stop words
- /// </param>
- /// <param name="enablePositionIncrements">
- /// See <see cref="StopFilter.SetEnablePositionIncrements" />
- /// </param>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.Collections.Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, Set) instead")]
- public StopAnalyzer(System.String[] stopWords, bool enablePositionIncrements)
- {
- this.stopWords = StopFilter.MakeStopSet(stopWords);
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
- }
-
- /// <summary>Builds an analyzer with the stop words from the given file.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
- /// </seealso>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.IO.FileInfo)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, File) instead")]
- public StopAnalyzer(System.IO.FileInfo stopwordsFile)
- {
- stopWords = WordlistLoader.GetWordSet(stopwordsFile);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /// <summary>Builds an analyzer with the stop words from the given file.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
- /// </seealso>
- /// <param name="stopwordsFile">File to load stop words from
- /// </param>
- /// <param name="enablePositionIncrements">
- /// See <see cref="StopFilter.SetEnablePositionIncrements" />
- /// </param>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.IO.FileInfo)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, File) instead")]
- public StopAnalyzer(System.IO.FileInfo stopwordsFile, bool enablePositionIncrements)
- {
- stopWords = WordlistLoader.GetWordSet(stopwordsFile);
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
- }
-
/// <summary> Builds an analyzer with the stop words from the given file.
///
/// </summary>
@@ -200,38 +71,6 @@ namespace Lucene.Net.Analysis
{
stopWords = WordlistLoader.GetWordSet(stopwordsFile);
this.enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
- useDefaultStopPositionIncrement = false;
- }
-
- /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
- /// </seealso>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.IO.TextReader)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, Reader) instead")]
- public StopAnalyzer(System.IO.TextReader stopwords)
- {
- stopWords = WordlistLoader.GetWordSet(stopwords);
- useDefaultStopPositionIncrement = true;
- enablePositionIncrements = false;
- }
-
- /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
- /// </seealso>
- /// <param name="stopwords">Reader to load stop words from
- /// </param>
- /// <param name="enablePositionIncrements">
- /// See <see cref="StopFilter.SetEnablePositionIncrements" />
- /// </param>
- /// <deprecated> Use <see cref="StopAnalyzer(Version, System.IO.TextReader)" /> instead
- /// </deprecated>
- [Obsolete("Use StopAnalyzer(Version, Reader) instead")]
- public StopAnalyzer(System.IO.TextReader stopwords, bool enablePositionIncrements)
- {
- stopWords = WordlistLoader.GetWordSet(stopwords);
- this.enablePositionIncrements = enablePositionIncrements;
- useDefaultStopPositionIncrement = false;
}
/// <summary>Builds an analyzer with the stop words from the given reader. </summary>
@@ -245,20 +84,12 @@ namespace Lucene.Net.Analysis
{
stopWords = WordlistLoader.GetWordSet(stopwords);
this.enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
- useDefaultStopPositionIncrement = false;
}
/// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
- if (useDefaultStopPositionIncrement)
- {
- return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
- }
- else
- {
- return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
- }
+ return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
}
/// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
@@ -292,14 +123,7 @@ namespace Lucene.Net.Analysis
{
streams = new SavedStreams(this);
streams.source = new LowerCaseTokenizer(reader);
- if (useDefaultStopPositionIncrement)
- {
- streams.result = new StopFilter(streams.source, stopWords);
- }
- else
- {
- streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
- }
+ streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
SetPreviousTokenStream(streams);
}
else
@@ -311,7 +135,7 @@ namespace Lucene.Net.Analysis
{
System.String[] stopWords = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
CharArraySet stopSet = new CharArraySet(stopWords.Length, false);
- stopSet.AddAll(new System.Collections.ArrayList(stopWords));
+ stopSet.AddAll(stopWords);
ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
}
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopFilter.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/StopFilter.cs Sat Nov 12 10:44:21 2011
@@ -16,6 +16,7 @@
*/
using System;
+using System.Collections.Generic;
using Lucene.Net.Util;
using PositionIncrementAttribute = Lucene.Net.Analysis.Tokenattributes.PositionIncrementAttribute;
using TermAttribute = Lucene.Net.Analysis.Tokenattributes.TermAttribute;
@@ -29,71 +30,12 @@ namespace Lucene.Net.Analysis
public sealed class StopFilter:TokenFilter
{
-
- // deprecated
- [Obsolete]
- private static bool ENABLE_POSITION_INCREMENTS_DEFAULT = false;
-
private CharArraySet stopWords;
- private bool enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
+ private bool enablePositionIncrements = false;
private TermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
- /// <summary> Construct a token stream filtering the given input.</summary>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, String[])" /> instead
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, String[]) instead")]
- public StopFilter(TokenStream input, System.String[] stopWords):this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, false)
- {
- }
-
- /// <summary> Construct a token stream filtering the given input.</summary>
- /// <param name="enablePositionIncrements">true if token positions should record the removed stop words
- /// </param>
- /// <param name="input">input TokenStream
- /// </param>
- /// <param name="stopWords">array of stop words
- /// </param>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, System.Collections.Hashtable)" /> instead.
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, Hashtable) instead.")]
- public StopFilter(bool enablePositionIncrements, TokenStream input, System.String[] stopWords):this(enablePositionIncrements, input, stopWords, false)
- {
- }
-
- /// <summary> Constructs a filter which removes words from the input
- /// TokenStream that are named in the array of words.
- /// </summary>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, String[], bool)" /> instead
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, String[], bool) instead")]
- public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase):this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, ignoreCase)
- {
- }
-
- /// <summary> Constructs a filter which removes words from the input
- /// TokenStream that are named in the array of words.
- /// </summary>
- /// <param name="enablePositionIncrements">true if token positions should record the removed stop words
- /// </param>
- /// <param name="in_Renamed">input TokenStream
- /// </param>
- /// <param name="stopWords">array of stop words
- /// </param>
- /// <param name="ignoreCase">true if case is ignored
- /// </param>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, System.Collections.Hashtable, bool)" /> instead.
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, Hashtable, bool) instead.")]
- public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase):base(in_Renamed)
- {
- this.stopWords = (CharArraySet) MakeStopSet(stopWords, ignoreCase);
- this.enablePositionIncrements = enablePositionIncrements;
- Init();
- }
-
-
/// <summary> Construct a token stream filtering the given input.
/// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
/// <c>makeStopSet()</c> was used to construct the set) it will be directly used
@@ -103,89 +45,41 @@ namespace Lucene.Net.Analysis
/// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
/// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
/// used to specify the case sensitivity of that set.
- ///
- /// </summary>
- /// <param name="input">
- /// </param>
- /// <param name="stopWords">The set of Stop Words.
- /// </param>
- /// <param name="ignoreCase">-Ignore case when stopping.
- /// </param>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, System.Collections.Hashtable, bool)" /> instead
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, Set, bool) instead")]
- public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase):this(ENABLE_POSITION_INCREMENTS_DEFAULT, input, stopWords, ignoreCase)
- {
- }
-
- /// <summary> Construct a token stream filtering the given input.
- /// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
- /// <c>makeStopSet()</c> was used to construct the set) it will be directly used
- /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
- /// directly controls case sensitivity.
- /// <p/>
- /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
- /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
- /// used to specify the case sensitivity of that set.
- ///
- /// </summary>
- /// <param name="enablePositionIncrements">true if token positions should record the removed stop words
- /// </param>
- /// <param name="input">Input TokenStream
- /// </param>
- /// <param name="stopWords">The set of Stop Words.
- /// </param>
- /// <param name="ignoreCase">-Ignore case when stopping.
- /// </param>
- public StopFilter(bool enablePositionIncrements, TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase):base(input)
- {
- if (stopWords is CharArraySet)
- {
- this.stopWords = (CharArraySet) stopWords;
- }
- else
- {
- this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
- this.stopWords.Add(stopWords);
- }
- this.enablePositionIncrements = enablePositionIncrements;
- Init();
- }
-
- /// <summary> Constructs a filter which removes words from the input
- /// TokenStream that are named in the Set.
- ///
/// </summary>
- /// <seealso cref="MakeStopSet(String[])">
- /// </seealso>
- /// <deprecated> Use <see cref="StopFilter(bool, TokenStream, System.Collections.Hashtable)" /> instead
- /// </deprecated>
- [Obsolete("Use StopFilter(bool, TokenStream, Hashtable) instead")]
- public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords):this(ENABLE_POSITION_INCREMENTS_DEFAULT, in_Renamed, stopWords, false)
- {
+ /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
+ /// <param name="input">Input TokenStream</param>
+ /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
+ /// <param name="ignoreCase">if true, all words are lower cased first</param>
+ public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
+ : base(input)
+ {
+ if (stopWords is CharArraySet)
+ {
+ this.stopWords = (CharArraySet) stopWords;
+ }
+ else
+ {
+ this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
+ foreach (var word in stopWords)
+ {
+ this.stopWords.Add(word);
+ }
+ }
+ this.enablePositionIncrements = enablePositionIncrements;
+ termAtt = AddAttribute<TermAttribute>();
+ posIncrAtt = AddAttribute<PositionIncrementAttribute>();
}
-
- /// <summary> Constructs a filter which removes words from the input
+
+ /// <summary> Constructs a filter which removes words from the input
/// TokenStream that are named in the Set.
- ///
/// </summary>
- /// <param name="enablePositionIncrements">true if token positions should record the removed stop words
- /// </param>
- /// <param name="in_Renamed">Input stream
- /// </param>
- /// <param name="stopWords">The set of Stop Words.
- /// </param>
- /// <seealso cref="MakeStopSet(String[])">
- /// </seealso>
- public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, System.Collections.Hashtable stopWords):this(enablePositionIncrements, in_Renamed, stopWords, false)
- {
- }
-
- public void Init()
- {
- termAtt = (TermAttribute) AddAttribute(typeof(TermAttribute));
- posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof(PositionIncrementAttribute));
- }
+ /// <param name="enablePositionIncrements">true if token positions should record the removed stop words</param>
+ /// <param name="in_Renamed">Input stream</param>
+ /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able set representing the stopwords</param>
+ /// <seealso cref="MakeStopSet(String[])"/>
+ public StopFilter(bool enablePositionIncrements, TokenStream in_Renamed, ISet<string> stopWords)
+ : this(enablePositionIncrements, in_Renamed, stopWords, false)
+ { }
/// <summary> Builds a Set from an array of stop words,
/// appropriate for passing into the StopFilter constructor.
@@ -195,7 +89,7 @@ namespace Lucene.Net.Analysis
/// </summary>
/// <seealso cref="MakeStopSet(String[], bool)"> passing false to ignoreCase
/// </seealso>
- public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
+ public static ISet<string> MakeStopSet(params string[] stopWords)
{
return MakeStopSet(stopWords, false);
}
@@ -204,40 +98,35 @@ namespace Lucene.Net.Analysis
/// appropriate for passing into the StopFilter constructor.
/// This permits this stopWords construction to be cached once when
/// an Analyzer is constructed.
- ///
/// </summary>
- /// <seealso cref="MakeStopSet(String[], bool)"> passing false to ignoreCase
- /// </seealso>
- public static System.Collections.Hashtable MakeStopSet(System.Collections.IList stopWords)
+ /// <param name="stopWords">A list of strings or char[] or any other ToString()-able list representing the stop words</param>
+ /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
+ public static ISet<string> MakeStopSet(IList<object> stopWords)
{
return MakeStopSet(stopWords, false);
}
- /// <summary> </summary>
- /// <param name="stopWords">An array of stopwords
- /// </param>
- /// <param name="ignoreCase">If true, all words are lower cased first.
- /// </param>
- /// <returns> a Set containing the words
- /// </returns>
- public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
+ /// <summary></summary>
+ /// <param name="stopWords">An array of stopwords</param>
+ /// <param name="ignoreCase">If true, all words are lower cased first.</param>
+ /// <returns> a Set containing the words</returns>
+ public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
{
CharArraySet stopSet = new CharArraySet(stopWords.Length, ignoreCase);
- stopSet.AddAll(new System.Collections.ArrayList(stopWords));
+ foreach(var s in stopWords)
+ stopSet.Add(s);
return stopSet;
}
/// <summary> </summary>
- /// <param name="stopWords">A List of Strings representing the stopwords
- /// </param>
- /// <param name="ignoreCase">if true, all words are lower cased first
- /// </param>
- /// <returns> A Set containing the words
- /// </returns>
- public static System.Collections.Hashtable MakeStopSet(System.Collections.IList stopWords, bool ignoreCase)
+ /// <param name="stopWords">A List of Strings or char[] or any other toString()-able list representing the stopwords </param>
+ /// <param name="ignoreCase">if true, all words are lower cased first</param>
+ /// <returns>A Set (<see cref="CharArraySet"/>)containing the words</returns>
+ public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
{
CharArraySet stopSet = new CharArraySet(stopWords.Count, ignoreCase);
- stopSet.AddAll(stopWords);
+ foreach(var word in stopWords)
+ stopSet.Add(word.ToString());
return stopSet;
}
@@ -258,57 +147,17 @@ namespace Lucene.Net.Analysis
}
skippedPositions += posIncrAtt.GetPositionIncrement();
}
- // reached EOS -- return null
+ // reached EOS -- return false
return false;
}
- /// <seealso cref="SetEnablePositionIncrementsDefault(bool)">
- /// </seealso>
- /// <deprecated> Please specify this when you create the StopFilter
- /// </deprecated>
- [Obsolete("Please specify this when you create the StopFilter")]
- public static bool GetEnablePositionIncrementsDefault()
- {
- return ENABLE_POSITION_INCREMENTS_DEFAULT;
- }
-
/// <summary> Returns version-dependent default for enablePositionIncrements. Analyzers
/// that embed StopFilter use this method when creating the StopFilter. Prior
- /// to 2.9, this returns <see cref="GetEnablePositionIncrementsDefault" />. On 2.9
- /// or later, it returns true.
+ /// to 2.9, this returns false. On 2.9 or later, it returns true.
/// </summary>
public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
{
- if (matchVersion.OnOrAfter(Version.LUCENE_29))
- {
- return true;
- }
- else
- {
- return ENABLE_POSITION_INCREMENTS_DEFAULT;
- }
- }
-
- /// <summary> Set the default position increments behavior of every StopFilter created
- /// from now on.
- /// <p/>
- /// Note: behavior of a single StopFilter instance can be modified with
- /// <see cref="SetEnablePositionIncrements(bool)" />. This static method allows
- /// control over behavior of classes using StopFilters internally, for
- /// example <see cref="Lucene.Net.Analysis.Standard.StandardAnalyzer"/>
- /// if used with the no-arg ctor.
- /// <p/>
- /// Default : false.
- ///
- /// </summary>
- /// <seealso cref="SetEnablePositionIncrements(bool)">
- /// </seealso>
- /// <deprecated> Please specify this when you create the StopFilter
- /// </deprecated>
- [Obsolete("Please specify this when you create the StopFilter")]
- public static void SetEnablePositionIncrementsDefault(bool defaultValue)
- {
- ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
+ return matchVersion.OnOrAfter(Version.LUCENE_29);
}
/// <seealso cref="SetEnablePositionIncrements(bool)">
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs?rev=1201243&r1=1201242&r2=1201243&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Analysis/TeeSinkTokenFilter.cs Sat Nov 12 10:44:21 2011
@@ -16,7 +16,7 @@
*/
using System;
-
+using System.Collections.Generic;
using AttributeImpl = Lucene.Net.Util.AttributeImpl;
using AttributeSource = Lucene.Net.Util.AttributeSource;
@@ -74,7 +74,7 @@ namespace Lucene.Net.Analysis
return true;
}
}
- private System.Collections.IList sinks = new System.Collections.ArrayList();
+ private LinkedList<WeakReference> sinks = new LinkedList<WeakReference>();
/// <summary> Instantiates a new TeeSinkTokenFilter.</summary>
public TeeSinkTokenFilter(TokenStream input):base(input)
@@ -95,7 +95,7 @@ namespace Lucene.Net.Analysis
public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
{
SinkTokenStream sink = new SinkTokenStream(this.CloneAttributes(), filter);
- this.sinks.Add(new System.WeakReference(sink));
+ this.sinks.AddLast(new System.WeakReference(sink));
return sink;
}
@@ -115,7 +115,7 @@ namespace Lucene.Net.Analysis
{
sink.AddAttributeImpl(impl);
}
- this.sinks.Add(new System.WeakReference(sink));
+ this.sinks.AddLast(new WeakReference(sink));
}
/// <summary> <c>TeeSinkTokenFilter</c> passes all tokens to the added sinks
@@ -125,8 +125,9 @@ namespace Lucene.Net.Analysis
/// </summary>
public void ConsumeAllTokens()
{
- while (IncrementToken())
- ;
+ while (IncrementToken())
+ {
+ }
}
public override bool IncrementToken()
@@ -135,9 +136,9 @@ namespace Lucene.Net.Analysis
{
// capture state lazily - maybe no SinkFilter accepts this state
AttributeSource.State state = null;
- for (System.Collections.IEnumerator it = sinks.GetEnumerator(); it.MoveNext(); )
+ foreach(WeakReference wr in sinks)
{
- SinkTokenStream sink = (SinkTokenStream) ((System.WeakReference) it.Current).Target;
+ SinkTokenStream sink = (SinkTokenStream)wr.Target;
if (sink != null)
{
if (sink.Accept(this))
@@ -160,9 +161,9 @@ namespace Lucene.Net.Analysis
{
base.End();
AttributeSource.State finalState = CaptureState();
- for (System.Collections.IEnumerator it = sinks.GetEnumerator(); it.MoveNext(); )
+ foreach(WeakReference wr in sinks)
{
- SinkTokenStream sink = (SinkTokenStream) ((System.WeakReference) it.Current).Target;
+ SinkTokenStream sink = (SinkTokenStream)wr.Target;
if (sink != null)
{
sink.SetFinalState(finalState);
@@ -189,12 +190,13 @@ namespace Lucene.Net.Analysis
public sealed class SinkTokenStream:TokenStream
{
- private System.Collections.IList cachedStates = new System.Collections.ArrayList();
+ private LinkedList<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
private AttributeSource.State finalState;
- private System.Collections.IEnumerator it = null;
+ private IEnumerator<AttributeSource.State> it = null;
private SinkFilter filter;
- internal SinkTokenStream(AttributeSource source, SinkFilter filter):base(source)
+ internal SinkTokenStream(AttributeSource source, SinkFilter filter)
+ : base(source)
{
this.filter = filter;
}
@@ -210,7 +212,7 @@ namespace Lucene.Net.Analysis
{
throw new System.SystemException("The tee must be consumed before sinks are consumed.");
}
- cachedStates.Add(state);
+ cachedStates.AddLast(state);
}
internal /*private*/ void SetFinalState(AttributeSource.State finalState)
@@ -231,7 +233,7 @@ namespace Lucene.Net.Analysis
return false;
}
- AttributeSource.State state = (State) it.Current;
+ AttributeSource.State state = it.Current;
RestoreState(state);
return true;
}