Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:40:10 UTC
[27/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/NumericTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/NumericTokenStream.cs b/src/core/Analysis/NumericTokenStream.cs
index 90b6e72..cbcb2dc 100644
--- a/src/core/Analysis/NumericTokenStream.cs
+++ b/src/core/Analysis/NumericTokenStream.cs
@@ -24,247 +24,247 @@ using NumericField = Lucene.Net.Documents.NumericField;
namespace Lucene.Net.Analysis
{
-
- /// <summary> <b>Expert:</b> This class provides a <see cref="TokenStream" />
- /// for indexing numeric values that can be used by <see cref="NumericRangeQuery{T}" />
+
+ /// <summary> <b>Expert:</b> This class provides a <see cref="TokenStream" />
+ /// for indexing numeric values that can be used by <see cref="NumericRangeQuery{T}" />
/// or <see cref="NumericRangeFilter{T}" />.
- ///
- /// <p/>Note that for simple usage, <see cref="NumericField" /> is
- /// recommended. <see cref="NumericField" /> disables norms and
- /// term freqs, as they are not usually needed during
- /// searching. If you need to change these settings, you
- /// should use this class.
- ///
- /// <p/>See <see cref="NumericField" /> for capabilities of fields
- /// indexed numerically.<p/>
- ///
- /// <p/>Here's an example usage, for an <c>int</c> field:
- ///
- /// <code>
- /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
- /// field.setOmitNorms(true);
- /// field.setOmitTermFreqAndPositions(true);
- /// document.add(field);
- /// </code>
- ///
- /// <p/>For optimal performance, re-use the TokenStream and Field instance
- /// for more than one document:
- ///
- /// <code>
- /// NumericTokenStream stream = new NumericTokenStream(precisionStep);
- /// Field field = new Field(name, stream);
- /// field.setOmitNorms(true);
- /// field.setOmitTermFreqAndPositions(true);
- /// Document document = new Document();
- /// document.add(field);
- ///
- /// for(all documents) {
- /// stream.setIntValue(value)
- /// writer.addDocument(document);
- /// }
- /// </code>
- ///
- /// <p/>This stream is not intended to be used in analyzers;
- /// it's more for iterating the different precisions during
- /// indexing a specific numeric value.<p/>
- ///
- /// <p/><b>NOTE</b>: as token streams are only consumed once
- /// the document is added to the index, if you index more
- /// than one numeric field, use a separate <c>NumericTokenStream</c>
- /// instance for each.<p/>
- ///
+ ///
+ /// <p/>Note that for simple usage, <see cref="NumericField" /> is
+ /// recommended. <see cref="NumericField" /> disables norms and
+ /// term freqs, as they are not usually needed during
+ /// searching. If you need to change these settings, you
+ /// should use this class.
+ ///
+ /// <p/>See <see cref="NumericField" /> for capabilities of fields
+ /// indexed numerically.<p/>
+ ///
+ /// <p/>Here's an example usage, for an <c>int</c> field:
+ ///
+ /// <code>
+ /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// document.add(field);
+ /// </code>
+ ///
+ /// <p/>For optimal performance, re-use the TokenStream and Field instance
+ /// for more than one document:
+ ///
+ /// <code>
+ /// NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ /// Field field = new Field(name, stream);
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// Document document = new Document();
+ /// document.add(field);
+ ///
+ /// for(all documents) {
+ /// stream.setIntValue(value)
+ /// writer.addDocument(document);
+ /// }
+ /// </code>
+ ///
+ /// <p/>This stream is not intended to be used in analyzers;
+ /// it's more for iterating the different precisions during
+ /// indexing a specific numeric value.<p/>
+ ///
+ /// <p/><b>NOTE</b>: as token streams are only consumed once
+ /// the document is added to the index, if you index more
+ /// than one numeric field, use a separate <c>NumericTokenStream</c>
+ /// instance for each.<p/>
+ ///
/// <p/>See <see cref="NumericRangeQuery{T}" /> for more details on the
- /// <a href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
- /// parameter as well as how numeric fields work under the hood.<p/>
- ///
- /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
- /// might change in incompatible ways in the next release.</font>
- /// Since 2.9
- /// </summary>
- public sealed class NumericTokenStream : TokenStream
- {
- private void InitBlock()
- {
+ /// <a href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
+ /// parameter as well as how numeric fields work under the hood.<p/>
+ ///
+ /// <p/><font color="red"><b>NOTE:</b> This API is experimental and
+ /// might change in incompatible ways in the next release.</font>
+ /// Since 2.9
+ /// </summary>
+ public sealed class NumericTokenStream : TokenStream
+ {
+ private void InitBlock()
+ {
termAtt = AddAttribute<ITermAttribute>();
typeAtt = AddAttribute<ITypeAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
- }
-
- /// <summary>The full precision token gets this token type assigned. </summary>
- public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
-
- /// <summary>The lower precision tokens get this token type assigned. </summary>
- public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
-
- /// <summary> Creates a token stream for numeric values using the default <c>precisionStep</c>
- /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The stream is not yet initialized,
- /// before using set a value using the various set<em>???</em>Value() methods.
- /// </summary>
- public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT)
- {
- }
-
- /// <summary> Creates a token stream for numeric values with the specified
- /// <c>precisionStep</c>. The stream is not yet initialized;
- /// before using it, set a value with one of the set<em>???</em>Value() methods.
- /// </summary>
- public NumericTokenStream(int precisionStep):base()
- {
- InitBlock();
- this.precisionStep = precisionStep;
- if (precisionStep < 1)
- throw new System.ArgumentException("precisionStep must be >=1");
- }
-
- /// <summary> Expert: Creates a token stream for numeric values with the specified
- /// <c>precisionStep</c> using the given <see cref="AttributeSource" />.
- /// The stream is not yet initialized;
- /// before using it, set a value with one of the set<em>???</em>Value() methods.
- /// </summary>
- public NumericTokenStream(AttributeSource source, int precisionStep):base(source)
- {
- InitBlock();
- this.precisionStep = precisionStep;
- if (precisionStep < 1)
- throw new System.ArgumentException("precisionStep must be >=1");
- }
-
- /// <summary> Expert: Creates a token stream for numeric values with the specified
- /// <c>precisionStep</c> using the given
- /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
- /// The stream is not yet initialized;
- /// before using it, set a value with one of the set<em>???</em>Value() methods.
- /// </summary>
- public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory)
- {
- InitBlock();
- this.precisionStep = precisionStep;
- if (precisionStep < 1)
- throw new System.ArgumentException("precisionStep must be >=1");
- }
-
- /// <summary> Initializes the token stream with the supplied <c>long</c> value.</summary>
- /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
- /// </param>
- /// <returns> this instance, because of this you can use it the following way:
- /// <c>new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))</c>
- /// </returns>
- public NumericTokenStream SetLongValue(long value_Renamed)
- {
- this.value_Renamed = value_Renamed;
- valSize = 64;
- shift = 0;
- return this;
- }
-
- /// <summary> Initializes the token stream with the supplied <c>int</c> value.</summary>
- /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
- /// </param>
- /// <returns> this instance, because of this you can use it the following way:
- /// <c>new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))</c>
- /// </returns>
- public NumericTokenStream SetIntValue(int value_Renamed)
- {
- this.value_Renamed = (long) value_Renamed;
- valSize = 32;
- shift = 0;
- return this;
- }
-
- /// <summary> Initializes the token stream with the supplied <c>double</c> value.</summary>
- /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
- /// </param>
- /// <returns> this instance, because of this you can use it the following way:
- /// <c>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</c>
- /// </returns>
- public NumericTokenStream SetDoubleValue(double value_Renamed)
- {
- this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed);
- valSize = 64;
- shift = 0;
- return this;
- }
-
- /// <summary> Initializes the token stream with the supplied <c>float</c> value.</summary>
- /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
- /// </param>
- /// <returns> this instance, because of this you can use it the following way:
- /// <c>new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))</c>
- /// </returns>
- public NumericTokenStream SetFloatValue(float value_Renamed)
- {
- this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed);
- valSize = 32;
- shift = 0;
- return this;
- }
-
- // @Override
- public override void Reset()
- {
- if (valSize == 0)
- throw new System.SystemException("call set???Value() before usage");
- shift = 0;
- }
+ }
+
+ /// <summary>The full precision token gets this token type assigned. </summary>
+ public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
+
+ /// <summary>The lower precision tokens get this token type assigned. </summary>
+ public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
+
+ /// <summary> Creates a token stream for numeric values using the default <c>precisionStep</c>
+ /// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The stream is not yet initialized,
+ /// before using set a value using the various set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT)
+ {
+ }
+
+ /// <summary> Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c>. The stream is not yet initialized;
+ /// before using it, set a value with one of the set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(int precisionStep):base()
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Expert: Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c> using the given <see cref="AttributeSource" />.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(AttributeSource source, int precisionStep):base(source)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Expert: Creates a token stream for numeric values with the specified
+ /// <c>precisionStep</c> using the given
+ /// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the set<em>???</em>Value() methods.
+ /// </summary>
+ public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>long</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetLongValue(long value_Renamed)
+ {
+ this.value_Renamed = value_Renamed;
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>int</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetIntValue(int value_Renamed)
+ {
+ this.value_Renamed = (long) value_Renamed;
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>double</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetDoubleValue(double value_Renamed)
+ {
+ this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed);
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// <summary> Initializes the token stream with the supplied <c>float</c> value.</summary>
+ /// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
+ /// </param>
+ /// <returns> this instance, because of this you can use it the following way:
+ /// <c>new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))</c>
+ /// </returns>
+ public NumericTokenStream SetFloatValue(float value_Renamed)
+ {
+ this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed);
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ // @Override
+ public override void Reset()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ shift = 0;
+ }
protected override void Dispose(bool disposing)
{
// Do nothing.
}
-
- // @Override
- public override bool IncrementToken()
- {
- if (valSize == 0)
- throw new System.SystemException("call set???Value() before usage");
- if (shift >= valSize)
- return false;
-
- ClearAttributes();
- char[] buffer;
- switch (valSize)
- {
-
- case 64:
- buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
- termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer));
- break;
-
-
- case 32:
- buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT);
- termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer));
- break;
-
-
- default:
- // should not happen
- throw new System.ArgumentException("valSize must be 32 or 64");
-
- }
-
- typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC;
- posIncrAtt.PositionIncrement = (shift == 0)?1:0;
- shift += precisionStep;
- return true;
- }
-
- // @Override
- public override System.String ToString()
- {
- System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize);
- sb.Append(",precisionStep=").Append(precisionStep).Append(')');
- return sb.ToString();
- }
-
- // members
- private ITermAttribute termAtt;
- private ITypeAttribute typeAtt;
- private IPositionIncrementAttribute posIncrAtt;
-
- private int shift = 0, valSize = 0; // valSize==0 means not initialized
- private readonly int precisionStep;
-
- private long value_Renamed = 0L;
- }
+
+ // @Override
+ public override bool IncrementToken()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ if (shift >= valSize)
+ return false;
+
+ ClearAttributes();
+ char[] buffer;
+ switch (valSize)
+ {
+
+ case 64:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
+ termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer));
+ break;
+
+
+ case 32:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT);
+ termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer));
+ break;
+
+
+ default:
+ // should not happen
+ throw new System.ArgumentException("valSize must be 32 or 64");
+
+ }
+
+ typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC;
+ posIncrAtt.PositionIncrement = (shift == 0)?1:0;
+ shift += precisionStep;
+ return true;
+ }
+
+ // @Override
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize);
+ sb.Append(",precisionStep=").Append(precisionStep).Append(')');
+ return sb.ToString();
+ }
+
+ // members
+ private ITermAttribute termAtt;
+ private ITypeAttribute typeAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+
+ private int shift = 0, valSize = 0; // valSize==0 means not initialized
+ private readonly int precisionStep;
+
+ private long value_Renamed = 0L;
+ }
}
\ No newline at end of file
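
The doc comment above gives the re-use pattern in Java-flavored pseudocode. A minimal C#
sketch of the same idea follows; the IndexWriter named "writer", the value sequence
"values", and "precisionStep" are assumptions for illustration, not part of this patch:

    var stream = new NumericTokenStream(precisionStep);
    var field = new Field("price", stream);            // the field consumes the stream
    var doc = new Document();
    doc.Add(field);

    foreach (int value in values)
    {
        stream.SetIntValue(value);    // re-initializes the stream (shift returns to 0)
        writer.AddDocument(doc);      // emits the full-precision token, then lower-precision ones
    }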
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/PerFieldAnalyzerWrapper.cs b/src/core/Analysis/PerFieldAnalyzerWrapper.cs
index b1c43aa..45e2344 100644
--- a/src/core/Analysis/PerFieldAnalyzerWrapper.cs
+++ b/src/core/Analysis/PerFieldAnalyzerWrapper.cs
@@ -20,104 +20,104 @@ using Lucene.Net.Support;
namespace Lucene.Net.Analysis
{
-
- /// <summary> This analyzer is used to facilitate scenarios where different
- /// fields require different analysis techniques. Use <see cref="AddAnalyzer" />
- /// to add a non-default analyzer on a field name basis.
- ///
- /// <p/>Example usage:
- ///
- /// <code>
- /// PerFieldAnalyzerWrapper aWrapper =
- /// new PerFieldAnalyzerWrapper(new StandardAnalyzer());
- /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
- /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
- /// </code>
- ///
- /// <p/>In this example, StandardAnalyzer will be used for all fields except "firstname"
- /// and "lastname", for which KeywordAnalyzer will be used.
- ///
- /// <p/>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
- /// and query parsing.
- /// </summary>
- public class PerFieldAnalyzerWrapper:Analyzer
- {
- private readonly Analyzer defaultAnalyzer;
- private readonly IDictionary<string, Analyzer> analyzerMap = new HashMap<string, Analyzer>();
-
-
- /// <summary> Constructs with default analyzer.
- ///
- /// </summary>
- /// <param name="defaultAnalyzer">Any fields not specifically
- /// defined to use a different analyzer will use the one provided here.
- /// </param>
- public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
+
+ /// <summary> This analyzer is used to facilitate scenarios where different
+ /// fields require different analysis techniques. Use <see cref="AddAnalyzer" />
+ /// to add a non-default analyzer on a field name basis.
+ ///
+ /// <p/>Example usage:
+ ///
+ /// <code>
+ /// PerFieldAnalyzerWrapper aWrapper =
+ /// new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+ /// </code>
+ ///
+ /// <p/>In this example, StandardAnalyzer will be used for all fields except "firstname"
+ /// and "lastname", for which KeywordAnalyzer will be used.
+ ///
+ /// <p/>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ /// and query parsing.
+ /// </summary>
+ public class PerFieldAnalyzerWrapper:Analyzer
+ {
+ private readonly Analyzer defaultAnalyzer;
+ private readonly IDictionary<string, Analyzer> analyzerMap = new HashMap<string, Analyzer>();
+
+
+ /// <summary> Constructs with default analyzer.
+ ///
+ /// </summary>
+ /// <param name="defaultAnalyzer">Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ /// </param>
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
: this(defaultAnalyzer, null)
- {
- }
-
- /// <summary> Constructs with default analyzer and a map of analyzers to use for
- /// specific fields.
- ///
- /// </summary>
- /// <param name="defaultAnalyzer">Any fields not specifically
- /// defined to use a different analyzer will use the one provided here.
- /// </param>
- /// <param name="fieldAnalyzers">a Map (String field name to the Analyzer) to be
- /// used for those fields
- /// </param>
+ {
+ }
+
+ /// <summary> Constructs with default analyzer and a map of analyzers to use for
+ /// specific fields.
+ ///
+ /// </summary>
+ /// <param name="defaultAnalyzer">Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ /// </param>
+ /// <param name="fieldAnalyzers">a Map (String field name to the Analyzer) to be
+ /// used for those fields
+ /// </param>
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IEnumerable<KeyValuePair<string, Analyzer>> fieldAnalyzers)
- {
- this.defaultAnalyzer = defaultAnalyzer;
- if (fieldAnalyzers != null)
- {
- foreach(var entry in fieldAnalyzers)
- analyzerMap[entry.Key] = entry.Value;
- }
+ {
+ this.defaultAnalyzer = defaultAnalyzer;
+ if (fieldAnalyzers != null)
+ {
+ foreach(var entry in fieldAnalyzers)
+ analyzerMap[entry.Key] = entry.Value;
+ }
SetOverridesTokenStreamMethod<PerFieldAnalyzerWrapper>();
- }
-
-
- /// <summary> Defines an analyzer to use for the specified field.
- ///
- /// </summary>
- /// <param name="fieldName">field name requiring a non-default analyzer
- /// </param>
- /// <param name="analyzer">non-default analyzer to use for field
- /// </param>
- public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer)
- {
- analyzerMap[fieldName] = analyzer;
- }
-
- public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+ }
+
+
+ /// <summary> Defines an analyzer to use for the specified field.
+ ///
+ /// </summary>
+ /// <param name="fieldName">field name requiring a non-default analyzer
+ /// </param>
+ /// <param name="analyzer">non-default analyzer to use for field
+ /// </param>
+ public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer)
+ {
+ analyzerMap[fieldName] = analyzer;
+ }
+
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
- return analyzer.TokenStream(fieldName, reader);
- }
-
- public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
- {
- if (overridesTokenStreamMethod)
- {
- // LUCENE-1678: force fallback to tokenStream() if we
- // have been subclassed and that subclass overrides
- // tokenStream but not reusableTokenStream
- return TokenStream(fieldName, reader);
- }
- var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+ return analyzer.TokenStream(fieldName, reader);
+ }
+
+ public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
- return analyzer.ReusableTokenStream(fieldName, reader);
- }
-
- /// <summary>Return the positionIncrementGap from the analyzer assigned to fieldName </summary>
- public override int GetPositionIncrementGap(string fieldName)
- {
- var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
- return analyzer.GetPositionIncrementGap(fieldName);
- }
+ return analyzer.ReusableTokenStream(fieldName, reader);
+ }
+
+ /// <summary>Return the positionIncrementGap from the analyzer assigned to fieldName </summary>
+ public override int GetPositionIncrementGap(string fieldName)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+ return analyzer.GetPositionIncrementGap(fieldName);
+ }
/// <summary> Return the offsetGap from the analyzer assigned to field </summary>
public override int GetOffsetGap(Documents.IFieldable field)
@@ -125,11 +125,11 @@ namespace Lucene.Net.Analysis
Analyzer analyzer = analyzerMap[field.Name] ?? defaultAnalyzer;
return analyzer.GetOffsetGap(field);
}
-
- public override System.String ToString()
- {
- // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()?
- return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
- }
- }
+
+ public override System.String ToString()
+ {
+ // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()?
+ return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
+ }
+ }
}
\ No newline at end of file
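
In this port's naming (AddAnalyzer, as defined above), the wrapper's own usage example
reads roughly as below; KeywordAnalyzer and the Version argument to StandardAnalyzer are
taken from elsewhere in the codebase, not from this hunk:

    var aWrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_29));
    aWrapper.AddAnalyzer("firstname", new KeywordAnalyzer());
    aWrapper.AddAnalyzer("lastname", new KeywordAnalyzer());
    // "firstname" and "lastname" now use KeywordAnalyzer; every other field falls
    // through to the default analyzer via the analyzerMap lookup shown in TokenStream().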
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/PorterStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/PorterStemFilter.cs b/src/core/Analysis/PorterStemFilter.cs
index b7f1dbf..b4f14dc 100644
--- a/src/core/Analysis/PorterStemFilter.cs
+++ b/src/core/Analysis/PorterStemFilter.cs
@@ -19,44 +19,44 @@ using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis
{
-
- /// <summary>Transforms the token stream as per the Porter stemming algorithm.
- /// Note: the input to the stemming filter must already be in lower case,
- /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
- /// down the Tokenizer chain in order for this to work properly!
- /// <p/>
- /// To use this filter with other analyzers, you'll want to write an
- /// Analyzer class that sets up the TokenStream chain as you want it.
- /// To use this with LowerCaseTokenizer, for example, you'd write an
- /// analyzer like this:
- /// <p/>
- /// <code>
- /// class MyAnalyzer extends Analyzer {
- /// public final TokenStream tokenStream(String fieldName, Reader reader) {
- /// return new PorterStemFilter(new LowerCaseTokenizer(reader));
- /// }
- /// }
- /// </code>
- /// </summary>
- public sealed class PorterStemFilter:TokenFilter
- {
- private readonly PorterStemmer stemmer;
- private readonly ITermAttribute termAtt;
-
- public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed)
- {
- stemmer = new PorterStemmer();
+
+ /// <summary>Transforms the token stream as per the Porter stemming algorithm.
+ /// Note: the input to the stemming filter must already be in lower case,
+ /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+ /// down the Tokenizer chain in order for this to work properly!
+ /// <p/>
+ /// To use this filter with other analyzers, you'll want to write an
+ /// Analyzer class that sets up the TokenStream chain as you want it.
+ /// To use this with LowerCaseTokenizer, for example, you'd write an
+ /// analyzer like this:
+ /// <p/>
+ /// <code>
+ /// class MyAnalyzer extends Analyzer {
+ /// public final TokenStream tokenStream(String fieldName, Reader reader) {
+ /// return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ /// }
+ /// }
+ /// </code>
+ /// </summary>
+ public sealed class PorterStemFilter:TokenFilter
+ {
+ private readonly PorterStemmer stemmer;
+ private readonly ITermAttribute termAtt;
+
+ public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
+ stemmer = new PorterStemmer();
termAtt = AddAttribute<ITermAttribute>();
- }
-
- public override bool IncrementToken()
- {
- if (!input.IncrementToken())
- return false;
-
- if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength()))
- termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
- return true;
- }
- }
+ }
+
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ return false;
+
+ if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength()))
+ termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
+ return true;
+ }
+ }
}
\ No newline at end of file
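
The analyzer example in the doc comment above is Java. Assuming the LowerCaseTokenizer
referenced elsewhere in this commit, the C# equivalent (using the same TokenStream
override signature as SimpleAnalyzer later in this patch) is roughly:

    class MyAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // PorterStemFilter requires lower-cased input, so lower-case while tokenizing.
            return new PorterStemFilter(new LowerCaseTokenizer(reader));
        }
    }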
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/PorterStemmer.cs b/src/core/Analysis/PorterStemmer.cs
index f47c5a7..bc4cf75 100644
--- a/src/core/Analysis/PorterStemmer.cs
+++ b/src/core/Analysis/PorterStemmer.cs
@@ -42,705 +42,705 @@ optimize for fewer object creations. ]
using System;
namespace Lucene.Net.Analysis
{
-
- /// <summary>
- /// Stemmer, implementing the Porter Stemming Algorithm
- ///
- /// The Stemmer class transforms a word into its root form. The input
- /// word can be provided a character at a time (by calling add()), or at once
- /// by calling one of the various stem(something) methods.
- /// </summary>
-
- class PorterStemmer
- {
- private char[] b;
- private int i, j, k, k0;
- private bool dirty = false;
- private const int INC = 50; /* unit of size whereby b is increased */
- private const int EXTRA = 1;
-
- public PorterStemmer()
- {
- b = new char[INC];
- i = 0;
- }
-
- /// <summary> reset() resets the stemmer so it can stem another word. If you invoke
- /// the stemmer by calling add(char) and then stem(), you must call reset()
- /// before starting another word.
- /// </summary>
- public virtual void Reset()
- {
- i = 0; dirty = false;
- }
-
- /// <summary> Add a character to the word being stemmed. When you are finished
- /// adding characters, you can call stem(void) to process the word.
- /// </summary>
- public virtual void Add(char ch)
- {
- if (b.Length <= i + EXTRA)
- {
- var new_b = new char[b.Length + INC];
- Array.Copy(b, 0, new_b, 0, b.Length);
- b = new_b;
- }
- b[i++] = ch;
- }
-
- /// <summary> After a word has been stemmed, it can be retrieved by toString(),
- /// or a reference to the internal buffer can be retrieved by getResultBuffer
- /// and getResultLength (which is generally more efficient.)
- /// </summary>
- public override System.String ToString()
- {
- return new System.String(b, 0, i);
- }
+
+ /// <summary>
+ /// Stemmer, implementing the Porter Stemming Algorithm
+ ///
+ /// The Stemmer class transforms a word into its root form. The input
+ /// word can be provided a character at a time (by calling add()), or at once
+ /// by calling one of the various stem(something) methods.
+ /// </summary>
+
+ class PorterStemmer
+ {
+ private char[] b;
+ private int i, j, k, k0;
+ private bool dirty = false;
+ private const int INC = 50; /* unit of size whereby b is increased */
+ private const int EXTRA = 1;
+
+ public PorterStemmer()
+ {
+ b = new char[INC];
+ i = 0;
+ }
+
+ /// <summary> reset() resets the stemmer so it can stem another word. If you invoke
+ /// the stemmer by calling add(char) and then stem(), you must call reset()
+ /// before starting another word.
+ /// </summary>
+ public virtual void Reset()
+ {
+ i = 0; dirty = false;
+ }
+
+ /// <summary> Add a character to the word being stemmed. When you are finished
+ /// adding characters, you can call stem(void) to process the word.
+ /// </summary>
+ public virtual void Add(char ch)
+ {
+ if (b.Length <= i + EXTRA)
+ {
+ var new_b = new char[b.Length + INC];
+ Array.Copy(b, 0, new_b, 0, b.Length);
+ b = new_b;
+ }
+ b[i++] = ch;
+ }
+
+ /// <summary> After a word has been stemmed, it can be retrieved by toString(),
+ /// or a reference to the internal buffer can be retrieved by getResultBuffer
+ /// and getResultLength (which is generally more efficient.)
+ /// </summary>
+ public override System.String ToString()
+ {
+ return new System.String(b, 0, i);
+ }
- /// <summary> Returns the length of the word resulting from the stemming process.</summary>
- public virtual int ResultLength
- {
- get { return i; }
- }
+ /// <summary> Returns the length of the word resulting from the stemming process.</summary>
+ public virtual int ResultLength
+ {
+ get { return i; }
+ }
- /// <summary> Returns a reference to a character buffer containing the results of
- /// the stemming process. You also need to consult getResultLength()
- /// to determine the length of the result.
- /// </summary>
- public virtual char[] ResultBuffer
- {
- get { return b; }
- }
+ /// <summary> Returns a reference to a character buffer containing the results of
+ /// the stemming process. You also need to consult getResultLength()
+ /// to determine the length of the result.
+ /// </summary>
+ public virtual char[] ResultBuffer
+ {
+ get { return b; }
+ }
- /* cons(i) is true <=> b[i] is a consonant. */
-
- private bool Cons(int i)
- {
- switch (b[i])
- {
-
- case 'a':
- case 'e':
- case 'i':
- case 'o':
- case 'u':
- return false;
-
- case 'y':
- return (i == k0)?true:!Cons(i - 1);
-
- default:
- return true;
-
- }
- }
-
- /* m() measures the number of consonant sequences between k0 and j. if c is
- a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- presence,
-
- <c><v> gives 0
- <c>vc<v> gives 1
- <c>vcvc<v> gives 2
- <c>vcvcvc<v> gives 3
- ....
- */
-
- private int M()
- {
- int n = 0;
- int i = k0;
- while (true)
- {
- if (i > j)
- return n;
- if (!Cons(i))
- break;
- i++;
- }
- i++;
- while (true)
- {
- while (true)
- {
- if (i > j)
- return n;
- if (Cons(i))
- break;
- i++;
- }
- i++;
- n++;
- while (true)
- {
- if (i > j)
- return n;
- if (!Cons(i))
- break;
- i++;
- }
- i++;
- }
- }
-
- /* vowelinstem() is true <=> k0,...j contains a vowel */
-
- private bool Vowelinstem()
- {
- int i;
- for (i = k0; i <= j; i++)
- if (!Cons(i))
- return true;
- return false;
- }
-
- /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
-
- private bool Doublec(int j)
- {
- if (j < k0 + 1)
- return false;
- if (b[j] != b[j - 1])
- return false;
- return Cons(j);
- }
-
- /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
- and also if the second c is not w,x or y. this is used when trying to
- restore an e at the end of a short word. e.g.
-
- cav(e), lov(e), hop(e), crim(e), but
- snow, box, tray.
-
- */
-
- private bool Cvc(int i)
- {
- if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
- return false;
- else
- {
- int ch = b[i];
- if (ch == 'w' || ch == 'x' || ch == 'y')
- return false;
- }
- return true;
- }
-
- private bool Ends(System.String s)
- {
- int l = s.Length;
- int o = k - l + 1;
- if (o < k0)
- return false;
- for (int i = 0; i < l; i++)
- if (b[o + i] != s[i])
- return false;
- j = k - l;
- return true;
- }
-
- /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
- k. */
-
- internal virtual void Setto(System.String s)
- {
- int l = s.Length;
- int o = j + 1;
- for (int i = 0; i < l; i++)
- b[o + i] = s[i];
- k = j + l;
- dirty = true;
- }
-
- /* r(s) is used further down. */
-
- internal virtual void R(System.String s)
- {
- if (M() > 0)
- Setto(s);
- }
-
- /* step1() gets rid of plurals and -ed or -ing. e.g.
-
- caresses -> caress
- ponies -> poni
- ties -> ti
- caress -> caress
- cats -> cat
-
- feed -> feed
- agreed -> agree
- disabled -> disable
-
- matting -> mat
- mating -> mate
- meeting -> meet
- milling -> mill
- messing -> mess
-
- meetings -> meet
-
- */
-
- private void Step1()
- {
- if (b[k] == 's')
- {
- if (Ends("sses"))
- k -= 2;
- else if (Ends("ies"))
- Setto("i");
- else if (b[k - 1] != 's')
- k--;
- }
- if (Ends("eed"))
- {
- if (M() > 0)
- k--;
- }
- else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
- {
- k = j;
- if (Ends("at"))
- Setto("ate");
- else if (Ends("bl"))
- Setto("ble");
- else if (Ends("iz"))
- Setto("ize");
- else if (Doublec(k))
- {
- int ch = b[k--];
- if (ch == 'l' || ch == 's' || ch == 'z')
- k++;
- }
- else if (M() == 1 && Cvc(k))
- Setto("e");
- }
- }
-
- /* step2() turns terminal y to i when there is another vowel in the stem. */
-
- private void Step2()
- {
- if (Ends("y") && Vowelinstem())
- {
- b[k] = 'i';
- dirty = true;
- }
- }
-
- /* step3() maps double suffices to single ones. so -ization ( = -ize plus
- -ation) maps to -ize etc. note that the string before the suffix must give
- m() > 0. */
-
- private void Step3()
- {
- if (k == k0)
- return ; /* For Bug 1 */
- switch (b[k - 1])
- {
-
- case 'a':
- if (Ends("ational"))
- {
- R("ate"); break;
- }
- if (Ends("tional"))
- {
- R("tion"); break;
- }
- break;
-
- case 'c':
- if (Ends("enci"))
- {
- R("ence"); break;
- }
- if (Ends("anci"))
- {
- R("ance"); break;
- }
- break;
-
- case 'e':
- if (Ends("izer"))
- {
- R("ize"); break;
- }
- break;
-
- case 'l':
- if (Ends("bli"))
- {
- R("ble"); break;
- }
- if (Ends("alli"))
- {
- R("al"); break;
- }
- if (Ends("entli"))
- {
- R("ent"); break;
- }
- if (Ends("eli"))
- {
- R("e"); break;
- }
- if (Ends("ousli"))
- {
- R("ous"); break;
- }
- break;
-
- case 'o':
- if (Ends("ization"))
- {
- R("ize"); break;
- }
- if (Ends("ation"))
- {
- R("ate"); break;
- }
- if (Ends("ator"))
- {
- R("ate"); break;
- }
- break;
-
- case 's':
- if (Ends("alism"))
- {
- R("al"); break;
- }
- if (Ends("iveness"))
- {
- R("ive"); break;
- }
- if (Ends("fulness"))
- {
- R("ful"); break;
- }
- if (Ends("ousness"))
- {
- R("ous"); break;
- }
- break;
-
- case 't':
- if (Ends("aliti"))
- {
- R("al"); break;
- }
- if (Ends("iviti"))
- {
- R("ive"); break;
- }
- if (Ends("biliti"))
- {
- R("ble"); break;
- }
- break;
-
- case 'g':
- if (Ends("logi"))
- {
- R("log"); break;
- }
- break;
- }
- }
-
- /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
-
- private void Step4()
- {
- switch (b[k])
- {
-
- case 'e':
- if (Ends("icate"))
- {
- R("ic"); break;
- }
- if (Ends("ative"))
- {
- R(""); break;
- }
- if (Ends("alize"))
- {
- R("al"); break;
- }
- break;
-
- case 'i':
- if (Ends("iciti"))
- {
- R("ic"); break;
- }
- break;
-
- case 'l':
- if (Ends("ical"))
- {
- R("ic"); break;
- }
- if (Ends("ful"))
- {
- R(""); break;
- }
- break;
-
- case 's':
- if (Ends("ness"))
- {
- R(""); break;
- }
- break;
- }
- }
-
- /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
-
- private void Step5()
- {
- if (k == k0)
- return ; /* for Bug 1 */
- switch (b[k - 1])
- {
-
- case 'a':
- if (Ends("al"))
- break;
- return ;
-
- case 'c':
- if (Ends("ance"))
- break;
- if (Ends("ence"))
- break;
- return ;
-
- case 'e':
- if (Ends("er"))
- break; return ;
-
- case 'i':
- if (Ends("ic"))
- break; return ;
-
- case 'l':
- if (Ends("able"))
- break;
- if (Ends("ible"))
- break; return ;
-
- case 'n':
- if (Ends("ant"))
- break;
- if (Ends("ement"))
- break;
- if (Ends("ment"))
- break;
- /* element etc. not stripped before the m */
- if (Ends("ent"))
- break;
- return ;
-
- case 'o':
- if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
- break;
- /* j >= 0 fixes Bug 2 */
- if (Ends("ou"))
- break;
- return ;
- /* takes care of -ous */
-
- case 's':
- if (Ends("ism"))
- break;
- return ;
-
- case 't':
- if (Ends("ate"))
- break;
- if (Ends("iti"))
- break;
- return ;
-
- case 'u':
- if (Ends("ous"))
- break;
- return ;
-
- case 'v':
- if (Ends("ive"))
- break;
- return ;
-
- case 'z':
- if (Ends("ize"))
- break;
- return ;
-
- default:
- return ;
-
- }
- if (M() > 1)
- k = j;
- }
-
- /* step6() removes a final -e if m() > 1. */
-
- private void Step6()
- {
- j = k;
- if (b[k] == 'e')
- {
- int a = M();
- if (a > 1 || a == 1 && !Cvc(k - 1))
- k--;
- }
- if (b[k] == 'l' && Doublec(k) && M() > 1)
- k--;
- }
-
-
- /// <summary> Stem a word provided as a String. Returns the result as a String.</summary>
- public virtual System.String Stem(System.String s)
- {
- if (Stem(s.ToCharArray(), s.Length))
- {
- return ToString();
- }
- else
- return s;
- }
-
- /// <summary>Stem a word contained in a char[]. Returns true if the stemming process
- /// resulted in a word different from the input. You can retrieve the
- /// result with getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] word)
- {
- return Stem(word, word.Length);
- }
-
- /// <summary>Stem a word contained in a portion of a char[] array. Returns
- /// true if the stemming process resulted in a word different from
- /// the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
- {
- Reset();
- if (b.Length < wordLen)
- {
- var new_b = new char[wordLen + EXTRA];
- b = new_b;
- }
- Array.Copy(wordBuffer, offset, b, 0, wordLen);
- i = wordLen;
- return Stem(0);
- }
-
- /// <summary>Stem a word contained in a leading portion of a char[] array.
- /// Returns true if the stemming process resulted in a word different
- /// from the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] word, int wordLen)
- {
- return Stem(word, 0, wordLen);
- }
-
- /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
- /// Returns true if the stemming process resulted in a word different
- /// from the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem()
- {
- return Stem(0);
- }
-
- public virtual bool Stem(int i0)
- {
- k = i - 1;
- k0 = i0;
- if (k > k0 + 1)
- {
- Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
- }
- // Also, a word is considered dirty if we lopped off letters
- // Thanks to Ifigenia Vairelles for pointing this out.
- if (i != k + 1)
- dirty = true;
- i = k + 1;
- return dirty;
- }
-
- /// <summary>Test program for demonstrating the Stemmer. It reads a file and
- /// stems each word, writing the result to standard out.
- /// Usage: Stemmer file-name
- /// </summary>
- [STAThread]
- public static void Main(System.String[] args)
- {
- var s = new PorterStemmer();
-
- for (int i = 0; i < args.Length; i++)
- {
- try
- {
- System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read);
- var buffer = new byte[1024];
+ /* cons(i) is true <=> b[i] is a consonant. */
+
+ private bool Cons(int i)
+ {
+ switch (b[i])
+ {
+
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ return false;
+
+ case 'y':
+ return (i == k0)?true:!Cons(i - 1);
+
+ default:
+ return true;
+
+ }
+ }
+
+ /* m() measures the number of consonant sequences between k0 and j. if c is
+ a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+ presence,
+
+ <c><v> gives 0
+ <c>vc<v> gives 1
+ <c>vcvc<v> gives 2
+ <c>vcvcvc<v> gives 3
+ ....
+ */
+
+ private int M()
+ {
+ int n = 0;
+ int i = k0;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ while (true)
+ {
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ n++;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ }
+ }
+
+ /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+ private bool Vowelinstem()
+ {
+ int i;
+ for (i = k0; i <= j; i++)
+ if (!Cons(i))
+ return true;
+ return false;
+ }
+
+ /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+ private bool Doublec(int j)
+ {
+ if (j < k0 + 1)
+ return false;
+ if (b[j] != b[j - 1])
+ return false;
+ return Cons(j);
+ }
+
+ /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+ and also if the second c is not w,x or y. this is used when trying to
+ restore an e at the end of a short word. e.g.
+
+ cav(e), lov(e), hop(e), crim(e), but
+ snow, box, tray.
+
+ */
+
+ private bool Cvc(int i)
+ {
+ if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
+ return false;
+ else
+ {
+ int ch = b[i];
+ if (ch == 'w' || ch == 'x' || ch == 'y')
+ return false;
+ }
+ return true;
+ }
+
+ private bool Ends(System.String s)
+ {
+ int l = s.Length;
+ int o = k - l + 1;
+ if (o < k0)
+ return false;
+ for (int i = 0; i < l; i++)
+ if (b[o + i] != s[i])
+ return false;
+ j = k - l;
+ return true;
+ }
+
+ /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+ k. */
+
+ internal virtual void Setto(System.String s)
+ {
+ int l = s.Length;
+ int o = j + 1;
+ for (int i = 0; i < l; i++)
+ b[o + i] = s[i];
+ k = j + l;
+ dirty = true;
+ }
+
+ /* r(s) is used further down. */
+
+ internal virtual void R(System.String s)
+ {
+ if (M() > 0)
+ Setto(s);
+ }
+
+ /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+ caresses -> caress
+ ponies -> poni
+ ties -> ti
+ caress -> caress
+ cats -> cat
+
+ feed -> feed
+ agreed -> agree
+ disabled -> disable
+
+ matting -> mat
+ mating -> mate
+ meeting -> meet
+ milling -> mill
+ messing -> mess
+
+ meetings -> meet
+
+ */
+
+ private void Step1()
+ {
+ if (b[k] == 's')
+ {
+ if (Ends("sses"))
+ k -= 2;
+ else if (Ends("ies"))
+ Setto("i");
+ else if (b[k - 1] != 's')
+ k--;
+ }
+ if (Ends("eed"))
+ {
+ if (M() > 0)
+ k--;
+ }
+ else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
+ {
+ k = j;
+ if (Ends("at"))
+ Setto("ate");
+ else if (Ends("bl"))
+ Setto("ble");
+ else if (Ends("iz"))
+ Setto("ize");
+ else if (Doublec(k))
+ {
+ int ch = b[k--];
+ if (ch == 'l' || ch == 's' || ch == 'z')
+ k++;
+ }
+ else if (M() == 1 && Cvc(k))
+ Setto("e");
+ }
+ }
+
+ /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+ private void Step2()
+ {
+ if (Ends("y") && Vowelinstem())
+ {
+ b[k] = 'i';
+ dirty = true;
+ }
+ }
+
+ /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+ -ation) maps to -ize etc. note that the string before the suffix must give
+ m() > 0. */
+
+ private void Step3()
+ {
+ if (k == k0)
+ return ; /* For Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("ational"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("tional"))
+ {
+ R("tion"); break;
+ }
+ break;
+
+ case 'c':
+ if (Ends("enci"))
+ {
+ R("ence"); break;
+ }
+ if (Ends("anci"))
+ {
+ R("ance"); break;
+ }
+ break;
+
+ case 'e':
+ if (Ends("izer"))
+ {
+ R("ize"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("bli"))
+ {
+ R("ble"); break;
+ }
+ if (Ends("alli"))
+ {
+ R("al"); break;
+ }
+ if (Ends("entli"))
+ {
+ R("ent"); break;
+ }
+ if (Ends("eli"))
+ {
+ R("e"); break;
+ }
+ if (Ends("ousli"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 'o':
+ if (Ends("ization"))
+ {
+ R("ize"); break;
+ }
+ if (Ends("ation"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("ator"))
+ {
+ R("ate"); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("alism"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iveness"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("fulness"))
+ {
+ R("ful"); break;
+ }
+ if (Ends("ousness"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 't':
+ if (Ends("aliti"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iviti"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("biliti"))
+ {
+ R("ble"); break;
+ }
+ break;
+
+ case 'g':
+ if (Ends("logi"))
+ {
+ R("log"); break;
+ }
+ break;
+ }
+ }
+
+ /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+ private void Step4()
+ {
+ switch (b[k])
+ {
+
+ case 'e':
+ if (Ends("icate"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ative"))
+ {
+ R(""); break;
+ }
+ if (Ends("alize"))
+ {
+ R("al"); break;
+ }
+ break;
+
+ case 'i':
+ if (Ends("iciti"))
+ {
+ R("ic"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("ical"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ful"))
+ {
+ R(""); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("ness"))
+ {
+ R(""); break;
+ }
+ break;
+ }
+ }
+
+ /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+ private void Step5()
+ {
+ if (k == k0)
+ return ; /* for Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("al"))
+ break;
+ return ;
+
+ case 'c':
+ if (Ends("ance"))
+ break;
+ if (Ends("ence"))
+ break;
+ return ;
+
+ case 'e':
+ if (Ends("er"))
+ break; return ;
+
+ case 'i':
+ if (Ends("ic"))
+ break; return ;
+
+ case 'l':
+ if (Ends("able"))
+ break;
+ if (Ends("ible"))
+ break; return ;
+
+ case 'n':
+ if (Ends("ant"))
+ break;
+ if (Ends("ement"))
+ break;
+ if (Ends("ment"))
+ break;
+ /* element etc. not stripped before the m */
+ if (Ends("ent"))
+ break;
+ return ;
+
+ case 'o':
+ if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+ break;
+ /* j >= 0 fixes Bug 2 */
+ if (Ends("ou"))
+ break;
+ return ;
+ /* takes care of -ous */
+
+ case 's':
+ if (Ends("ism"))
+ break;
+ return ;
+
+ case 't':
+ if (Ends("ate"))
+ break;
+ if (Ends("iti"))
+ break;
+ return ;
+
+ case 'u':
+ if (Ends("ous"))
+ break;
+ return ;
+
+ case 'v':
+ if (Ends("ive"))
+ break;
+ return ;
+
+ case 'z':
+ if (Ends("ize"))
+ break;
+ return ;
+
+ default:
+ return ;
+
+ }
+ if (M() > 1)
+ k = j;
+ }
+
+ /* step6() removes a final -e if m() > 1. */
+
+ private void Step6()
+ {
+ j = k;
+ if (b[k] == 'e')
+ {
+ int a = M();
+ if (a > 1 || a == 1 && !Cvc(k - 1))
+ k--;
+ }
+ if (b[k] == 'l' && Doublec(k) && M() > 1)
+ k--;
+ }
+
+
+ /// <summary> Stem a word provided as a String. Returns the result as a String.</summary>
+ public virtual System.String Stem(System.String s)
+ {
+ if (Stem(s.ToCharArray(), s.Length))
+ {
+ return ToString();
+ }
+ else
+ return s;
+ }
+
+ /// <summary>Stem a word contained in a char[]. Returns true if the stemming process
+ /// resulted in a word different from the input. You can retrieve the
+ /// result with getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word)
+ {
+ return Stem(word, word.Length);
+ }
+
+ /// <summary>Stem a word contained in a portion of a char[] array. Returns
+ /// true if the stemming process resulted in a word different from
+ /// the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
+ {
+ Reset();
+ if (b.Length < wordLen)
+ {
+ var new_b = new char[wordLen + EXTRA];
+ b = new_b;
+ }
+ Array.Copy(wordBuffer, offset, b, 0, wordLen);
+ i = wordLen;
+ return Stem(0);
+ }
+
+ /// <summary>Stem a word contained in a leading portion of a char[] array.
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word, int wordLen)
+ {
+ return Stem(word, 0, wordLen);
+ }
+
+ /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem()
+ {
+ return Stem(0);
+ }
+
+ public virtual bool Stem(int i0)
+ {
+ k = i - 1;
+ k0 = i0;
+ if (k > k0 + 1)
+ {
+ Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
+ }
+ // Also, a word is considered dirty if we lopped off letters
+ // Thanks to Ifigenia Vairelles for pointing this out.
+ if (i != k + 1)
+ dirty = true;
+ i = k + 1;
+ return dirty;
+ }
+
+ /// <summary>Test program for demonstrating the Stemmer. It reads a file and
+ /// stems each word, writing the result to standard out.
+ /// Usage: Stemmer file-name
+ /// </summary>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+ var s = new PorterStemmer();
+
+ for (int i = 0; i < args.Length; i++)
+ {
+ try
+ {
+ System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read);
+ var buffer = new byte[1024];
- int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
- int offset = 0;
- s.Reset();
-
- while (true)
- {
- int ch;
- if (offset < bufferLen)
- ch = buffer[offset++];
- else
- {
- bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
- offset = 0;
- if (bufferLen < 0)
- ch = - 1;
- else
- ch = buffer[offset++];
- }
-
- if (Char.IsLetter((char) ch))
- {
- s.Add(Char.ToLower((char) ch));
- }
- else
- {
- s.Stem();
- Console.Out.Write(s.ToString());
- s.Reset();
- if (ch < 0)
- break;
- else
- {
- System.Console.Out.Write((char) ch);
- }
- }
- }
-
- in_Renamed.Close();
- }
- catch (System.IO.IOException)
- {
- Console.Out.WriteLine("error reading " + args[i]);
- }
- }
- }
- }
+ int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ int offset = 0;
+ s.Reset();
+
+ while (true)
+ {
+ int ch;
+ if (offset < bufferLen)
+ ch = buffer[offset++];
+ else
+ {
+ bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ offset = 0;
+ if (bufferLen < 0)
+ ch = - 1;
+ else
+ ch = buffer[offset++];
+ }
+
+ if (Char.IsLetter((char) ch))
+ {
+ s.Add(Char.ToLower((char) ch));
+ }
+ else
+ {
+ s.Stem();
+ Console.Out.Write(s.ToString());
+ s.Reset();
+ if (ch < 0)
+ break;
+ else
+ {
+ System.Console.Out.Write((char) ch);
+ }
+ }
+ }
+
+ in_Renamed.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ Console.Out.WriteLine("error reading " + args[i]);
+ }
+ }
+ }
+ }
}
\ No newline at end of file
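
PorterStemmer is internal to the assembly, so external callers reach it through
PorterStemFilter; inside the assembly the string overload shown above can be called
directly. A small sketch, with the expected results taken from the step1() comment table:

    var stemmer = new PorterStemmer();
    string stemmed = stemmer.Stem("meetings");   // "meet", per the step1() examples
    string same = stemmer.Stem("caress");        // "caress" -- no rule fires, so
                                                 // Stem(string) returns its input unchanged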
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/SimpleAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/SimpleAnalyzer.cs b/src/core/Analysis/SimpleAnalyzer.cs
index b84f470..50bc9c1 100644
--- a/src/core/Analysis/SimpleAnalyzer.cs
+++ b/src/core/Analysis/SimpleAnalyzer.cs
@@ -17,29 +17,29 @@
namespace Lucene.Net.Analysis
{
-
- /// <summary>An <see cref="Analyzer" /> that filters <see cref="LetterTokenizer" />
- /// with <see cref="LowerCaseFilter" />
- /// </summary>
-
- public sealed class SimpleAnalyzer : Analyzer
- {
- public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- return new LowerCaseTokenizer(reader);
- }
-
- public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- var tokenizer = (Tokenizer) PreviousTokenStream;
- if (tokenizer == null)
- {
- tokenizer = new LowerCaseTokenizer(reader);
- PreviousTokenStream = tokenizer;
- }
- else
- tokenizer.Reset(reader);
- return tokenizer;
- }
- }
+
+ /// <summary>An <see cref="Analyzer" /> that filters <see cref="LetterTokenizer" />
+ /// with <see cref="LowerCaseFilter" />
+ /// </summary>
+
+ public sealed class SimpleAnalyzer : Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new LowerCaseTokenizer(reader);
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new LowerCaseTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
}
\ No newline at end of file
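
SimpleAnalyzer also shows the PreviousTokenStream caching idiom behind
ReusableTokenStream. A short consumption sketch; the Term property on ITermAttribute is
assumed from the attribute interfaces used elsewhere in this commit:

    var analyzer = new SimpleAnalyzer();
    TokenStream ts = analyzer.ReusableTokenStream("body", new System.IO.StringReader("Hello World 42"));
    var termAtt = ts.AddAttribute<ITermAttribute>();
    while (ts.IncrementToken())
        Console.WriteLine(termAtt.Term);   // "hello", "world" -- LetterTokenizer drops the digits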
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/Standard/StandardAnalyzer.cs b/src/core/Analysis/Standard/StandardAnalyzer.cs
index 347d026..bf704be 100644
--- a/src/core/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/core/Analysis/Standard/StandardAnalyzer.cs
@@ -24,151 +24,151 @@ using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis.Standard
{
-
- /// <summary> Filters <see cref="StandardTokenizer" /> with <see cref="StandardFilter" />,
- /// <see cref="LowerCaseFilter" /> and <see cref="StopFilter" />, using a list of English stop
- /// words.
- ///
- /// <a name="version"/>
- /// <p/>
- /// You must specify the required <see cref="Version" /> compatibility when creating
- /// StandardAnalyzer:
- /// <list type="bullet">
- /// <item>As of 2.9, StopFilter preserves position increments</item>
- /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
- /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>)</item>
- /// </list>
- /// </summary>
- public class StandardAnalyzer : Analyzer
- {
- private ISet<string> stopSet;
-
- /// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
+
+ /// <summary> Filters <see cref="StandardTokenizer" /> with <see cref="StandardFilter" />,
+ /// <see cref="LowerCaseFilter" /> and <see cref="StopFilter" />, using a list of English stop
+ /// words.
+ ///
+ /// <a name="version"/>
+ /// <p/>
+ /// You must specify the required <see cref="Version" /> compatibility when creating
+ /// StandardAnalyzer:
+ /// <list type="bullet">
+ /// <item>As of 2.9, StopFilter preserves position increments</item>
+ /// <item>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+        /// <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</item>
+ /// </list>
+ /// </summary>
+ public class StandardAnalyzer : Analyzer
+ {
+ private ISet<string> stopSet;
+
+ /// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
/// See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">https://issues.apache.org/jira/browse/LUCENE-1068</a>
- /// </summary>
- private bool replaceInvalidAcronym, enableStopPositionIncrements;
+ /// </summary>
+ private bool replaceInvalidAcronym, enableStopPositionIncrements;
- /// <summary>An unmodifiable set containing some common English words that are usually not
- /// useful for searching.
- /// </summary>
- public static readonly ISet<string> STOP_WORDS_SET;
- private Version matchVersion;
-
- /// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />).
- /// </summary>
-        /// <param name="matchVersion">Lucene version to match; see <see cref="Version">above</see></param>
- public StandardAnalyzer(Version matchVersion)
+ /// <summary>An unmodifiable set containing some common English words that are usually not
+ /// useful for searching.
+ /// </summary>
+ public static readonly ISet<string> STOP_WORDS_SET;
+ private Version matchVersion;
+
+ /// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS_SET" />).
+ /// </summary>
+        /// <param name="matchVersion">Lucene version to match; see <see cref="Version">above</see></param>
+ public StandardAnalyzer(Version matchVersion)
: this(matchVersion, STOP_WORDS_SET)
- { }
-
- /// <summary>Builds an analyzer with the given stop words.</summary>
+ { }
+
+ /// <summary>Builds an analyzer with the given stop words.</summary>
        /// <param name="matchVersion">Lucene version to match; see <see cref="Version">above</see>
- ///
- /// </param>
- /// <param name="stopWords">stop words
- /// </param>
- public StandardAnalyzer(Version matchVersion, ISet<string> stopWords)
- {
- stopSet = stopWords;
+ ///
+ /// </param>
+ /// <param name="stopWords">stop words
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, ISet<string> stopWords)
+ {
+ stopSet = stopWords;
SetOverridesTokenStreamMethod<StandardAnalyzer>();
enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
this.matchVersion = matchVersion;
- }
+ }
- /// <summary>Builds an analyzer with the stop words from the given file.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
- /// </seealso>
+ /// <summary>Builds an analyzer with the stop words from the given file.</summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
+ /// </seealso>
        /// <param name="matchVersion">Lucene version to match; see <see cref="Version">above</see>
- ///
- /// </param>
- /// <param name="stopwords">File to read stop words from
- /// </param>
- public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords)
+ ///
+ /// </param>
+ /// <param name="stopwords">File to read stop words from
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords)
: this (matchVersion, WordlistLoader.GetWordSet(stopwords))
- {
- }
-
- /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+ {
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
/// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
- /// </seealso>
+ /// </seealso>
        /// <param name="matchVersion">Lucene version to match; see <see cref="Version">above</see>
- ///
- /// </param>
- /// <param name="stopwords">Reader to read stop words from
- /// </param>
- public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
+ ///
+ /// </param>
+ /// <param name="stopwords">Reader to read stop words from
+ /// </param>
+ public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
: this(matchVersion, WordlistLoader.GetWordSet(stopwords))
- { }
-
-        /// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />,
-        /// a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />.
- /// </summary>
- public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
- tokenStream.MaxTokenLength = maxTokenLength;
- TokenStream result = new StandardFilter(tokenStream);
- result = new LowerCaseFilter(result);
- result = new StopFilter(enableStopPositionIncrements, result, stopSet);
- return result;
- }
-
- private sealed class SavedStreams
- {
- internal StandardTokenizer tokenStream;
- internal TokenStream filteredTokenStream;
- }
-
- /// <summary>Default maximum allowed token length </summary>
- public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+ { }
+
+        /// <summary>Constructs a <see cref="StandardTokenizer" /> filtered by a <see cref="StandardFilter" />,
+        /// a <see cref="LowerCaseFilter" /> and a <see cref="StopFilter" />.
+ /// </summary>
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
+ tokenStream.MaxTokenLength = maxTokenLength;
+ TokenStream result = new StandardFilter(tokenStream);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+ return result;
+ }
+
+ private sealed class SavedStreams
+ {
+ internal StandardTokenizer tokenStream;
+ internal TokenStream filteredTokenStream;
+ }
+
+ /// <summary>Default maximum allowed token length </summary>
+ public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-        /// <summary> Gets or sets the maximum allowed token length. Tokens
-        /// exceeding this length are discarded. This setting only takes
-        /// effect the next time TokenStream or ReusableTokenStream
-        /// is called.
-        /// </summary>
- public virtual int MaxTokenLength
- {
- get { return maxTokenLength; }
- set { maxTokenLength = value; }
- }
+        /// <summary> Gets or sets the maximum allowed token length. Tokens
+        /// exceeding this length are discarded. This setting only takes
+        /// effect the next time TokenStream or ReusableTokenStream
+        /// is called.
+        /// </summary>
+ public virtual int MaxTokenLength
+ {
+ get { return maxTokenLength; }
+ set { maxTokenLength = value; }
+ }
- public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- if (overridesTokenStreamMethod)
- {
- // LUCENE-1678: force fallback to tokenStream() if we
- // have been subclassed and that subclass overrides
- // tokenStream but not reusableTokenStream
- return TokenStream(fieldName, reader);
- }
- SavedStreams streams = (SavedStreams) PreviousTokenStream;
- if (streams == null)
- {
- streams = new SavedStreams();
- PreviousTokenStream = streams;
- streams.tokenStream = new StandardTokenizer(matchVersion, reader);
- streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
- streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
- streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ SavedStreams streams = (SavedStreams) PreviousTokenStream;
+ if (streams == null)
+ {
+ streams = new SavedStreams();
+ PreviousTokenStream = streams;
+ streams.tokenStream = new StandardTokenizer(matchVersion, reader);
+ streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
+ streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
streams.filteredTokenStream, stopSet);
- }
- else
- {
- streams.tokenStream.Reset(reader);
- }
- streams.tokenStream.MaxTokenLength = maxTokenLength;
-
- streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
-
- return streams.filteredTokenStream;
- }
- static StandardAnalyzer()
- {
- STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
- }
- }
+ }
+ else
+ {
+ streams.tokenStream.Reset(reader);
+ }
+ streams.tokenStream.MaxTokenLength = maxTokenLength;
+
+ streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
+
+ return streams.filteredTokenStream;
+ }
+ static StandardAnalyzer()
+ {
+ STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ }
+ }
}
\ No newline at end of file
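Putting the pieces of this class together: TokenStream builds the StandardTokenizer → StandardFilter → LowerCaseFilter → StopFilter chain on every call, while ReusableTokenStream caches the pair in SavedStreams and only Reset()s the tokenizer on reuse. A minimal usage sketch, assuming Version.LUCENE_29 as the compatibility value and the same attribute API as in the sketches above:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Version = Lucene.Net.Util.Version;

static class StandardAnalyzerSketch
{
    static void Main()
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_29)
        {
            MaxTokenLength = 64   // default is DEFAULT_MAX_TOKEN_LENGTH (255); longer tokens are discarded
        };

        foreach (var text in new[] { "The I.B.M. API", "O'Reilly's books" })
        {
            // Reuses one cached SavedStreams chain instead of rebuilding it per document.
            TokenStream ts = analyzer.ReusableTokenStream("field", new StringReader(text));
            ITermAttribute term = ts.AddAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.Write(term.Term + " ");
            Console.WriteLine();
        }
        // "the" is a stop word and disappears; the filter chain yields roughly:
        //   ibm api
        //   o'reilly books
    }
}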
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/Standard/StandardFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/Standard/StandardFilter.cs b/src/core/Analysis/Standard/StandardFilter.cs
index fd13261..f4a1c56 100644
--- a/src/core/Analysis/Standard/StandardFilter.cs
+++ b/src/core/Analysis/Standard/StandardFilter.cs
@@ -23,66 +23,66 @@ using TokenStream = Lucene.Net.Analysis.TokenStream;
namespace Lucene.Net.Analysis.Standard
{
-
- /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer" />. </summary>
-
- public sealed class StandardFilter:TokenFilter
- {
-
-
-        /// <summary>Constructs a filter over the input stream <i>in_Renamed</i>.</summary>
- public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
- {
+
+ /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer" />. </summary>
+
+ public sealed class StandardFilter:TokenFilter
+ {
+
+
+        /// <summary>Constructs a filter over the input stream <i>in_Renamed</i>.</summary>
+ public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
termAtt = AddAttribute<ITermAttribute>();
- typeAtt = AddAttribute<ITypeAttribute>();
- }
-
- private static readonly System.String APOSTROPHE_TYPE;
- private static readonly System.String ACRONYM_TYPE;
-
-        // this filter uses the token type attribute
- private ITypeAttribute typeAtt;
- private ITermAttribute termAtt;
-
-        /// <summary>Advances to the next token in the stream; returns false at end of stream.
- /// <p/>Removes <tt>'s</tt> from the end of words.
- /// <p/>Removes dots from acronyms.
- /// </summary>
- public override bool IncrementToken()
- {
- if (!input.IncrementToken())
- {
- return false;
- }
-
- char[] buffer = termAtt.TermBuffer();
- int bufferLength = termAtt.TermLength();
- System.String type = typeAtt.Type;
-
- if ((System.Object) type == (System.Object) APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
- {
- // Strip last 2 characters off
- termAtt.SetTermLength(bufferLength - 2);
- }
- else if ((System.Object) type == (System.Object) ACRONYM_TYPE)
- {
- // remove dots
- int upto = 0;
- for (int i = 0; i < bufferLength; i++)
- {
- char c = buffer[i];
- if (c != '.')
- buffer[upto++] = c;
- }
- termAtt.SetTermLength(upto);
- }
-
- return true;
- }
- static StandardFilter()
- {
- APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
- ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
- }
- }
+ typeAtt = AddAttribute<ITypeAttribute>();
+ }
+
+ private static readonly System.String APOSTROPHE_TYPE;
+ private static readonly System.String ACRONYM_TYPE;
+
+        // this filter uses the token type attribute
+ private ITypeAttribute typeAtt;
+ private ITermAttribute termAtt;
+
+        /// <summary>Advances to the next token in the stream; returns false at end of stream.
+ /// <p/>Removes <tt>'s</tt> from the end of words.
+ /// <p/>Removes dots from acronyms.
+ /// </summary>
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ {
+ return false;
+ }
+
+ char[] buffer = termAtt.TermBuffer();
+ int bufferLength = termAtt.TermLength();
+ System.String type = typeAtt.Type;
+
+ if ((System.Object) type == (System.Object) APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
+ {
+ // Strip last 2 characters off
+ termAtt.SetTermLength(bufferLength - 2);
+ }
+ else if ((System.Object) type == (System.Object) ACRONYM_TYPE)
+ {
+ // remove dots
+ int upto = 0;
+ for (int i = 0; i < bufferLength; i++)
+ {
+ char c = buffer[i];
+ if (c != '.')
+ buffer[upto++] = c;
+ }
+ termAtt.SetTermLength(upto);
+ }
+
+ return true;
+ }
+ static StandardFilter()
+ {
+ APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
+ ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+ }
+ }
}
\ No newline at end of file
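IncrementToken above is entirely type-driven: it only rewrites tokens whose type string is APOSTROPHE (trailing 's stripped) or ACRONYM (dots removed) and passes everything else through untouched. Here is a minimal sketch of the filter in isolation, without the lowercasing and stop-word stages StandardAnalyzer adds, under the same Version and attribute-API assumptions as the earlier sketches:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Version = Lucene.Net.Util.Version;

static class StandardFilterSketch
{
    static void Main()
    {
        var tokenizer = new StandardTokenizer(Version.LUCENE_29, new StringReader("Mike's U.S.A. trip"));
        TokenStream ts = new StandardFilter(tokenizer);   // rewrites APOSTROPHE and ACRONYM tokens only
        ITermAttribute term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            Console.WriteLine(term.Term);  // Mike, USA, trip (case untouched: no LowerCaseFilter here)
        ts.Close();
    }
}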