You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2012/03/27 18:42:00 UTC
svn commit: r1305897 - in
/incubator/lucene.net/trunk/src/contrib/Analyzers/De: GermanAnalyzer.cs
GermanStemFilter.cs
Author: ccurrens
Date: Tue Mar 27 16:42:00 2012
New Revision: 1305897
URL: http://svn.apache.org/viewvc?rev=1305897&view=rev
Log:
[LUCENENET-466] - Added XML comments for new constructors
Modified:
incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs?rev=1305897&r1=1305896&r2=1305897&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs Tue Mar 27 16:42:00 2012
@@ -42,7 +42,6 @@ namespace Lucene.Net.Analysis.De
/// <summary>
/// List of typical german stopwords.
/// </summary>
- [Obsolete("Use GetDefaultStopSet() instead")]
//TODO: make this private in 3.1
private static readonly String[] GERMAN_STOP_WORDS =
{
@@ -88,7 +87,7 @@ namespace Lucene.Net.Analysis.De
private ISet<string> exclusionSet;
private Version matchVersion;
- private readonly bool _useDin2Stemmer;
+ private readonly bool _normalizeDin2;
/// <summary>
/// Builds an analyzer with the default stop words:
@@ -104,6 +103,7 @@ namespace Lucene.Net.Analysis.De
/// Builds an analyzer with the default stop words:
/// <see cref="GetDefaultStopSet"/>
/// </summary>
+ /// <param name="matchVersion">Lucene compatibility version</param>
public GermanAnalyzer(Version matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_SET)
{ }
@@ -112,8 +112,12 @@ namespace Lucene.Net.Analysis.De
/// Builds an analyzer with the default stop words:
/// <see cref="GetDefaultStopSet"/>
/// </summary>
- public GermanAnalyzer(Version matchVersion, bool useDin2Stemmer)
- : this(matchVersion, DefaultSetHolder.DEFAULT_SET, useDin2Stemmer)
+ /// <param name="matchVersion">Lucene compatibility version</param>
+ /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN-5007-1. This
+ /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+ /// respectively, before the DIN-5007-1 stemmer is invoked.</param>
+ public GermanAnalyzer(Version matchVersion, bool normalizeDin2)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_SET, normalizeDin2)
{ }
/// <summary>
@@ -131,10 +135,11 @@ namespace Lucene.Net.Analysis.De
/// </summary>
/// <param name="matchVersion">Lucene compatibility version</param>
/// <param name="stopwords">a stopword set</param>
- /// <param name="useDin2Stemmer">Specifies if the DIN-2007-2 style stemmer should be used. Commonly referred to as
- /// phone book sorting, since it was defined to be used with names, rather than words</param>
- public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, bool useDin2Stemmer)
- : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, useDin2Stemmer)
+ /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN-5007-1. This
+ /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+ /// respectively, before the DIN-5007-1 stemmer is invoked.</param>
+ public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, bool normalizeDin2)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, normalizeDin2)
{
}
@@ -155,14 +160,15 @@ namespace Lucene.Net.Analysis.De
/// <param name="matchVersion">lucene compatibility version</param>
/// <param name="stopwords">a stopword set</param>
/// <param name="stemExclusionSet">a stemming exclusion set</param>
- /// <param name="useDin2Stemmer">Specifies if the DIN-2007-2 style stemmer should be used. Commonly referred to as
- /// phone book sorting, since it was defined to be used with names, rather than words</param>
- public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet, bool useDin2Stemmer)
+ /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN-5007-1. This
+ /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+ /// respectively, before the DIN-5007-1 stemmer is invoked.</param>
+ public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet, bool normalizeDin2)
{
stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
exclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
this.matchVersion = matchVersion;
- _useDin2Stemmer = useDin2Stemmer;
+ _normalizeDin2 = normalizeDin2;
SetOverridesTokenStreamMethod<GermanAnalyzer>();
}
@@ -237,7 +243,7 @@ namespace Lucene.Net.Analysis.De
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
- result = new GermanStemFilter(result, exclusionSet, _useDin2Stemmer);
+ result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
return result;
}
}
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs?rev=1305897&r1=1305896&r2=1305897&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs Tue Mar 27 16:42:00 2012
@@ -64,13 +64,14 @@ namespace Lucene.Net.Analysis.De
/// </summary>
/// <param name="_in"></param>
/// <param name="exclusiontable"></param>
- /// <param name="useDin2Stemmer">Specifies where to use the DIN-5007-2 (names)
- /// stemmer instead of the default DIN-5007-1 (words) stemmer</param>
- public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool useDin2Stemmer)
+ /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN-5007-1. This
+ /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+ /// respectively, before the DIN-5007-1 stemmer is invoked.</param>
+ public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
: base(_in)
{
exclusionSet = exclusiontable;
- stemmer = useDin2Stemmer ? new GermanDIN2Stemmer() : new GermanStemmer();
+ stemmer = normalizeDin2 ? new GermanDIN2Stemmer() : new GermanStemmer();
termAtt = AddAttribute<TermAttribute>();
}