You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2012/03/27 18:42:00 UTC

svn commit: r1305897 - in /incubator/lucene.net/trunk/src/contrib/Analyzers/De: GermanAnalyzer.cs GermanStemFilter.cs

Author: ccurrens
Date: Tue Mar 27 16:42:00 2012
New Revision: 1305897

URL: http://svn.apache.org/viewvc?rev=1305897&view=rev
Log:
[LUCENENET-466] - Added XML comments for new constructors

Modified:
    incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs
    incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs?rev=1305897&r1=1305896&r2=1305897&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanAnalyzer.cs Tue Mar 27 16:42:00 2012
@@ -42,7 +42,6 @@ namespace Lucene.Net.Analysis.De
         /// <summary>
         /// List of typical german stopwords.
         /// </summary>
-        [Obsolete("Use GetDefaultStopSet() instead")]
         //TODO: make this private in 3.1
         private static readonly String[] GERMAN_STOP_WORDS = 
 		{
@@ -88,7 +87,7 @@ namespace Lucene.Net.Analysis.De
         private ISet<string> exclusionSet;
 
         private Version matchVersion;
-        private readonly bool _useDin2Stemmer;
+        private readonly bool _normalizeDin2;
 
         /// <summary>
         /// Builds an analyzer with the default stop words:
@@ -104,6 +103,7 @@ namespace Lucene.Net.Analysis.De
         /// Builds an analyzer with the default stop words:
         /// <see cref="GetDefaultStopSet"/>
         /// </summary>
+        /// <param name="matchVersion">Lucene compatibility version</param>
         public GermanAnalyzer(Version matchVersion)
             : this(matchVersion, DefaultSetHolder.DEFAULT_SET)
         { }
@@ -112,8 +112,12 @@ namespace Lucene.Net.Analysis.De
         /// Builds an analyzer with the default stop words:
         /// <see cref="GetDefaultStopSet"/>
         ///  </summary>
-        public GermanAnalyzer(Version matchVersion, bool useDin2Stemmer)
-            : this(matchVersion, DefaultSetHolder.DEFAULT_SET, useDin2Stemmer)
+        /// <param name="matchVersion">Lucene compatibility version</param>
+        /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN1.  This
+        /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+        /// respectively, before the DIN1 stemmer is invoked.</param>
+        public GermanAnalyzer(Version matchVersion, bool normalizeDin2)
+            : this(matchVersion, DefaultSetHolder.DEFAULT_SET, normalizeDin2)
         { }
 
         /// <summary>
@@ -131,10 +135,11 @@ namespace Lucene.Net.Analysis.De
         /// </summary>
         /// <param name="matchVersion">Lucene compatibility version</param>
         /// <param name="stopwords">a stopword set</param>
-        /// <param name="useDin2Stemmer">Specifies if the DIN-2007-2 style stemmer should be used.  Commonly referred to as
-        /// phone book sorting, since it was defined to be used with names, rather than words</param>
-        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, bool useDin2Stemmer)
-            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, useDin2Stemmer)
+        /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN1.  This
+        /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+        /// respectively, before the DIN1 stemmer is invoked.</param>
+        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, bool normalizeDin2)
+            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET, normalizeDin2)
         {
         }
 
@@ -155,14 +160,15 @@ namespace Lucene.Net.Analysis.De
         /// <param name="matchVersion">lucene compatibility version</param>
         /// <param name="stopwords">a stopword set</param>
         /// <param name="stemExclusionSet">a stemming exclusion set</param>
-        /// <param name="useDin2Stemmer">Specifies if the DIN-2007-2 style stemmer should be used.  Commonly referred to as
-        /// phone book sorting, since it was defined to be used with names, rather than words</param>
-        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet, bool useDin2Stemmer)
+        /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN1.  This
+        /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+        /// respectively, before the DIN1 stemmer is invoked.</param>
+        public GermanAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionSet, bool normalizeDin2)
         {
             stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
             exclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionSet));
             this.matchVersion = matchVersion;
-            _useDin2Stemmer = useDin2Stemmer;
+            _normalizeDin2 = normalizeDin2;
             SetOverridesTokenStreamMethod<GermanAnalyzer>();
         }
 
@@ -237,7 +243,7 @@ namespace Lucene.Net.Analysis.De
             result = new StandardFilter(result);
             result = new LowerCaseFilter(result);
             result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet);
-            result = new GermanStemFilter(result, exclusionSet, _useDin2Stemmer);
+            result = new GermanStemFilter(result, exclusionSet, _normalizeDin2);
             return result;
         }
     }

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs?rev=1305897&r1=1305896&r2=1305897&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/De/GermanStemFilter.cs Tue Mar 27 16:42:00 2012
@@ -64,13 +64,14 @@ namespace Lucene.Net.Analysis.De
         /// </summary>
         /// <param name="_in"></param>
         /// <param name="exclusiontable"></param>
-        /// <param name="useDin2Stemmer">Specifies where to use the DIN-5007-2 (names) 
-        /// stemmer instead of the default DIN-5007-1 (words) stemmer</param>
-        public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool useDin2Stemmer)
+        /// <param name="normalizeDin2">Specifies if the DIN-5007-2 style stemmer should be used in addition to DIN1.  This
+        /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
+        /// respectively, before the DIN1 stemmer is invoked.</param>
+        public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
             : base(_in)
         {
             exclusionSet = exclusiontable;
-            stemmer = useDin2Stemmer ? new GermanDIN2Stemmer() : new GermanStemmer();
+            stemmer = normalizeDin2 ? new GermanDIN2Stemmer() : new GermanStemmer();
             termAtt = AddAttribute<TermAttribute>();
         }