Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/21 05:44:59 UTC

[Lucene.Net] svn commit: r1204353 [5/9] - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src: contrib/Analyzers/ contrib/Analyzers/AR/ contrib/Analyzers/BR/ contrib/Analyzers/CJK/ contrib/Analyzers/Cn/ contrib/Analyzers/Compound/ contrib/Analyzers/Compoun...

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Fr/FrenchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Fr/FrenchStemmer.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Fr/FrenchStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Fr/FrenchStemmer.cs Mon Nov 21 04:44:55 2011
@@ -24,783 +24,699 @@ using System.Text;
 
 namespace Lucene.Net.Analysis.Fr
 {
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2001 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// A stemmer for French words. The algorithm is based on the work of
-	/// Dr Martin Porter on his snowball project<br/>
-	/// refer to http://snowball.sourceforge.net/french/stemmer.html<br/>
-	/// (French stemming algorithm) for details
-	/// 
-	/// <author>Patrick Talbot (based on Gerhard Schwarz work for German)</author>
-	/// <version>$Id: FrenchStemmer.java,v 1.2 2004/01/22 20:54:47 ehatcher Exp $</version>
-	/// </summary>
-	public class FrenchStemmer 
-	{
-
-		/// <summary>
-		/// Buffer for the terms while stemming them.
-		/// </summary>
-		private StringBuilder sb = new StringBuilder();
-
-		/// <summary>
-		/// A temporary buffer, used to reconstruct R2
-		/// </summary>
-		private StringBuilder tb = new StringBuilder();
-
-		/// <summary>
-		/// Region R0 is equal to the whole buffer
-		/// </summary>
-		private String R0;
-
-		/// <summary>
-		/// Region RV
-		/// "If the word begins with two vowels, RV is the region after the third letter,
-		/// otherwise the region after the first vowel not at the beginning of the word,
-		/// or the end of the word if these positions cannot be found."
-		/// </summary>
-		private String RV;
-
-		/// <summary>
-		/// Region R1
-		/// "R1 is the region after the first non-vowel following a vowel
-		/// or is the null region at the end of the word if there is no such non-vowel"
-		/// </summary>
-		private String R1;
-
-		/// <summary>
-		/// Region R2
-		/// "R2 is the region after the first non-vowel in R1 following a vowel
-		/// or is the null region at the end of the word if there is no such non-vowel"
-		/// </summary>
-		private String R2;
-
-
-		/// <summary>
-		/// Set to true if we need to perform step 2
-		/// </summary>
-		private bool suite;
-
-		/// <summary>
-		/// Set to true if the buffer was modified
-		/// </summary>
-		private bool modified;
-
-		/// <summary>
-		/// Stemms the given term to a unique <tt>discriminator</tt>.
-		/// </summary>
-		/// <param name="term">
-		/// java.langString The term that should be stemmed
-		/// </param>
-		/// <returns>
-		/// Discriminator for <tt>term</tt>
-		/// </returns>
-		protected internal String Stem( String term ) 
-		{
-			if ( !IsStemmable( term ) ) 
-			{
-				return term;
-			}
-
-			// Use lowercase for medium stemming.
-			term = term.ToLower();
-
-			// Reset the StringBuilder.
-			sb.Remove( 0, sb.Length );
-			sb.Append( term );
-
-			// reset the booleans
-			modified = false;
-			suite = false;
-
-			sb = TreatVowels( sb );
-
-			SetStrings();
-
-			Step1();
-
-			if (!modified || suite)
-			{
-				if (RV != null)
-				{
-					suite = Step2a();
-					if (!suite)
-						Step2b();
-				}
-			}
+    
+/**
+ * A stemmer for French words. 
+ * <p>
+ * The algorithm is based on the work of
+ * Dr Martin Porter on his snowball project<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
+ * (French stemming algorithm) for details
+ * </p>
+ */
+
+public class FrenchStemmer {
+
+    /**
+     * Buffer for the terms while stemming them.
+     */
+    private StringBuilder sb = new StringBuilder();
+
+    /**
+     * A temporary buffer, used to reconstruct R2
+     */
+     private StringBuilder tb = new StringBuilder();
 
-			if (modified || suite)
-				Step3();
-			else
-				Step4();
+	/**
+	 * Region R0 is equal to the whole buffer
+	 */
+	private String R0;
 
-			Step5();
+	/**
+	 * Region RV
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."
+	 */
+    private String RV;
 
-			Step6();
+	/**
+	 * Region R1
+	 * "R1 is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R1;
 
-			return sb.ToString();
-		}
+	/**
+	 * Region R2
+	 * "R2 is the region after the first non-vowel in R1 following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R2;
 
-		/// <summary>
-		/// Sets the search region Strings<br/>
-		/// it needs to be done each time the buffer was modified
-		/// </summary>
-		private void SetStrings() 
-		{
-			// set the strings
-			R0 = sb.ToString();
-			RV = RetrieveRV( sb );
-			R1 = RetrieveR( sb );
-			if ( R1 != null )
-			{
-				tb.Remove( 0, tb.Length );
-				tb.Append( R1 );
-				R2 = RetrieveR( tb );
-			}
-			else
-				R2 = null;
-		}
 
-		/// <summary>
-		/// First step of the Porter Algorithmn<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step1( ) 
-		{
-			String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
-			DeleteFrom( R2, suffix );
+	/**
+	 * Set to true if we need to perform step 2
+	 */
+    private bool suite;
 
-			ReplaceFrom( R2, new String[] { "logies", "logie" }, "log" );
-			ReplaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
-			ReplaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
+	/**
+	 * Set to true if the buffer was modified
+	 */
+    private bool modified;
 
-			String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
-			DeleteButSuffixFromElseReplace( R2, search, "ic",  true, R0, "iqU" );
 
-			DeleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
-			DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
-			DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
-			DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
-			DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
+    /**
+     * Stems the given term to a unique <tt>discriminator</tt>.
+     *
+     * @param term  java.lang.String The term that should be stemmed
+     * @return java.lang.String  Discriminator for <tt>term</tt>
+     */
+    protected internal String Stem( String term ) {
+		if ( !IsStemmable( term ) ) {
+			return term;
+		}
 
-			DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
-			DeleteFrom( RV, new String[] { "ements", "ement" } );
+		// Use lowercase for medium stemming.
+		term = term.ToLower();
 
-			DeleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "abil", false, R0, "abl" );
-			DeleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "ic", false, R0, "iqU" );
-			DeleteButSuffixFrom( R2, new String[] { "ités", "ité" }, "iv", true );
+		// Reset the StringBuilder.
+		sb.Remove( 0, sb.Length );
+		sb.Insert( 0, term );
 
-			String[] autre = { "ifs", "ives", "if", "ive" };
-			DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
-			DeleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
+		// reset the bools
+		modified = false;
+		suite = false;
 
-			ReplaceFrom( R0, new String[] { "eaux" }, "eau" );
+		sb = TreatVowels( sb );
 
-			ReplaceFrom( R1, new String[] { "aux" }, "al" );
+		SetStrings();
 
-			DeleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
+		Step1();
 
-			DeleteFrom( R2, new String[] { "eux" } );
+		if (!modified || suite)
+		{
+			if (RV != null)
+			{
+				suite = Step2A();
+				if (!suite)
+					Step2B();
+			}
+		}
 
-			// if one of the next steps is performed, we will need to perform step2a
-			bool temp = false;
-			temp = ReplaceFrom( RV, new String[] { "amment" }, "ant" );
-			if (temp == true)
-				suite = true;
-			temp = ReplaceFrom( RV, new String[] { "emment" }, "ent" );
-			if (temp == true)
-				suite = true;
-			temp = DeleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
-			if (temp == true)
-				suite = true;
+		if (modified || suite)
+			Step3();
+		else
+			Step4();
 
-		}
+		Step5();
 
-		/// <summary>
-		/// Second step (A) of the Porter Algorithmn<br/>
-		/// Will be performed if nothing changed from the first step
-		/// or changed were done in the amment, emment, ments or ment suffixes<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		/// <returns>
-		/// true if something changed in the StringBuilder
-		/// </returns>
-		private bool Step2a() 
-		{
-			String[] search = { "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira",
-								  "irent", "iriez", "irez", "irions", "irons", "iront",
-								  "issaIent", "issais", "issantes", "issante", "issants", "issant",
-								  "issait", "issais", "issions", "issons", "issiez", "issez", "issent",
-								  "isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i" };
-			return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
+		Step6();
+
+		return sb.ToString();
+    }
+
+	/**
+	 * Sets the search region Strings<br>
+	 * it needs to be done each time the buffer is modified
+	 */
+	private void SetStrings() {
+		// set the strings
+		R0 = sb.ToString();
+		RV = RetrieveRV( sb );
+		R1 = RetrieveR( sb );
+		if ( R1 != null )
+		{
+			tb.Remove( 0, tb.Length );
+			tb.Insert( 0, R1 );
+			R2 = RetrieveR( tb );
 		}
+		else
+			R2 = null;
+	}
 
-		/// <summary>
-		/// Second step (B) of the Porter Algorithmn<br/>
-		/// Will be performed if step 2 A was performed unsuccessfully<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step2b() 
-		{
-			String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
-								  "erons", "eront","erez", "èrent", "era", "ées", "iez",
-								  "ée", "és", "er", "ez", "é" };
-			DeleteFrom( RV, suffix );
+	/**
+	 * First step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step1( ) {
+		String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
+		DeleteFrom( R2, suffix );
+
+		ReplaceFrom( R2, new String[] { "logies", "logie" }, "log" );
+		ReplaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
+		ReplaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
+
+		String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
+		DeleteButSuffixFromElseReplace( R2, search, "ic",  true, R0, "iqU" );
+
+		DeleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
+		DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
+		DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
+		DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
+		DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
+
+		DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
+		DeleteFrom( RV, new String[] { "ements", "ement" } );
+
+		DeleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "abil", false, R0, "abl" );
+		DeleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "ic", false, R0, "iqU" );
+		DeleteButSuffixFrom( R2, new String[] { "ités", "ité" }, "iv", true );
+
+		String[] autre = { "ifs", "ives", "if", "ive" };
+		DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
+		DeleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
+
+		ReplaceFrom( R0, new String[] { "eaux" }, "eau" );
+
+		ReplaceFrom( R1, new String[] { "aux" }, "al" );
+
+		DeleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
+
+		DeleteFrom( R2, new String[] { "eux" } );
+
+		// if one of the next steps is performed, we will need to perform step2a
+		bool temp = false;
+		temp = ReplaceFrom( RV, new String[] { "amment" }, "ant" );
+		if (temp == true)
+			suite = true;
+		temp = ReplaceFrom( RV, new String[] { "emment" }, "ent" );
+		if (temp == true)
+			suite = true;
+		temp = DeleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
+		if (temp == true)
+			suite = true;
 
-			String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
-								  "antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant",
-								  "ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
-			DeleteButSuffixFrom( RV, search, "e", true );
+	}
 
-			DeleteFrom( R2, new String[] { "ions" } );
-		}
+	/**
+	 * Second step (A) of the Porter Algorithm<br>
+	 * Will be performed if nothing changed from the first step
+	 * or if changes were made to the amment, emment, ments or ment suffixes<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 *
+	 * @return bool - true if something changed in the StringBuilder
+	 */
+	private bool Step2A() {
+		String[] search = { "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira",
+							"irent", "iriez", "irez", "irions", "irons", "iront",
+							"issaIent", "issais", "issantes", "issante", "issants", "issant",
+							"issait", "issais", "issions", "issons", "issiez", "issez", "issent",
+							"isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i" };
+		return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
+	}
+
+	/**
+	 * Second step (B) of the Porter Algorithm<br>
+	 * Will be performed if step 2 A was performed unsuccessfully<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step2B() {
+		String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
+							"erons", "eront","erez", "èrent", "era", "ées", "iez",
+							"ée", "és", "er", "ez", "é" };
+		DeleteFrom( RV, suffix );
+
+		String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
+							"antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant",
+							"ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
+		DeleteButSuffixFrom( RV, search, "e", true );
+
+		DeleteFrom( R2, new String[] { "ions" } );
+	}
 
-		/// <summary>
-		/// Third step of the Porter Algorithmn<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step3() 
+	/**
+	 * Third step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step3() {
+		if (sb.Length>0)
 		{
-			if (sb.Length>0)
+			char ch = sb[ sb.Length -1];
+			if (ch == 'Y')
 			{
-				char ch = sb[ sb.Length-1];
-				if (ch == 'Y')
-				{
-					sb[ sb.Length-1] = 'i';
-					SetStrings();
-				}
-				else if (ch == 'ç')
-				{
-					sb[ sb.Length-1] = 'c';
-					SetStrings();
-				}
+				sb[sb.Length -1] = 'i' ;
+				SetStrings();
 			}
-		}
-
-		/// <summary>
-		/// Fourth step of the Porter Algorithmn<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step4() 
-		{
-			if (sb.Length > 1)
+            else if (ch == 'ç')
 			{
-				char ch = sb[sb.Length-1];
-				if (ch == 's')
-				{
-					char b = sb[ sb.Length-2 ];
-					if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
-					{
-						sb.Remove( sb.Length - 1, 1);
-						SetStrings();
-					}
-				}
+				sb[sb.Length -1] = 'c';
+				SetStrings();
 			}
-			bool found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
-			if (!found)
-				found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
-
-			ReplaceFrom( RV, new String[] { "Ière", "ière", "Ier", "ier" }, "i" );
-			DeleteFrom( RV, new String[] { "e" } );
-			DeleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
 		}
+	}
 
-		/// <summary>
-		/// Fifth step of the Porter Algorithmn<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step5() 
+	/**
+	 * Fourth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step4() {
+		if (sb.Length > 1)
 		{
-			if (R0 != null)
+			char ch = sb[ sb.Length -1];
+			if (ch == 's')
 			{
-				if (R0.EndsWith("enn") || R0.EndsWith("onn") || R0.EndsWith("ett") || R0.EndsWith("ell") || R0.EndsWith("eill"))
+				char b = sb[ sb.Length -2];
+				if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
 				{
-					sb.Remove( sb.Length - 1, 1);
+					sb.Remove( sb.Length - 1, 1);
 					SetStrings();
 				}
 			}
 		}
+		bool found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
+		if (!found)
+			found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
+
+		ReplaceFrom( RV, new String[] { "Ière", "ière", "Ier", "ier" }, "i" );
+		DeleteFrom( RV, new String[] { "e" } );
+		DeleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
+	}
 
-		/// <summary>
-		/// Sixth (and last!) step of the Porter Algorithmn<br/>
-		/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
-		/// </summary>
-		private void Step6() 
-		{
-			if (R0!=null && R0.Length>0)
-			{
-				bool seenVowel = false;
-				bool seenConson = false;
-				int pos = -1;
-				for (int i = R0.Length-1; i > -1; i--)
-				{
-					char ch = R0[i];
-					if (IsVowel(ch))
-					{
-						if (!seenVowel)
-						{
-							if (ch == 'é' || ch == 'è')
-							{
-								pos = i;
-								break;
-							}
-						}
-						seenVowel = true;
-					}
-					else
-					{
-						if (seenVowel)
-							break;
-						else
-							seenConson = true;
-					}
-				}
-				if (pos > -1 && seenConson && !seenVowel)
-					sb[pos] = 'e';
+	/**
+	 * Fifth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step5() {
+		if (R0 != null)
+		{
+			if (R0.EndsWith("enn") || R0.EndsWith("onn") || R0.EndsWith("ett") || R0.EndsWith("ell") || R0.EndsWith("eill"))
+			{
+				sb.Remove( sb.Length - 1, 1 );
+				SetStrings();
 			}
 		}
+	}
 
-		/// <summary>
-		/// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
-		/// </summary>
-		/// <param name="source">the primary source zone for search</param>
-		/// <param name="search">the strings to search for suppression</param>
-		/// <param name="from">the secondary source zone for search</param>
-		/// <param name="prefix">the prefix to add to the search string to test</param>
-		/// <returns>true if modified</returns>
-		private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) 
+	/**
+	 * Sixth (and last!) step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void Step6() {
+		if (R0!=null && R0.Length>0)
 		{
-			bool found = false;
-			if (source!=null )
+			bool seenVowel = false;
+			bool seenConson = false;
+			int pos = -1;
+			for (int i = R0.Length-1; i > -1; i--)
 			{
-				for (int i = 0; i < search.Length; i++) 
+				char ch = R0[i] ;
+				if (IsVowel(ch))
 				{
-					if ( source.EndsWith( search[i] ))
+					if (!seenVowel)
 					{
-						if (from!=null && from.EndsWith( prefix + search[i] ))
+						if (ch == 'é' || ch == 'è')
 						{
-							sb.Remove( sb.Length - search[i].Length, search[i].Length);
-							found = true;
-							SetStrings();
+							pos = i;
 							break;
 						}
 					}
+					seenVowel = true;
 				}
-			}
-			return found;
-		}
-
-		/// <summary>
-		/// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
-		/// </summary>
-		/// <param name="source">the primary source zone for search</param>
-		/// <param name="search">the strings to search for suppression</param>
-		/// <param name="vowel">true if we need a vowel before the search string</param>
-		/// <param name="from">the secondary source zone for search (where vowel could be)</param>
-		/// <returns>true if modified</returns>
-		private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search, bool vowel, String from ) 
-		{
-			bool found = false;
-			if (source!=null && from!=null)
-			{
-				for (int i = 0; i < search.Length; i++) 
+				else
 				{
-					if ( source.EndsWith( search[i] ))
-					{
-						if ((search[i].Length + 1) <= from.Length)
-						{
-							bool test = IsVowel(sb[sb.Length-(search[i].Length+1)]);
-							if (test == vowel)
-							{
-								sb.Remove( sb.Length - search[i].Length, search[i].Length);
-								modified = true;
-								found = true;
-								SetStrings();
-								break;
-							}
-						}
-					}
+					if (seenVowel)
+						break;
+					else
+						seenConson = true;
 				}
 			}
-			return found;
+			if (pos > -1 && seenConson && !seenVowel)
+				sb[pos] = 'e';
 		}
+	}
 
-		/// <summary>
-		/// Delete a suffix searched in zone "source" if preceded by the prefix
-		/// </summary>
-		/// <param name="source">the primary source zone for search</param>
-		/// <param name="search">the strings to search for suppression</param>
-		/// <param name="prefix">the prefix to add to the search string to test</param>
-		/// <param name="without">true if it will be deleted even without prefix found</param>
-		private void DeleteButSuffixFrom( String source, String[] search, String prefix, bool without ) 
+	/**
+	 * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param from java.lang.String - the secondary source zone for search
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @return bool - true if modified
+	 */
+	private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
+		bool found = false;
+		if (source!=null )
 		{
-			if (source!=null)
-			{
-				for (int i = 0; i < search.Length; i++) 
+			for (int i = 0; i < search.Length; i++) {
+				if ( source.EndsWith( search[i] ))
 				{
-					if ( source.EndsWith( prefix + search[i] ))
-					{
-						sb.Remove( sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
-						modified = true;
-						SetStrings();
-						break;
-					}
-					else if ( without && source.EndsWith( search[i] ))
+					if (from!=null && from.EndsWith( prefix + search[i] ))
 					{
-						sb.Remove( sb.Length - search[i].Length, search[i].Length);
-						modified = true;
+						sb.Remove( sb.Length - search[i].Length, search[i].Length);
+						found = true;
 						SetStrings();
 						break;
 					}
 				}
 			}
 		}
+		return found;
+	}
 
-		/// <summary>
-		/// Delete a suffix searched in zone "source" if preceded by prefix<br/>
-		/// or replace it with the replace string if preceded by the prefix in the zone "from"<br/>
-		/// or delete the suffix if specified
-		/// </summary>
-		/// <param name="source">the primary source zone for search</param>
-		/// <param name="search">the strings to search for suppression</param>
-		/// <param name="prefix">the prefix to add to the search string to test</param>
-		/// <param name="without">true if it will be deleted even without prefix found</param>
-		private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix, bool without, String from, String replace ) 
+	/**
+	 * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param vowel bool - true if we need a vowel before the search string
+	 * @param from java.lang.String - the secondary source zone for search (where vowel could be)
+	 * @return bool - true if modified
+	 */
+	private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search, bool vowel, String from ) {
+		bool found = false;
+		if (source!=null && from!=null)
 		{
-			if (source!=null)
-			{
-				for (int i = 0; i < search.Length; i++) 
+			for (int i = 0; i < search.Length; i++) {
+				if ( source.EndsWith( search[i] ))
 				{
-					if ( source.EndsWith( prefix + search[i] ))
+					if ((search[i].Length + 1) <= from.Length)
 					{
-						sb.Remove( sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
-						modified = true;
-						SetStrings();
-						break;
-					}
-					else if ( from!=null && from.EndsWith( prefix + search[i] ))
-					{
-						sb.Remove(sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
-						sb.Append( replace );
-						modified = true;
-						SetStrings();
-						break;
-					}
-					else if ( without && source.EndsWith( search[i] ))
-					{
-						sb.Remove( sb.Length - search[i].Length, search[i].Length );
-						modified = true;
-						SetStrings();
-						break;
+						bool test = IsVowel(sb[sb.Length -(search[i].Length+1)]);
+						if (test == vowel)
+						{
+							sb.Remove( sb.Length - search[i].Length, search[i].Length);
+							modified = true;
+							found = true;
+							SetStrings();
+							break;
+						}
 					}
 				}
 			}
 		}
+		return found;
+	}
 
-		/// <summary>
-		/// Replace a search string with another within the source zone
-		/// </summary>
-		/// <param name="source">the source zone for search</param>
-		/// <param name="search">the strings to search for replacement</param>
-		/// <param name="replace">the replacement string</param>
-		/// <returns></returns>
-		private bool ReplaceFrom( String source, String[] search, String replace ) 
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by the prefix
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without bool - true if it will be deleted even without prefix found
+	 */
+	private void DeleteButSuffixFrom( String source, String[] search, String prefix, bool without ) {
+		if (source!=null)
 		{
-			bool found = false;
-			if (source!=null)
-			{
-				for (int i = 0; i < search.Length; i++) 
+			for (int i = 0; i < search.Length; i++) {
+				if ( source.EndsWith( prefix + search[i] ))
 				{
-					if ( source.EndsWith( search[i] ))
-					{
-						sb.Remove(sb.Length - search[i].Length, search[i].Length);
-						sb.Append( replace );
-						modified = true;
-						found = true;
-						SetStrings();
-						break;
-					}
+					sb.Remove( sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length );
+					modified = true;
+					SetStrings();
+					break;
+				}
+				else if ( without && source.EndsWith( search[i] ))
+				{
+					sb.Remove( sb.Length - search[i].Length, search[i].Length );
+					modified = true;
+					SetStrings();
+					break;
 				}
 			}
-			return found;
 		}
+	}
 
-		/// <summary>
-		/// Delete a search string within the source zone
-		/// </summary>
-		/// <param name="source">the source zone for search</param>
-		/// <param name="suffix">the strings to search for suppression</param>
-		private void DeleteFrom(String source, String[] suffix ) 
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by prefix<br>
+	 * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
+	 * or delete the suffix if specified
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without bool - true if it will be deleted even without prefix found
+	 */
+	private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix, bool without, String from, String replace ) {
+		if (source!=null)
 		{
-			if (source!=null)
-			{
-				for (int i = 0; i < suffix.Length; i++) 
+			for (int i = 0; i < search.Length; i++) {
+				if ( source.EndsWith( prefix + search[i] ))
 				{
-					if (source.EndsWith( suffix[i] ))
-					{
-						sb.Remove( sb.Length - suffix[i].Length, suffix[i].Length);
-						modified = true;
-						SetStrings();
-						break;
-					}
+					sb.Remove( sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length );
+					modified = true;
+					SetStrings();
+					break;
+				}
+				else if ( from!=null && from.EndsWith( prefix + search[i] ))
+				{
+					sb.Remove(sb.Length - (prefix.Length + search[i].Length), prefix.Length + search[i].Length);
+					sb.Append( replace );
+					modified = true;
+					SetStrings();
+					break;
+				}
+				else if ( without && source.EndsWith( search[i] ))
+				{
+					sb.Remove( sb.Length - search[i].Length, search[i].Length );
+					modified = true;
+					SetStrings();
+					break;
 				}
 			}
 		}
+	}
 
-		/// <summary>
-		/// Test if a char is a french vowel, including accentuated ones
-		/// </summary>
-		/// <param name="ch">the char to test</param>
-		/// <returns>true if the char is a vowel</returns>
-		private bool IsVowel(char ch) 
-		{
-			switch (ch)
-			{
-				case 'a':
-				case 'e':
-				case 'i':
-				case 'o':
-				case 'u':
-				case 'y':
-				case 'â':
-				case 'à':
-				case 'ë':
-				case 'é':
-				case 'ê':
-				case 'è':
-				case 'ï':
-				case 'î':
-				case 'ô':
-				case 'ü':
-				case 'ù':
-				case 'û':
-					return true;
-				default:
-					return false;
+	/**
+	 * Replace a search string with another within the source zone
+	 *
+	 * @param source java.lang.String - the source zone for search
+	 * @param search java.lang.String[] - the strings to search for replacement
+	 * @param replace java.lang.String - the replacement string
+	 */
+	private bool ReplaceFrom( String source, String[] search, String replace ) {
+		bool found = false;
+		if (source!=null)
+		{
+			for (int i = 0; i < search.Length; i++) {
+				if ( source.EndsWith( search[i] ))
+				{
+					sb.Remove(sb.Length - search[i].Length, search[i].Length);
+					sb.Append( replace );  // replacement for java's replace
+					modified = true;
+					found = true;
+					SetStrings();
+					break;
+				}
 			}
 		}
+		return found;
+	}
 
-		/// <summary>
-		/// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br/>
-		/// "R is the region after the first non-vowel following a vowel
-		/// or is the null region at the end of the word if there is no such non-vowel"<br/>
-		/// </summary>
-		/// <param name="buffer">the in buffer</param>
-		/// <returns>the resulting string</returns>
-		private String RetrieveR( StringBuilder buffer ) 
+	/**
+	 * Delete a search string within the source zone
+	 *
+	 * @param source the source zone for search
+	 * @param suffix the strings to search for suppression
+	 */
+	private void DeleteFrom(String source, String[] suffix ) {
+		if (source!=null)
 		{
-			int len = buffer.Length;
-			int pos = -1;
-			for (int c = 0; c < len; c++) 
-			{
-				if (IsVowel( buffer[c]))
+			for (int i = 0; i < suffix.Length; i++) {
+				if (source.EndsWith( suffix[i] ))
 				{
-					pos = c;
+					sb.Remove( sb.Length - suffix[i].Length, suffix[i].Length);
+					modified = true;
+					SetStrings();
 					break;
 				}
 			}
-			if (pos > -1)
+		}
+	}
+
+	/**
+	 * Test if a char is a French vowel, including accented ones
+	 *
+	 * @param ch the char to test
+	 * @return bool - true if the char is a vowel
+	 */
+	private bool IsVowel(char ch) {
+		switch (ch)
+		{
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'y':
+			case 'â':
+			case 'à':
+			case 'ë':
+			case 'é':
+			case 'ê':
+			case 'è':
+			case 'ï':
+			case 'î':
+			case 'ô':
+			case 'ü':
+			case 'ù':
+			case 'û':
+				return true;
+			default:
+				return false;
+		}
+	}
+
+	/**
+	 * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
+	 * "R is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"<br>
+	 * @param buffer java.lang.StringBuilder - the input buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String RetrieveR( StringBuilder buffer ) {
+		int len = buffer.Length;
+		int pos = -1;
+		for (int c = 0; c < len; c++) {
+			if (IsVowel( buffer[ c ] ))
 			{
-				int consonne = -1;
-				for (int c = pos; c < len; c++) 
+				pos = c;
+				break;
+			}
+		}
+		if (pos > -1)
+		{
+			int consonne = -1;
+			for (int c = pos; c < len; c++) {
+				if (!IsVowel(buffer[ c ] ))
 				{
-					if (!IsVowel(buffer[c]))
-					{
-						consonne = c;
-						break;
-					}
+					consonne = c;
+					break;
 				}
-				if (consonne > -1 && (consonne+1) < len)
-					return buffer.ToString().Substring( consonne+1, len - (consonne+1) );
-				else
-					return null;
 			}
+			if (consonne > -1 && (consonne+1) < len)
+				return buffer.ToString().Substring( consonne+1, len - (consonne+1) );
 			else
 				return null;
 		}
+		else
+			return null;
+	}
 
-		/// <summary>
-		/// Retrieve the "RV zone" from a buffer an return the corresponding string<br/>
-		/// "If the word begins with two vowels, RV is the region after the third letter,
-		/// otherwise the region after the first vowel not at the beginning of the word,
-		/// or the end of the word if these positions cannot be found."<br/>
-		/// </summary>
-		/// <param name="buffer">the in buffer</param>
-		/// <returns>the resulting string</returns>
-		private String RetrieveRV( StringBuilder buffer ) 
+	/**
+	 * Retrieve the "RV zone" from a buffer and return the corresponding string<br>
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."<br>
+	 * @param buffer java.lang.StringBuilder - the input buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String RetrieveRV( StringBuilder buffer ) {
+		int len = buffer.Length;
+		if ( buffer.Length > 3)
 		{
-			int len = buffer.Length;
-			if ( buffer.Length > 3)
+			if ( IsVowel(buffer[ 0 ] ) && IsVowel(buffer[ 1 ] )) {
+				return buffer.ToString().Substring(3, len - 3);
+			}
+			else
 			{
-				if ( IsVowel(buffer[0]) && IsVowel(buffer[1])) 
-				{
-					return buffer.ToString().Substring(3,len-3);
-				}
-				else
-				{
-					int pos = 0;
-					for (int c = 1; c < len; c++) 
+				int pos = 0;
+				for (int c = 1; c < len; c++) {
+					if (IsVowel( buffer[ c ] ))
 					{
-						if (IsVowel( buffer[c]))
-						{
-							pos = c;
-							break;
-						}
+						pos = c;
+						break;
 					}
-					if ( pos+1 < len )
-						return buffer.ToString().Substring( pos+1, len - (pos+1));
-					else
-						return null;
 				}
+				if ( pos+1 < len )
+					return buffer.ToString().Substring(pos+1, len - (pos+1) );
+				else
+					return null;
 			}
-			else
-				return null;
 		}
+		else
+			return null;
+	}
 
 
-		/// <summary>
-		/// Turns u and i preceded AND followed by a vowel to UpperCase<br/>
-		/// Turns y preceded OR followed by a vowel to UpperCase<br/>
-		/// Turns u preceded by q to UpperCase<br/>
-		/// </summary>
-		/// <param name="buffer">the buffer to treat</param>
-		/// <returns>the treated buffer</returns>
-		private StringBuilder TreatVowels( StringBuilder buffer ) 
-		{
-			for ( int c = 0; c < buffer.Length; c++ ) 
-			{
-				char ch = buffer[c];
 
-				if (c == 0) // first char
+    /**
+	 * Turns u and i preceded AND followed by a vowel to UpperCase<br>
+	 * Turns y preceded OR followed by a vowel to UpperCase<br>
+	 * Turns u preceded by q to UpperCase<br>
+     *
+     * @param buffer java.lang.StringBuilder - the buffer to treat
+     * @return java.lang.StringBuilder - the treated buffer
+     */
+    private StringBuilder TreatVowels( StringBuilder buffer ) {
+		for ( int c = 0; c < buffer.Length; c++ ) {
+			char ch = buffer[ c ] ;
+
+			if (c == 0) // first char
+			{
+				if (buffer.Length>1)
 				{
-					if (buffer.Length>1)
-					{
-						if (ch == 'y' && IsVowel(buffer[ c + 1 ]))
-							buffer[c] = 'Y';
-					}
+					if (ch == 'y' && IsVowel(buffer[ c + 1 ] ))
+						buffer[c] = 'Y';
 				}
-				else if (c == buffer.Length-1) // last char
+			}
+			else if (c == buffer.Length-1) // last char
+			{
+				if (ch == 'u' && buffer[ c - 1 ] == 'q')
+					buffer[c] = 'U';
+				if (ch == 'y' && IsVowel(buffer[ c - 1 ] ))
+					buffer[c] = 'Y';
+			}
+			else // other cases
+			{
+				if (ch == 'u')
 				{
-					if (ch == 'u' && buffer[c - 1] == 'q')
+					if (buffer[ c - 1]  == 'q')
+						buffer[c] = 'U';
+					else if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
 						buffer[c] = 'U';
-					if (ch == 'y' && IsVowel(buffer[ c - 1 ]))
-						buffer[c] = 'Y';
 				}
-				else // other cases
+				if (ch == 'i')
 				{
-					if (ch == 'u')
-					{
-						if (buffer[ c - 1] == 'q')
-							buffer[ c ] = 'U';
-						else if (IsVowel(buffer[c - 1]) && IsVowel(buffer[c + 1]))
-							buffer[c] = 'U';
-					}
-					if (ch == 'i')
-					{
-						if (IsVowel(buffer[c - 1]) && IsVowel(buffer[ c + 1 ]))
-							buffer[c] = 'I';
-					}
-					if (ch == 'y')
-					{
-						if (IsVowel(buffer[c - 1]) || IsVowel(buffer[c + 1]))
-							buffer[c] = 'Y';
-					}
+					if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
+						buffer[c] = 'I';
+				}
+				if (ch == 'y')
+				{
+					if (IsVowel(buffer[ c - 1 ] ) || IsVowel(buffer[ c + 1 ] ))
+						buffer[c] = 'Y';
 				}
 			}
-
-			return buffer;
 		}
 
-		/// <summary>
-		/// Checks a term if it can be processed correctly.
-		/// </summary>
-		/// <returns>true if, and only if, the given term consists in letters.</returns>
-		private bool IsStemmable( String term ) 
-		{
-			bool upper = false;
-			int first = -1;
-			for ( int c = 0; c < term.Length; c++ ) 
-			{
-				// Discard terms that contain non-letter characters.
-				if ( !Char.IsLetter( term[c] ) ) 
-				{
+		return buffer;
+    }
+
+    /**
+     * Checks whether a term can be processed correctly.
+     *
+     * @return bool - true if, and only if, the given term consists only of letters.
+     */
+    private bool IsStemmable( String term ) {
+		bool upper = false;
+		int first = -1;
+		for ( int c = 0; c < term.Length; c++ ) {
+			// Discard terms that contain non-letter chars.
+			if ( !char.IsLetter( term[c] ) ) {
+				return false;
+			}
+			// Discard terms that contain multiple uppercase letters.
+			if ( char.IsUpper( term[ c] ) ) {
+				if ( upper ) {
 					return false;
 				}
-				// Discard terms that contain multiple uppercase letters.
-				if ( Char.IsUpper( term[c] ) ) 
-				{
-					if ( upper ) 
-					{
-						return false;
-					}
-						// First encountered uppercase letter, set flag and save
-						// position.
-					else 
-					{
-						first = c;
-						upper = true;
-					}
+			// First encountered uppercase letter, set flag and save
+			// position.
+				else {
+					first = c;
+					upper = true;
 				}
 			}
-			// Discard the term if it contains a single uppercase letter that
-			// is not starting the term.
-			if ( first > 0 ) 
-			{
-				return false;
-			}
-			return true;
 		}
-	}
+		// Discard the term if it contains a single uppercase letter that
+		// is not starting the term.
+		if ( first > 0 ) {
+			return false;
+		}
+		return true;
+    }
+}
+
 }
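
As an aside (illustrative only, not part of this patch), the region definitions quoted in the doc comments above can be checked with a small standalone C# sketch. The IsVowel helper and the sample word below are assumptions for demonstration; the real stemmer operates on its internal StringBuilder rather than plain strings.

    using System;

    // Minimal sketch: compute the RV and R1 regions of a lowercase French word,
    // following the definitions quoted in the FrenchStemmer doc comments.
    static class RegionDemo
    {
        static bool IsVowel(char ch) => "aeiouyâàëéêèïîôüùû".IndexOf(ch) >= 0;

        // "If the word begins with two vowels, RV is the region after the third letter,
        // otherwise the region after the first vowel not at the beginning of the word,
        // or the end of the word if these positions cannot be found."
        static string RetrieveRV(string word)
        {
            if (word.Length > 3)
            {
                if (IsVowel(word[0]) && IsVowel(word[1]))
                    return word.Substring(3);
                for (int c = 1; c < word.Length; c++)
                    if (IsVowel(word[c]))
                        return c + 1 < word.Length ? word.Substring(c + 1) : null;
            }
            return null;
        }

        // "R1 is the region after the first non-vowel following a vowel,
        // or is the null region at the end of the word if there is no such non-vowel."
        static string RetrieveR(string word)
        {
            int firstVowel = -1;
            for (int c = 0; c < word.Length; c++)
                if (IsVowel(word[c])) { firstVowel = c; break; }
            if (firstVowel < 0)
                return null;
            for (int c = firstVowel; c < word.Length; c++)
                if (!IsVowel(word[c]))
                    return c + 1 < word.Length ? word.Substring(c + 1) : null;
            return null;
        }

        static void Main()
        {
            // "fameusement" -> RV = "meusement", R1 = "eusement", R2 = RetrieveR(R1) = "ement"
            string word = "fameusement";
            string r1 = RetrieveR(word);
            Console.WriteLine("RV=" + RetrieveRV(word) + " R1=" + r1 +
                              " R2=" + (r1 == null ? "(null)" : RetrieveR(r1)));
        }
    }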

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,12 +20,14 @@ using Lucene.Net.Analysis;
 
 namespace Lucene.Net.Analyzers.Miscellaneous
 {
-    public class EmptyTokenStream : TokenStream
+    /// <summary>
+    /// An always exhausted token stream
+    /// </summary>
+    public sealed class EmptyTokenStream : TokenStream
     {
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override Token Next(Token reusableToken)
+        public sealed override bool IncrementToken()
         {
-            return null;
+            return false;
         }
     }
 }
\ No newline at end of file
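
As an aside (illustrative only, not part of this patch), the new IncrementToken() contract can be seen in a minimal consumer sketch: because the stream always reports exhaustion, a standard consumption loop never enters its body.

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analyzers.Miscellaneous;

    // Minimal sketch: consuming EmptyTokenStream through the IncrementToken() API.
    class EmptyStreamDemo
    {
        static void Main()
        {
            TokenStream stream = new EmptyTokenStream();
            int count = 0;
            while (stream.IncrementToken())  // always false, so the loop body never runs
            {
                count++;
            }
            Console.WriteLine("tokens: " + count);  // prints "tokens: 0"
        }
    }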

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PatternAnalyzer.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,510 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /**
+     * Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
+     * {@link java.io.Reader}, that can flexibly separate text into terms via a regular expression {@link Regex}
+     * (with behaviour identical to {@link String#split(String)}),
+     * and that combines the functionality of
+     * {@link org.apache.lucene.analysis.LetterTokenizer},
+     * {@link org.apache.lucene.analysis.LowerCaseTokenizer},
+     * {@link org.apache.lucene.analysis.WhitespaceTokenizer},
+     * {@link org.apache.lucene.analysis.StopFilter} into a single efficient
+     * multi-purpose class.
+     * <p>
+     * If you are unsure how exactly a regular expression should look, consider 
+     * prototyping by simply trying various expressions on some test texts via
+     * {@link String#split(String)}. Once you are satisfied, give that regex to 
+     * RegexAnalyzer. Also see <a target="_blank" 
+     * href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+     * <p>
+     * This class can be considerably faster than the "normal" Lucene tokenizers. 
+     * It can also serve as a building block in a compound Lucene
+     * {@link org.apache.lucene.analysis.TokenFilter} chain. For example as in this 
+     * stemming example:
+     * <pre>
+     * RegexAnalyzer pat = ...
+     * TokenStream tokenStream = new SnowballFilter(
+     *     pat.tokenStream("content", "James is running round in the woods"), 
+     *     "English");
+     * </pre>
+     *
+     */
+    public class RegexAnalyzer : Analyzer
+    {
+
+        /** <code>"\\W+"</code>; Divides text at non-letters (NOT char.IsLetter(c)) */
+        public static readonly Regex NON_WORD_Regex = new Regex("\\W+", RegexOptions.Compiled);
+
+        /** <code>"\\s+"</code>; Divides text at whitespaces (char.IsWhitespace(c)) */
+        public static readonly Regex WHITESPACE_Regex = new Regex("\\s+", RegexOptions.Compiled);
+
+        private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
+          CharArraySet.UnmodifiableSet(new CharArraySet(new[]{
+      "a", "about", "above", "across", "adj", "after", "afterwards",
+      "again", "against", "albeit", "all", "almost", "alone", "along",
+      "already", "also", "although", "always", "among", "amongst", "an",
+      "and", "another", "any", "anyhow", "anyone", "anything",
+      "anywhere", "are", "around", "as", "at", "be", "became", "because",
+      "become", "becomes", "becoming", "been", "before", "beforehand",
+      "behind", "being", "below", "beside", "besides", "between",
+      "beyond", "both", "but", "by", "can", "cannot", "co", "could",
+      "down", "during", "each", "eg", "either", "else", "elsewhere",
+      "enough", "etc", "even", "ever", "every", "everyone", "everything",
+      "everywhere", "except", "few", "first", "for", "former",
+      "formerly", "from", "further", "had", "has", "have", "he", "hence",
+      "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+      "herself", "him", "himself", "his", "how", "however", "i", "ie", "if",
+      "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last",
+      "latter", "latterly", "least", "less", "ltd", "many", "may", "me",
+      "meanwhile", "might", "more", "moreover", "most", "mostly", "much",
+      "must", "my", "myself", "namely", "neither", "never",
+      "nevertheless", "next", "no", "nobody", "none", "noone", "nor",
+      "not", "nothing", "now", "nowhere", "of", "off", "often", "on",
+      "once one", "only", "onto", "or", "other", "others", "otherwise",
+      "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps",
+      "rather", "s", "same", "seem", "seemed", "seeming", "seems",
+      "several", "she", "should", "since", "so", "some", "somehow",
+      "someone", "something", "sometime", "sometimes", "somewhere",
+      "still", "such", "t", "than", "that", "the", "their", "them",
+      "themselves", "then", "thence", "there", "thereafter", "thereby",
+      "therefor", "therein", "thereupon", "these", "they", "this",
+      "those", "though", "through", "throughout", "thru", "thus", "to",
+      "together", "too", "toward", "towards", "under", "until", "up",
+      "upon", "us", "very", "via", "was", "we", "well", "were", "what",
+      "whatever", "whatsoever", "when", "whence", "whenever",
+      "whensoever", "where", "whereafter", "whereas", "whereat",
+      "whereby", "wherefrom", "wherein", "whereinto", "whereof",
+      "whereon", "whereto", "whereunto", "whereupon", "wherever",
+      "wherewith", "whether", "which", "whichever", "whichsoever",
+      "while", "whilst", "whither", "who", "whoever", "whole", "whom",
+      "whomever", "whomsoever", "whose", "whosoever", "why", "will",
+      "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
+      "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
+      "yourselves"
+    }, true));
+
+        /**
+         * A lower-casing word analyzer with English stop words (can be shared
+         * freely across threads without harm); global per class loader.
+         */
+        public static readonly RegexAnalyzer DEFAULT_ANALYZER = new RegexAnalyzer(
+          Version.LUCENE_CURRENT, NON_WORD_Regex, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+        /**
+         * A lower-casing word analyzer with <b>extended </b> English stop words
+         * (can be shared freely across threads without harm); global per class
+         * loader. The stop words are borrowed from
+         * http://thomas.loc.gov/home/stopwords.html, see
+         * http://thomas.loc.gov/home/all.about.inquery.html
+         */
+        public static readonly RegexAnalyzer EXTENDED_ANALYZER = new RegexAnalyzer(
+          Version.LUCENE_CURRENT, NON_WORD_Regex, true, EXTENDED_ENGLISH_STOP_WORDS);
+
+        private readonly Regex Regex;
+        private readonly bool toLowerCase;
+        private readonly ISet<string> stopWords;
+
+        private readonly Version matchVersion;
+
+        /**
+         * Constructs a new instance with the given parameters.
+         * 
+         * @param matchVersion If >= {@link Version#LUCENE_29}, StopFilter.enablePositionIncrement is set to true
+         * @param Regex
+         *            a regular expression delimiting tokens
+         * @param toLowerCase
+         *            if <code>true</code> returns tokens after applying
+         *            String.toLowerCase()
+         * @param stopWords
+         *            if non-null, ignores all tokens that are contained in the
+         *            given stop set (after previously having applied toLowerCase()
+         *            if applicable). For example, created via
+         *            {@link StopFilter#makeStopSet(String[])}and/or
+         *            {@link org.apache.lucene.analysis.WordlistLoader}as in
+         *            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
+         *            or <a href="http://www.unine.ch/info/clef/">other stop words
+         *            lists </a>.
+         */
+        public RegexAnalyzer(Version matchVersion, Regex Regex, bool toLowerCase, ISet<string> stopWords)
+        {
+            if (Regex == null)
+                throw new ArgumentException("Regex must not be null");
+
+            if (EqRegex(NON_WORD_Regex, Regex)) Regex = NON_WORD_Regex;
+            else if (EqRegex(WHITESPACE_Regex, Regex)) Regex = WHITESPACE_Regex;
+
+            if (stopWords != null && stopWords.Count == 0) stopWords = null;
+
+            this.Regex = Regex;
+            this.toLowerCase = toLowerCase;
+            this.stopWords = stopWords;
+            this.matchVersion = matchVersion;
+        }
+
+        /**
+         * Creates a token stream that tokenizes the given string into token terms
+         * (aka words).
+         * 
+         * @param fieldName
+         *            the name of the field to tokenize (currently ignored).
+         * @param text
+         *            the string to tokenize
+         * @return a new token stream
+         */
+        public TokenStream TokenStream(String fieldName, String text)
+        {
+            // Ideally the Analyzer superclass should have a method with the same signature, 
+            // with a default impl that simply delegates to the StringReader flavour. 
+            if (text == null)
+                throw new ArgumentException("text must not be null");
+
+            TokenStream stream;
+            if (Regex == NON_WORD_Regex)
+            { // fast path
+                stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
+            }
+            else if (Regex == WHITESPACE_Regex)
+            { // fast path
+                stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
+            }
+            else
+            {
+                stream = new RegexTokenizer(text, Regex, toLowerCase);
+                if (stopWords != null) stream = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
+            }
+
+            return stream;
+        }
+
+        /**
+         * Creates a token stream that tokenizes all the text in the given Reader;
+         * This implementation forwards to <code>tokenStream(String, String)</code> and is
+         * less efficient than <code>tokenStream(String, String)</code>.
+         * 
+         * @param fieldName
+         *            the name of the field to tokenize (currently ignored).
+         * @param reader
+         *            the reader delivering the text
+         * @return a new token stream
+         */
+        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            if (reader is FastStringReader)
+            { // fast path
+                return TokenStream(fieldName, ((FastStringReader)reader).GetString());
+            }
+
+            try
+            {
+                String text = ToString(reader);
+                return TokenStream(fieldName, text);
+            }
+            catch (IOException e)
+            {
+                throw new Exception("Wrapped Exception", e);
+            }
+        }
+
+        /**
+         * Indicates whether some other object is "equal to" this one.
+         * 
+         * @param other
+         *            the reference object with which to compare.
+         * @return true if equal, false otherwise
+         */
+        public override bool Equals(Object other)
+        {
+            if (this == other) return true;
+            if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) return false;
+            if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER) return false;
+
+            if (other is RegexAnalyzer)
+            {
+                RegexAnalyzer p2 = (RegexAnalyzer)other;
+                return
+                  toLowerCase == p2.toLowerCase &&
+                  EqRegex(Regex, p2.Regex) &&
+                  Eq(stopWords, p2.stopWords);
+            }
+            return false;
+        }
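+
+        // In practice this means two independently constructed analyzers compare equal
+        // when their patterns (text and options), lower-casing flag and stop sets match,
+        // even though the underlying Regex instances differ.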
+
+        /**
+         * Returns a hash code value for the object.
+         * 
+         * @return the hash code.
+         */
+        public override int GetHashCode()
+        {
+            if (this == DEFAULT_ANALYZER) return -1218418418; // fast path
+            if (this == EXTENDED_ANALYZER) return 1303507063; // fast path
+
+            int h = 1;
+            h = 31 * h + Regex.GetHashCode();
+            h = 31 * h + (int)Regex.Options;
+            h = 31 * h + (toLowerCase ? 1231 : 1237);
+            h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
+            return h;
+        }
+
+        /** equality where o1 and/or o2 can be null */
+        private static bool Eq(Object o1, Object o2)
+        {
+            return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
+        }
+
+        /** assumes p1 and p2 are not null */
+        private static bool EqRegex(Regex p1, Regex p2)
+        {
+            return p1 == p2 || (p1.Options == p2.Options && p1.ToString() == p2.ToString());
+        }
+
+        /**
+         * Reads until end-of-stream and returns all read chars, finally closes the stream.
+         * 
+         * @param input the input stream
+         * @throws IOException if an I/O error occurs while reading the stream
+         */
+        private static String ToString(TextReader input)
+        {
+            try
+            {
+                int len = 256;
+                char[] buffer = new char[len];
+                char[] output = new char[len];
+
+                len = 0;
+                int n;
+                while ((n = input.Read(buffer, 0, buffer.Length)) > 0) // TextReader.Read returns 0 at end of stream
+                {
+                    if (len + n > output.Length)
+                    { // grow capacity
+                        char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
+                        Array.Copy(output, 0, tmp, 0, len);
+                        Array.Copy(buffer, 0, tmp, len, n);
+                        buffer = output; // use larger buffer for future larger bulk reads
+                        output = tmp;
+                    }
+                    else
+                    {
+                        Array.Copy(buffer, 0, output, len, n);
+                    }
+                    len += n;
+                }
+
+                return new String(output, 0, len);
+            }
+            finally
+            {
+                if (input != null) input.Dispose();
+            }
+        }
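+
+        // Growth sketch (assuming full 256-char reads): gathering roughly 600 chars
+        // grows the output array from 256 to 512 and then to 1024 via
+        // Math.Max(output.Length << 1, len + n); the reader is disposed either way.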
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /**
+         * The work horse; performance isn't fantastic, but it's not nearly as bad
+         * as one might think - kudos to the Sun regex developers.
+         */
+        private sealed class RegexTokenizer : TokenStream
+        {
+
+            private readonly String str;
+            private readonly bool toLowerCase;
+            private Match matcher;
+            private int pos = 0;
+            private static readonly System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture;
+            private TermAttribute termAtt;
+            private OffsetAttribute offsetAtt;
+
+            public RegexTokenizer(String str, Regex regex, bool toLowerCase)
+            {
+                this.str = str;
+                this.matcher = regex.Match(str);
+                this.toLowerCase = toLowerCase;
+                this.termAtt = AddAttribute<TermAttribute>();
+                this.offsetAtt = AddAttribute<OffsetAttribute>();
+            }
+
+            public sealed override bool IncrementToken()
+            {
+                if (matcher == null) return false;
+                ClearAttributes();
+                while (true)
+                { // loop takes care of leading and trailing boundary cases
+                    int start = pos;
+                    int end;
+                    bool isMatch = matcher.Success;
+                    if (isMatch)
+                    {
+                        end = matcher.Index;
+                        pos = matcher.Index + matcher.Length;
+                        matcher = matcher.NextMatch(); // advance past the delimiter just consumed
+                    }
+                    else
+                    {
+                        end = str.Length;
+                        matcher = null; // we're finished
+                    }
+
+                    if (start != end)
+                    { // non-empty token between delimiter matches
+                        String text = str.Substring(start, end - start);
+                        if (toLowerCase) text = text.ToLower(locale);
+                        termAtt.SetTermBuffer(text);
+                        offsetAtt.SetOffset(start, end);
+                        return true;
+                    }
+                    if (!isMatch) return false;
+                }
+            }
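+
+            // Example behaviour (assuming a delimiter pattern such as ",\s*" and
+            // toLowerCase == true): "foo, Bar" yields "foo" with offsets (0, 3) and
+            // "bar" with offsets (5, 8); the regex matches the delimiters and the
+            // text between matches becomes the tokens.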
+
+            public override sealed void End()
+            {
+                // set final offset
+                int finalOffset = str.Length;
+                this.offsetAtt.SetOffset(finalOffset, finalOffset);
+            }
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /**
+         * Special-case class for best performance in common cases; this class is
+         * otherwise unnecessary.
+         */
+        private sealed class FastStringTokenizer : TokenStream
+        {
+
+            private readonly String str;
+            private int pos;
+            private readonly bool isLetter;
+            private readonly bool toLowerCase;
+            private readonly ISet<string> stopWords;
+            private static readonly System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture;
+            private TermAttribute termAtt;
+            private OffsetAttribute offsetAtt;
+
+            public FastStringTokenizer(String str, bool isLetter, bool toLowerCase, ISet<string> stopWords)
+            {
+                this.str = str;
+                this.isLetter = isLetter;
+                this.toLowerCase = toLowerCase;
+                this.stopWords = stopWords;
+                this.termAtt = AddAttribute<TermAttribute>();
+                this.offsetAtt = AddAttribute<OffsetAttribute>();
+            }
+
+            public override bool IncrementToken()
+            {
+                ClearAttributes();
+                // cache loop instance vars (performance)
+                String s = str;
+                int len = s.Length;
+                int i = pos;
+                bool letter = isLetter;
+
+                int start = 0;
+                String text;
+                do
+                {
+                    // find beginning of token
+                    text = null;
+                    while (i < len && !IsTokenChar(s[i], letter))
+                    {
+                        i++;
+                    }
+
+                    if (i < len)
+                    { // found beginning; now find end of token
+                        start = i;
+                        while (i < len && IsTokenChar(s[i], letter))
+                        {
+                            i++;
+                        }
+
+                        text = s.Substring(start, i - start);
+                        if (toLowerCase) text = text.ToLower(locale);
+                    }
+                } while (text != null && IsStopWord(text));
+
+                pos = i;
+                if (text == null)
+                {
+                    return false;
+                }
+                termAtt.SetTermBuffer(text);
+                offsetAtt.SetOffset(start, i);
+                return true;
+            }
+
+            public override sealed void End()
+            {
+                // set final offset
+                int finalOffset = str.Length;
+                this.offsetAtt.SetOffset(finalOffset, finalOffset);
+            }
+
+            private bool IsTokenChar(char c, bool isLetter)
+            {
+                return isLetter ? char.IsLetter(c) : !char.IsWhiteSpace(c);
+            }
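+
+            // For example, ignoring lower-casing: with isLetter == true, "R2D2 rocks"
+            // tokenizes to "R", "D", "rocks" (digits break tokens); with isLetter == false
+            // it tokenizes to "R2D2", "rocks" (split on whitespace only).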
+
+            private bool IsStopWord(string text)
+            {
+                return stopWords != null && stopWords.Contains(text);
+            }
+
+        }
+
+
+        ///////////////////////////////////////////////////////////////////////////////
+        // Nested classes:
+        ///////////////////////////////////////////////////////////////////////////////
+        /**
+         * A StringReader that exposes its contained string for fast direct access.
+         * Might make sense to generalize this to CharSequence and make it public?
+         */
+        internal sealed class FastStringReader : StringReader
+        {
+
+            private readonly string s;
+
+            internal FastStringReader(string s)
+                : base(s)
+            {
+                this.s = s;
+            }
+
+            internal string GetString()
+            {
+                return s;
+            }
+        }
+
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -64,26 +64,6 @@ namespace Lucene.Net.Analyzers.Miscellan
             return _suffix.IncrementToken();
         }
 
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. 
-        /// </summary>
-        /// <param name="reusableToken"></param>
-        /// <returns></returns>
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override sealed Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. 
-        /// </summary>
-        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling Next(Token) or using the new IncrementToken() method with the new AttributeSource API.")]
-        public override sealed Token Next()
-        {
-            return base.Next();
-        }
-
         public override void Reset()
         {
             _suffix.Reset();

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs Mon Nov 21 04:44:55 2011
@@ -60,20 +60,20 @@ namespace Lucene.Net.Analyzers.Miscellan
             _prefixExhausted = false;
 
             // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
-            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
-            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
-            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
-            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
-            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            _termAtt = AddAttribute<TermAttribute>();
+            _posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+            _payloadAtt = AddAttribute<PayloadAttribute>();
+            _offsetAtt = AddAttribute<OffsetAttribute>();
+            _typeAtt = AddAttribute<TypeAttribute>();
+            _flagsAtt = AddAttribute<FlagsAttribute>();
             // ReSharper restore DoNotCallOverridableMethodsInConstructor
 
-            _pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
-            _pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
-            _pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
-            _pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
-            _pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
-            _pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
+            _pTermAtt = prefix.AddAttribute<TermAttribute>();
+            _pPosIncrAtt = prefix.AddAttribute<PositionIncrementAttribute>();
+            _pPayloadAtt = prefix.AddAttribute<PayloadAttribute>();
+            _pOffsetAtt = prefix.AddAttribute<OffsetAttribute>();
+            _pTypeAtt = prefix.AddAttribute<TypeAttribute>();
+            _pFlagsAtt = prefix.AddAttribute<FlagsAttribute>();
         }
 
         public TokenStream Prefix { get; set; }
@@ -114,27 +114,6 @@ namespace Lucene.Net.Analyzers.Miscellan
             return true;
         }
 
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <param name="reusableToken"></param>
-        /// <returns></returns>
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override sealed Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <returns></returns>
-        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling Next(Token) or using the new IncrementToken() method with the new AttributeSource API.")]
-        public override sealed Token Next()
-        {
-            return base.Next();
-        }
-
         private void SetCurrentToken(Token token)
         {
             if (token == null) return;

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -26,26 +26,23 @@ namespace Lucene.Net.Analyzers.Miscellan
     /// <summary>
     /// A TokenStream containing a single token.
     /// </summary>
-    public class SingleTokenTokenStream : TokenStream
+    public sealed class SingleTokenTokenStream : TokenStream
     {
-        private readonly AttributeImpl _tokenAtt;
         private bool _exhausted;
 
         // The token needs to be immutable, so work with clones!
         private Token _singleToken;
+        private readonly AttributeImpl _tokenAtt;
 
         public SingleTokenTokenStream(Token token)
+            : base(Token.TOKEN_ATTRIBUTE_FACTORY)
         {
             Debug.Assert(token != null, "Token was null!");
             _singleToken = (Token) token.Clone();
 
-            // ReSharper disable DoNotCallOverridableMethodsInConstructor
-            _tokenAtt = (AttributeImpl) AddAttribute(typeof (TermAttribute));
-            // ReSharper restore DoNotCallOverridableMethodsInConstructor
-
-            Debug.Assert(_tokenAtt is Token || _tokenAtt.GetType().Name.Equals(typeof (TokenWrapper).Name),
-                         "Token Attribute is the wrong type! Type was: " + _tokenAtt.GetType().Name + " but expected " +
-                         typeof (TokenWrapper).Name);
+            _tokenAtt = (AttributeImpl)AddAttribute<TermAttribute>();
+
+            Debug.Assert(_tokenAtt is Token);
         }
 
         public override sealed bool IncrementToken()
@@ -60,29 +57,6 @@ namespace Lucene.Net.Analyzers.Miscellan
             return true;
         }
 
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
-        /// </summary>
-        /// <param name="reusableToken"></param>
-        /// <returns></returns>
-        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
-        public override sealed Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-
-        /// <summary>
-        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer. 
-        /// </summary>
-        /// <returns></returns>
-        [Obsolete(
-            "The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling Next(Token) or using the new IncrementToken() method with the new AttributeSource API."
-            )]
-        public override sealed Token Next()
-        {
-            return base.Next();
-        }
-
         public override void Reset()
         {
             _exhausted = false;

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 
+using System;
 using System.IO;
 using System.Collections;
 
@@ -24,6 +25,45 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.NGram
 {
+    public static class SideExtensions
+    {
+        public static string GetLabel(this Side theSide)
+        {
+            switch(theSide)
+            {
+                case Side.FRONT:
+                    return "front";
+                case Side.BACK:
+                    return "back";
+                default:
+                    throw new ArgumentException(string.Format("{0} is not a valid value for EdgeNGramTokenFilter.Side", theSide));
+            }
+        }
+
+        public static Side GetSide(string sideName)
+        {
+            if (Side.FRONT.GetLabel() == sideName)
+            {
+                return Side.FRONT;
+            }
+
+            if (Side.BACK.GetLabel() == sideName)
+            {
+                return Side.BACK;
+            }
+
+            return (Side)(-1); // TODO: a sentinel is returned instead of null; should an exception be thrown instead?
+        }
+    }
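+
+    // For example, Side.FRONT.GetLabel() returns "front", SideExtensions.GetSide("back")
+    // returns Side.BACK, and an unrecognized label currently yields the (Side)(-1) sentinel.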
+
+    /// <summary>
+    /// Specifies which side of the input the n-gram should be generated from
+    /// </summary>
+    public enum Side
+    {
+        FRONT,
+        BACK
+    }
 
     /**
      * Tokenizes the given token into n-grams of given size(s).
@@ -31,44 +71,12 @@ namespace Lucene.Net.Analysis.NGram
      * This <see cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
      * </p>
      */
-    public class EdgeNGramTokenFilter : TokenFilter
+    public sealed class EdgeNGramTokenFilter : TokenFilter
     {
         public static Side DEFAULT_SIDE = Side.FRONT;
         public static int DEFAULT_MAX_GRAM_SIZE = 1;
         public static int DEFAULT_MIN_GRAM_SIZE = 1;
 
-        // Replace this with an enum when the Java 1.5 upgrade is made, the impl will be simplified
-        /** Specifies which side of the input the n-gram should be generated from */
-        public class Side
-        {
-            private string label;
-
-            /** Get the n-gram from the front of the input */
-            public static Side FRONT = new Side("front");
-
-            /** Get the n-gram from the end of the input */
-            public static Side BACK = new Side("back");
-
-            // Private ctor
-            private Side(string label) { this.label = label; }
-
-            public string getLabel() { return label; }
-
-            // Get the appropriate Side from a string
-            public static Side getSide(string sideName)
-            {
-                if (FRONT.getLabel().Equals(sideName))
-                {
-                    return FRONT;
-                }
-                else if (BACK.getLabel().Equals(sideName))
-                {
-                    return BACK;
-                }
-                return null;
-            }
-        }
-
         private int minGram;
         private int maxGram;
         private Side side;
@@ -83,8 +91,8 @@ namespace Lucene.Net.Analysis.NGram
 
         protected EdgeNGramTokenFilter(TokenStream input) : base(input)
         {
-            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
-            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+            this.termAtt = AddAttribute<TermAttribute>();
+            this.offsetAtt = AddAttribute<OffsetAttribute>();
         }
 
         /**
@@ -100,7 +108,7 @@ namespace Lucene.Net.Analysis.NGram
         {
 
 
-            if (side == null)
+            if (side != Side.FRONT && side != Side.BACK)
             {
                 throw new System.ArgumentException("sideLabel must be either front or back");
             }
@@ -118,8 +126,8 @@ namespace Lucene.Net.Analysis.NGram
             this.minGram = minGram;
             this.maxGram = maxGram;
             this.side = side;
-            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
-            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+            this.termAtt = AddAttribute<TermAttribute>();
+            this.offsetAtt = AddAttribute<OffsetAttribute>();
         }
 
         /**
@@ -131,9 +139,8 @@ namespace Lucene.Net.Analysis.NGram
          * <param name="maxGram">the largest n-gram to generate</param>
          */
         public EdgeNGramTokenFilter(TokenStream input, string sideLabel, int minGram, int maxGram)
-            : this(input, Side.getSide(sideLabel), minGram, maxGram)
+            : this(input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
         {
-
         }
 
         public override bool IncrementToken()
@@ -173,22 +180,6 @@ namespace Lucene.Net.Analysis.NGram
             }
         }
 
-        /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-         * not be overridden. Delegates to the backwards compatibility layer. */
-        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
-        public override  Token Next(Token reusableToken)
-        {
-            return base.Next(reusableToken);
-        }
-
-        /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-         * not be overridden. Delegates to the backwards compatibility layer. */
-        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
-        public override Token Next()
-        {
-            return base.Next();
-        }
-
         public override void Reset()
         {
             base.Reset();