You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2007/05/01 20:45:35 UTC
svn commit: r534192 [3/19] - in /incubator/lucene.net/trunk/C#: ./ src/ src/Demo/ src/Demo/DeleteFiles/ src/Demo/DemoLib/ src/Demo/DemoLib/HTML/ src/Demo/IndexFiles/ src/Demo/IndexHtml/ src/Demo/SearchFiles/ src/Lucene.Net/ src/Lucene.Net/Analysis/ src...

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/PorterStemmer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs Tue May  1 11:45:26 2007
@@ -45,706 +45,706 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary> 
-	/// Stemmer, implementing the Porter Stemming Algorithm
-	/// 
-	/// The Stemmer class transforms a word into its root form.  The input
-	/// word can be provided a character at time (by calling add()), or at once
-	/// by calling one of the various stem(something) methods.
-	/// </summary>
+    /// <summary> 
+    /// Stemmer, implementing the Porter Stemming Algorithm
+    /// 
+    /// The Stemmer class transforms a word into its root form.  The input
+    /// word can be provided a character at time (by calling add()), or at once
+    /// by calling one of the various stem(something) methods.
+    /// </summary>
 	
-	class PorterStemmer
-	{
-		private char[] b;
-		private int i, j, k, k0;
-		private bool dirty = false;
-		private const int INC = 50; /* unit of size whereby b is increased */
-		private const int EXTRA = 1;
-		
-		public PorterStemmer()
-		{
-			b = new char[INC];
-			i = 0;
-		}
-		
-		/// <summary> reset() resets the stemmer so it can stem another word.  If you invoke
-		/// the stemmer by calling add(char) and then Stem(), you must call reset()
-		/// before starting another word.
-		/// </summary>
-		public virtual void  Reset()
-		{
-			i = 0; dirty = false;
-		}
-		
-		/// <summary> Add a character to the word being stemmed.  When you are finished
-		/// adding characters, you can call Stem(void) to process the word.
-		/// </summary>
-		public virtual void  Add(char ch)
-		{
-			if (b.Length <= i + EXTRA)
-			{
-				char[] new_b = new char[b.Length + INC];
-				for (int c = 0; c < b.Length; c++)
-					new_b[c] = b[c];
-				b = new_b;
-			}
-			b[i++] = ch;
-		}
-		
-		/// <summary> After a word has been stemmed, it can be retrieved by toString(),
-		/// or a reference to the internal buffer can be retrieved by getResultBuffer
-		/// and getResultLength (which is generally more efficient.)
-		/// </summary>
-		public override System.String ToString()
-		{
-			return new System.String(b, 0, i);
-		}
-		
-		/// <summary> Returns the length of the word resulting from the stemming process.</summary>
-		public virtual int GetResultLength()
-		{
-			return i;
-		}
-		
-		/// <summary> Returns a reference to a character buffer containing the results of
-		/// the stemming process.  You also need to consult getResultLength()
-		/// to determine the length of the result.
-		/// </summary>
-		public virtual char[] GetResultBuffer()
-		{
-			return b;
-		}
-		
-		/* cons(i) is true <=> b[i] is a consonant. */
-		
-		private bool Cons(int i)
-		{
-			switch (b[i])
-			{
-				
-				case 'a': 
-				case 'e': 
-				case 'i': 
-				case 'o': 
-				case 'u': 
-					return false;
-				
-				case 'y': 
-					return (i == k0)?true:!Cons(i - 1);
-				
-				default: 
-					return true;
-				
-			}
-		}
-		
-		/* m() measures the number of consonant sequences between k0 and j. if c is
-		a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
-		presence,
-		
-		<c><v>       gives 0
-		<c>vc<v>     gives 1
-		<c>vcvc<v>   gives 2
-		<c>vcvcvc<v> gives 3
-		....
-		*/
-		
-		private int M()
-		{
-			int n = 0;
-			int i = k0;
-			while (true)
-			{
-				if (i > j)
-					return n;
-				if (!Cons(i))
-					break;
-				i++;
-			}
-			i++;
-			while (true)
-			{
-				while (true)
-				{
-					if (i > j)
-						return n;
-					if (Cons(i))
-						break;
-					i++;
-				}
-				i++;
-				n++;
-				while (true)
-				{
-					if (i > j)
-						return n;
-					if (!Cons(i))
-						break;
-					i++;
-				}
-				i++;
-			}
-		}
-		
-		/* vowelinstem() is true <=> k0,...j contains a vowel */
-		
-		private bool Vowelinstem()
-		{
-			int i;
-			for (i = k0; i <= j; i++)
-				if (!Cons(i))
-					return true;
-			return false;
-		}
-		
-		/* doublec(j) is true <=> j,(j-1) contain a double consonant. */
-		
-		private bool Doublec(int j)
-		{
-			if (j < k0 + 1)
-				return false;
-			if (b[j] != b[j - 1])
-				return false;
-			return Cons(j);
-		}
-		
-		/* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
-		and also if the second c is not w,x or y. this is used when trying to
-		restore an e at the end of a short word. e.g.
-		
-		cav(e), lov(e), hop(e), crim(e), but
-		snow, box, tray.
-		
-		*/
-		
-		private bool Cvc(int i)
-		{
-			if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
-				return false;
-			else
-			{
-				int ch = b[i];
-				if (ch == 'w' || ch == 'x' || ch == 'y')
-					return false;
-			}
-			return true;
-		}
-		
-		private bool Ends(System.String s)
-		{
-			int l = s.Length;
-			int o = k - l + 1;
-			if (o < k0)
-				return false;
-			for (int i = 0; i < l; i++)
-				if (b[o + i] != s[i])
-					return false;
-			j = k - l;
-			return true;
-		}
-		
-		/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
-		k. */
-		
-		internal virtual void  Setto(System.String s)
-		{
-			int l = s.Length;
-			int o = j + 1;
-			for (int i = 0; i < l; i++)
-				b[o + i] = s[i];
-			k = j + l;
-			dirty = true;
-		}
-		
-		/* r(s) is used further down. */
-		
-		internal virtual void  R(System.String s)
-		{
-			if (M() > 0)
-				Setto(s);
-		}
-		
-		/* step1() gets rid of plurals and -ed or -ing. e.g.
-		
-		caresses  ->  caress
-		ponies    ->  poni
-		ties      ->  ti
-		caress    ->  caress
-		cats      ->  cat
-		
-		feed      ->  feed
-		agreed    ->  agree
-		disabled  ->  disable
-		
-		matting   ->  mat
-		mating    ->  mate
-		meeting   ->  meet
-		milling   ->  mill
-		messing   ->  mess
-		
-		meetings  ->  meet
-		
-		*/
-		
-		private void  Step1()
-		{
-			if (b[k] == 's')
-			{
-				if (Ends("sses"))
-					k -= 2;
-				else if (Ends("ies"))
-					Setto("i");
-				else if (b[k - 1] != 's')
-					k--;
-			}
-			if (Ends("eed"))
-			{
-				if (M() > 0)
-					k--;
-			}
-			else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
-			{
-				k = j;
-				if (Ends("at"))
-					Setto("ate");
-				else if (Ends("bl"))
-					Setto("ble");
-				else if (Ends("iz"))
-					Setto("ize");
-				else if (Doublec(k))
-				{
-					int ch = b[k--];
-					if (ch == 'l' || ch == 's' || ch == 'z')
-						k++;
-				}
-				else if (M() == 1 && Cvc(k))
-					Setto("e");
-			}
-		}
-		
-		/* step2() turns terminal y to i when there is another vowel in the stem. */
-		
-		private void  Step2()
-		{
-			if (Ends("y") && Vowelinstem())
-			{
-				b[k] = 'i';
-				dirty = true;
-			}
-		}
-		
-		/* step3() maps double suffices to single ones. so -ization ( = -ize plus
-		-ation) maps to -ize etc. note that the string before the suffix must give
-		m() > 0. */
-		
-		private void  Step3()
-		{
-			if (k == k0)
-				return ; /* For Bug 1 */
-			switch (b[k - 1])
-			{
-				
-				case 'a': 
-					if (Ends("ational"))
-					{
-						R("ate"); break;
-					}
-					if (Ends("tional"))
-					{
-						R("tion"); break;
-					}
-					break;
-				
-				case 'c': 
-					if (Ends("enci"))
-					{
-						R("ence"); break;
-					}
-					if (Ends("anci"))
-					{
-						R("ance"); break;
-					}
-					break;
-				
-				case 'e': 
-					if (Ends("izer"))
-					{
-						R("ize"); break;
-					}
-					break;
-				
-				case 'l': 
-					if (Ends("bli"))
-					{
-						R("ble"); break;
-					}
-					if (Ends("alli"))
-					{
-						R("al"); break;
-					}
-					if (Ends("entli"))
-					{
-						R("ent"); break;
-					}
-					if (Ends("eli"))
-					{
-						R("e"); break;
-					}
-					if (Ends("ousli"))
-					{
-						R("ous"); break;
-					}
-					break;
-				
-				case 'o': 
-					if (Ends("ization"))
-					{
-						R("ize"); break;
-					}
-					if (Ends("ation"))
-					{
-						R("ate"); break;
-					}
-					if (Ends("ator"))
-					{
-						R("ate"); break;
-					}
-					break;
-				
-				case 's': 
-					if (Ends("alism"))
-					{
-						R("al"); break;
-					}
-					if (Ends("iveness"))
-					{
-						R("ive"); break;
-					}
-					if (Ends("fulness"))
-					{
-						R("ful"); break;
-					}
-					if (Ends("ousness"))
-					{
-						R("ous"); break;
-					}
-					break;
-				
-				case 't': 
-					if (Ends("aliti"))
-					{
-						R("al"); break;
-					}
-					if (Ends("iviti"))
-					{
-						R("ive"); break;
-					}
-					if (Ends("biliti"))
-					{
-						R("ble"); break;
-					}
-					break;
-				
-				case 'g': 
-					if (Ends("logi"))
-					{
-						R("log"); break;
-					}
-					break;
-				}
-		}
-		
-		/* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
-		
-		private void  Step4()
-		{
-			switch (b[k])
-			{
-				
-				case 'e': 
-					if (Ends("icate"))
-					{
-						R("ic"); break;
-					}
-					if (Ends("ative"))
-					{
-						R(""); break;
-					}
-					if (Ends("alize"))
-					{
-						R("al"); break;
-					}
-					break;
-				
-				case 'i': 
-					if (Ends("iciti"))
-					{
-						R("ic"); break;
-					}
-					break;
-				
-				case 'l': 
-					if (Ends("ical"))
-					{
-						R("ic"); break;
-					}
-					if (Ends("ful"))
-					{
-						R(""); break;
-					}
-					break;
-				
-				case 's': 
-					if (Ends("ness"))
-					{
-						R(""); break;
-					}
-					break;
-				}
-		}
-		
-		/* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
-		
-		private void  Step5()
-		{
-			if (k == k0)
-				return ; /* for Bug 1 */
-			switch (b[k - 1])
-			{
-				
-				case 'a': 
-					if (Ends("al"))
-						break;
-					return ;
-				
-				case 'c': 
-					if (Ends("ance"))
-						break;
-					if (Ends("ence"))
-						break;
-					return ;
-				
-				case 'e': 
-					if (Ends("er"))
-						break; return ;
-				
-				case 'i': 
-					if (Ends("ic"))
-						break; return ;
-				
-				case 'l': 
-					if (Ends("able"))
-						break;
-					if (Ends("ible"))
-						break; return ;
-				
-				case 'n': 
-					if (Ends("ant"))
-						break;
-					if (Ends("ement"))
-						break;
-					if (Ends("ment"))
-						break;
-					/* element etc. not stripped before the m */
-					if (Ends("ent"))
-						break;
-					return ;
-				
-				case 'o': 
-					if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
-						break;
-					/* j >= 0 fixes Bug 2 */
-					if (Ends("ou"))
-						break;
-					return ;
-					/* takes care of -ous */
-				
-				case 's': 
-					if (Ends("ism"))
-						break;
-					return ;
-				
-				case 't': 
-					if (Ends("ate"))
-						break;
-					if (Ends("iti"))
-						break;
-					return ;
-				
-				case 'u': 
-					if (Ends("ous"))
-						break;
-					return ;
-				
-				case 'v': 
-					if (Ends("ive"))
-						break;
-					return ;
-				
-				case 'z': 
-					if (Ends("ize"))
-						break;
-					return ;
-				
-				default: 
-					return ;
-				
-			}
-			if (M() > 1)
-				k = j;
-		}
-		
-		/* step6() removes a final -e if m() > 1. */
-		
-		private void  Step6()
-		{
-			j = k;
-			if (b[k] == 'e')
-			{
-				int a = M();
-				if (a > 1 || a == 1 && !Cvc(k - 1))
-					k--;
-			}
-			if (b[k] == 'l' && Doublec(k) && M() > 1)
-				k--;
-		}
-		
-		
-		/// <summary> Stem a word provided as a String.  Returns the result as a String.</summary>
-		public virtual System.String Stem(System.String s)
-		{
-			if (Stem(s.ToCharArray(), s.Length))
-			{
-				return ToString();
-			}
-			else
-				return s;
-		}
-		
-		/// <summary>Stem a word contained in a char[].  Returns true if the stemming process
-		/// resulted in a word different from the input.  You can retrieve the
-		/// result with getResultLength()/getResultBuffer() or toString().
-		/// </summary>
-		public virtual bool Stem(char[] word)
-		{
-			return Stem(word, word.Length);
-		}
-		
-		/// <summary>Stem a word contained in a portion of a char[] array.  Returns
-		/// true if the stemming process resulted in a word different from
-		/// the input.  You can retrieve the result with
-		/// getResultLength()/getResultBuffer() or toString().
-		/// </summary>
-		public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
-		{
-			Reset();
-			if (b.Length < wordLen)
-			{
-				char[] new_b = new char[wordLen + EXTRA];
-				b = new_b;
-			}
-			for (int j = 0; j < wordLen; j++)
-				b[j] = wordBuffer[offset + j];
-			i = wordLen;
-			return Stem(0);
-		}
-		
-		/// <summary>Stem a word contained in a leading portion of a char[] array.
-		/// Returns true if the stemming process resulted in a word different
-		/// from the input.  You can retrieve the result with
-		/// getResultLength()/getResultBuffer() or toString().
-		/// </summary>
-		public virtual bool Stem(char[] word, int wordLen)
-		{
-			return Stem(word, 0, wordLen);
-		}
-		
-		/// <summary>Stem the word placed into the Stemmer buffer through calls to add().
-		/// Returns true if the stemming process resulted in a word different
-		/// from the input.  You can retrieve the result with
-		/// getResultLength()/getResultBuffer() or toString().
-		/// </summary>
-		public virtual bool Stem()
-		{
-			return Stem(0);
-		}
-		
-		public virtual bool Stem(int i0)
-		{
-			k = i - 1;
-			k0 = i0;
-			if (k > k0 + 1)
-			{
-				Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
-			}
-			// Also, a word is considered dirty if we lopped off letters
-			// Thanks to Ifigenia Vairelles for pointing this out.
-			if (i != k + 1)
-				dirty = true;
-			i = k + 1;
-			return dirty;
-		}
-		
-		/// <summary>Test program for demonstrating the Stemmer.  It reads a file and
-		/// stems each word, writing the result to standard out.
-		/// Usage: Stemmer file-name
-		/// </summary>
-		[STAThread]
-		public static void  Main(System.String[] args)
-		{
-			PorterStemmer s = new PorterStemmer();
+    class PorterStemmer
+    {
+        private char[] b;
+        private int i, j, k, k0;
+        private bool dirty = false;
+        private const int INC = 50; /* unit of size whereby b is increased */
+        private const int EXTRA = 1;
+		
+        public PorterStemmer()
+        {
+            b = new char[INC];
+            i = 0;
+        }
+		
+        /// <summary> reset() resets the stemmer so it can stem another word.  If you invoke
+        /// the stemmer by calling add(char) and then Stem(), you must call reset()
+        /// before starting another word.
+        /// </summary>
+        public virtual void  Reset()
+        {
+            i = 0; dirty = false;
+        }
+		
+        /// <summary> Add a character to the word being stemmed.  When you are finished
+        /// adding characters, you can call Stem(void) to process the word.
+        /// </summary>
+        public virtual void  Add(char ch)
+        {
+            if (b.Length <= i + EXTRA)
+            {
+                char[] new_b = new char[b.Length + INC];
+                for (int c = 0; c < b.Length; c++)
+                    new_b[c] = b[c];
+                b = new_b;
+            }
+            b[i++] = ch;
+        }
+		
+        /// <summary> After a word has been stemmed, it can be retrieved by toString(),
+        /// or a reference to the internal buffer can be retrieved by getResultBuffer
+        /// and getResultLength (which is generally more efficient.)
+        /// </summary>
+        public override System.String ToString()
+        {
+            return new System.String(b, 0, i);
+        }
+		
+        /// <summary> Returns the length of the word resulting from the stemming process.</summary>
+        public virtual int GetResultLength()
+        {
+            return i;
+        }
+		
+        /// <summary> Returns a reference to a character buffer containing the results of
+        /// the stemming process.  You also need to consult getResultLength()
+        /// to determine the length of the result.
+        /// </summary>
+        public virtual char[] GetResultBuffer()
+        {
+            return b;
+        }
+		
+        /* cons(i) is true <=> b[i] is a consonant. */
+		
+        private bool Cons(int i)
+        {
+            switch (b[i])
+            {
+				
+                case 'a': 
+                case 'e': 
+                case 'i': 
+                case 'o': 
+                case 'u': 
+                    return false;
+				
+                case 'y': 
+                    return (i == k0) ? true : !Cons(i - 1);
+				
+                default: 
+                    return true;
+				
+            }
+        }
+		
+        /* m() measures the number of consonant sequences between k0 and j. if c is
+        a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+        presence,
+		
+        <c><v>       gives 0
+        <c>vc<v>     gives 1
+        <c>vcvc<v>   gives 2
+        <c>vcvcvc<v> gives 3
+        ....
+        */
+		
+        private int M()
+        {
+            int n = 0;
+            int i = k0;
+            while (true)
+            {
+                if (i > j)
+                    return n;
+                if (!Cons(i))
+                    break;
+                i++;
+            }
+            i++;
+            while (true)
+            {
+                while (true)
+                {
+                    if (i > j)
+                        return n;
+                    if (Cons(i))
+                        break;
+                    i++;
+                }
+                i++;
+                n++;
+                while (true)
+                {
+                    if (i > j)
+                        return n;
+                    if (!Cons(i))
+                        break;
+                    i++;
+                }
+                i++;
+            }
+        }
+		
+        /* vowelinstem() is true <=> k0,...j contains a vowel */
+		
+        private bool Vowelinstem()
+        {
+            int i;
+            for (i = k0; i <= j; i++)
+                if (!Cons(i))
+                    return true;
+            return false;
+        }
+		
+        /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+		
+        private bool Doublec(int j)
+        {
+            if (j < k0 + 1)
+                return false;
+            if (b[j] != b[j - 1])
+                return false;
+            return Cons(j);
+        }
+		
+        /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+        and also if the second c is not w,x or y. this is used when trying to
+        restore an e at the end of a short word. e.g.
+		
+        cav(e), lov(e), hop(e), crim(e), but
+        snow, box, tray.
+		
+        */
+		
+        private bool Cvc(int i)
+        {
+            if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
+                return false;
+            else
+            {
+                int ch = b[i];
+                if (ch == 'w' || ch == 'x' || ch == 'y')
+                    return false;
+            }
+            return true;
+        }
+		
+        private bool Ends(System.String s)
+        {
+            int l = s.Length;
+            int o = k - l + 1;
+            if (o < k0)
+                return false;
+            for (int i = 0; i < l; i++)
+                if (b[o + i] != s[i])
+                    return false;
+            j = k - l;
+            return true;
+        }
+		
+        /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+        k. */
+		
+        internal virtual void  Setto(System.String s)
+        {
+            int l = s.Length;
+            int o = j + 1;
+            for (int i = 0; i < l; i++)
+                b[o + i] = s[i];
+            k = j + l;
+            dirty = true;
+        }
+		
+        /* r(s) is used further down. */
+		
+        internal virtual void  R(System.String s)
+        {
+            if (M() > 0)
+                Setto(s);
+        }
+		
+        /* step1() gets rid of plurals and -ed or -ing. e.g.
+		
+        caresses  ->  caress
+        ponies    ->  poni
+        ties      ->  ti
+        caress    ->  caress
+        cats      ->  cat
+		
+        feed      ->  feed
+        agreed    ->  agree
+        disabled  ->  disable
+		
+        matting   ->  mat
+        mating    ->  mate
+        meeting   ->  meet
+        milling   ->  mill
+        messing   ->  mess
+		
+        meetings  ->  meet
+		
+        */
+		
+        private void  Step1()
+        {
+            if (b[k] == 's')
+            {
+                if (Ends("sses"))
+                    k -= 2;
+                else if (Ends("ies"))
+                    Setto("i");
+                else if (b[k - 1] != 's')
+                    k--;
+            }
+            if (Ends("eed"))
+            {
+                if (M() > 0)
+                    k--;
+            }
+            else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
+            {
+                k = j;
+                if (Ends("at"))
+                    Setto("ate");
+                else if (Ends("bl"))
+                    Setto("ble");
+                else if (Ends("iz"))
+                    Setto("ize");
+                else if (Doublec(k))
+                {
+                    int ch = b[k--];
+                    if (ch == 'l' || ch == 's' || ch == 'z')
+                        k++;
+                }
+                else if (M() == 1 && Cvc(k))
+                    Setto("e");
+            }
+        }
+		
+        /* step2() turns terminal y to i when there is another vowel in the stem. */
+		
+        private void  Step2()
+        {
+            if (Ends("y") && Vowelinstem())
+            {
+                b[k] = 'i';
+                dirty = true;
+            }
+        }
+		
+        /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+        -ation) maps to -ize etc. note that the string before the suffix must give
+        m() > 0. */
+		
+        private void  Step3()
+        {
+            if (k == k0)
+                return ; /* For Bug 1 */
+            switch (b[k - 1])
+            {
+				
+                case 'a': 
+                    if (Ends("ational"))
+                    {
+                        R("ate"); break;
+                    }
+                    if (Ends("tional"))
+                    {
+                        R("tion"); break;
+                    }
+                    break;
+				
+                case 'c': 
+                    if (Ends("enci"))
+                    {
+                        R("ence"); break;
+                    }
+                    if (Ends("anci"))
+                    {
+                        R("ance"); break;
+                    }
+                    break;
+				
+                case 'e': 
+                    if (Ends("izer"))
+                    {
+                        R("ize"); break;
+                    }
+                    break;
+				
+                case 'l': 
+                    if (Ends("bli"))
+                    {
+                        R("ble"); break;
+                    }
+                    if (Ends("alli"))
+                    {
+                        R("al"); break;
+                    }
+                    if (Ends("entli"))
+                    {
+                        R("ent"); break;
+                    }
+                    if (Ends("eli"))
+                    {
+                        R("e"); break;
+                    }
+                    if (Ends("ousli"))
+                    {
+                        R("ous"); break;
+                    }
+                    break;
+				
+                case 'o': 
+                    if (Ends("ization"))
+                    {
+                        R("ize"); break;
+                    }
+                    if (Ends("ation"))
+                    {
+                        R("ate"); break;
+                    }
+                    if (Ends("ator"))
+                    {
+                        R("ate"); break;
+                    }
+                    break;
+				
+                case 's': 
+                    if (Ends("alism"))
+                    {
+                        R("al"); break;
+                    }
+                    if (Ends("iveness"))
+                    {
+                        R("ive"); break;
+                    }
+                    if (Ends("fulness"))
+                    {
+                        R("ful"); break;
+                    }
+                    if (Ends("ousness"))
+                    {
+                        R("ous"); break;
+                    }
+                    break;
+				
+                case 't': 
+                    if (Ends("aliti"))
+                    {
+                        R("al"); break;
+                    }
+                    if (Ends("iviti"))
+                    {
+                        R("ive"); break;
+                    }
+                    if (Ends("biliti"))
+                    {
+                        R("ble"); break;
+                    }
+                    break;
+				
+                case 'g': 
+                    if (Ends("logi"))
+                    {
+                        R("log"); break;
+                    }
+                    break;
+            }
+        }
+		
+        /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+		
+        private void  Step4()
+        {
+            switch (b[k])
+            {
+				
+                case 'e': 
+                    if (Ends("icate"))
+                    {
+                        R("ic"); break;
+                    }
+                    if (Ends("ative"))
+                    {
+                        R(""); break;
+                    }
+                    if (Ends("alize"))
+                    {
+                        R("al"); break;
+                    }
+                    break;
+				
+                case 'i': 
+                    if (Ends("iciti"))
+                    {
+                        R("ic"); break;
+                    }
+                    break;
+				
+                case 'l': 
+                    if (Ends("ical"))
+                    {
+                        R("ic"); break;
+                    }
+                    if (Ends("ful"))
+                    {
+                        R(""); break;
+                    }
+                    break;
+				
+                case 's': 
+                    if (Ends("ness"))
+                    {
+                        R(""); break;
+                    }
+                    break;
+            }
+        }
+		
+        /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+		
+        private void  Step5()
+        {
+            if (k == k0)
+                return ; /* for Bug 1 */
+            switch (b[k - 1])
+            {
+				
+                case 'a': 
+                    if (Ends("al"))
+                        break;
+                    return ;
+				
+                case 'c': 
+                    if (Ends("ance"))
+                        break;
+                    if (Ends("ence"))
+                        break;
+                    return ;
+				
+                case 'e': 
+                    if (Ends("er"))
+                        break; return ;
+				
+                case 'i': 
+                    if (Ends("ic"))
+                        break; return ;
+				
+                case 'l': 
+                    if (Ends("able"))
+                        break;
+                    if (Ends("ible"))
+                        break; return ;
+				
+                case 'n': 
+                    if (Ends("ant"))
+                        break;
+                    if (Ends("ement"))
+                        break;
+                    if (Ends("ment"))
+                        break;
+                    /* element etc. not stripped before the m */
+                    if (Ends("ent"))
+                        break;
+                    return ;
+				
+                case 'o': 
+                    if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+                        break;
+                    /* j >= 0 fixes Bug 2 */
+                    if (Ends("ou"))
+                        break;
+                    return ;
+                    /* takes care of -ous */
+				
+                case 's': 
+                    if (Ends("ism"))
+                        break;
+                    return ;
+				
+                case 't': 
+                    if (Ends("ate"))
+                        break;
+                    if (Ends("iti"))
+                        break;
+                    return ;
+				
+                case 'u': 
+                    if (Ends("ous"))
+                        break;
+                    return ;
+				
+                case 'v': 
+                    if (Ends("ive"))
+                        break;
+                    return ;
+				
+                case 'z': 
+                    if (Ends("ize"))
+                        break;
+                    return ;
+				
+                default: 
+                    return ;
+				
+            }
+            if (M() > 1)
+                k = j;
+        }
+		
+        /* step6() removes a final -e if m() > 1. */
+		
+        private void  Step6()
+        {
+            j = k;
+            if (b[k] == 'e')
+            {
+                int a = M();
+                if (a > 1 || a == 1 && !Cvc(k - 1))
+                    k--;
+            }
+            if (b[k] == 'l' && Doublec(k) && M() > 1)
+                k--;
+        }
+		
+		
+        /// <summary> Stem a word provided as a String.  Returns the result as a String.</summary>
+        public virtual System.String Stem(System.String s)
+        {
+            if (Stem(s.ToCharArray(), s.Length))
+            {
+                return ToString();
+            }
+            else
+                return s;
+        }
+		
+        /// <summary>Stem a word contained in a char[].  Returns true if the stemming process
+        /// resulted in a word different from the input.  You can retrieve the
+        /// result with getResultLength()/getResultBuffer() or toString().
+        /// </summary>
+        public virtual bool Stem(char[] word)
+        {
+            return Stem(word, word.Length);
+        }
+		
+        /// <summary>Stem a word contained in a portion of a char[] array.  Returns
+        /// true if the stemming process resulted in a word different from
+        /// the input.  You can retrieve the result with
+        /// getResultLength()/getResultBuffer() or toString().
+        /// </summary>
+        public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
+        {
+            Reset();
+            if (b.Length < wordLen)
+            {
+                char[] new_b = new char[wordLen + EXTRA];
+                b = new_b;
+            }
+            for (int j = 0; j < wordLen; j++)
+                b[j] = wordBuffer[offset + j];
+            i = wordLen;
+            return Stem(0);
+        }
+		
+        /// <summary>Stem a word contained in a leading portion of a char[] array.
+        /// Returns true if the stemming process resulted in a word different
+        /// from the input.  You can retrieve the result with
+        /// getResultLength()/getResultBuffer() or toString().
+        /// </summary>
+        public virtual bool Stem(char[] word, int wordLen)
+        {
+            return Stem(word, 0, wordLen);
+        }
+		
+        /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
+        /// Returns true if the stemming process resulted in a word different
+        /// from the input.  You can retrieve the result with
+        /// getResultLength()/getResultBuffer() or toString().
+        /// </summary>
+        public virtual bool Stem()
+        {
+            return Stem(0);
+        }
+		
+        public virtual bool Stem(int i0)
+        {
+            k = i - 1;
+            k0 = i0;
+            if (k > k0 + 1)
+            {
+                Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
+            }
+            // Also, a word is considered dirty if we lopped off letters
+            // Thanks to Ifigenia Vairelles for pointing this out.
+            if (i != k + 1)
+                dirty = true;
+            i = k + 1;
+            return dirty;
+        }
+		
+        /// <summary>Test program for demonstrating the Stemmer.  It reads a file and
+        /// stems each word, writing the result to standard out.
+        /// Usage: Stemmer file-name
+        /// </summary>
+        [STAThread]
+        public static void  Main(System.String[] args)
+        {
+            PorterStemmer s = new PorterStemmer();
 			
-			for (int i = 0; i < args.Length; i++)
-			{
-				try
-				{
+            for (int i = 0; i < args.Length; i++)
+            {
+                try
+                {
                     System.IO.BinaryReader in_Renamed = new System.IO.BinaryReader(System.IO.File.Open(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read));
-					byte[] buffer = new byte[1024];
-					int bufferLen, offset, ch;
+                    byte[] buffer = new byte[1024];
+                    int bufferLen, offset, ch;
 					
-					bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
-					offset = 0;
-					s.Reset();
+                    bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+                    offset = 0;
+                    s.Reset();
 					
-					while (true)
-					{
-						if (offset < bufferLen)
-							ch = buffer[offset++];
-						else
-						{
-							bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
-							offset = 0;
-							if (bufferLen <= 0)
-								ch = - 1;
-							else
-								ch = buffer[offset++];
-						}
+                    while (true)
+                    {
+                        if (offset < bufferLen)
+                            ch = buffer[offset++];
+                        else
+                        {
+                            bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+                            offset = 0;
+                            if (bufferLen <= 0)
+                                ch = - 1;
+                            else
+                                ch = buffer[offset++];
+                        }
 						
-						if (System.Char.IsLetter((char) ch))
-						{
-							s.Add(System.Char.ToLower((char) ch));
-						}
-						else
-						{
-							s.Stem();
-							System.Console.Out.Write(s.ToString());
-							s.Reset();
-							if (ch < 0)
-								break;
-							else
-							{
-								System.Console.Out.Write((char) ch);
-							}
-						}
-					}
+                        if (System.Char.IsLetter((char) ch))
+                        {
+                            s.Add(System.Char.ToLower((char) ch));
+                        }
+                        else
+                        {
+                            s.Stem();
+                            System.Console.Out.Write(s.ToString());
+                            s.Reset();
+                            if (ch < 0)
+                                break;
+                            else
+                            {
+                                System.Console.Out.Write((char) ch);
+                            }
+                        }
+                    }
 					
-					in_Renamed.Close();
-				}
-				catch (System.IO.IOException)
-				{
-					System.Console.Out.WriteLine("error reading " + args[i]);
-				}
-			}
-		}
-	}
+                    in_Renamed.Close();
+                }
+                catch (System.IO.IOException)
+                {
+                    System.Console.Out.WriteLine("error reading " + args[i]);
+                }
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/SimpleAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs Tue May  1 11:45:26 2007
@@ -20,13 +20,13 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>An Analyzer that filters LetterTokenizer with LowerCaseFilter. </summary>
+    /// <summary>An Analyzer that filters LetterTokenizer with LowerCaseFilter. </summary>
 	
-	public sealed class SimpleAnalyzer : Analyzer
-	{
-		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			return new LowerCaseTokenizer(reader);
-		}
-	}
+    public sealed class SimpleAnalyzer : Analyzer
+    {
+        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            return new LowerCaseTokenizer(reader);
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/CharStream.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs Tue May  1 11:45:26 2007
@@ -20,86 +20,100 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary> This interface describes a character stream that maintains line and
-	/// column number positions of the characters.  It also has the capability
-	/// to backup the stream to some extent.  An implementation of this
-	/// interface is used in the TokenManager implementation generated by
-	/// JavaCCParser.
-	/// 
-	/// All the methods except backup can be implemented in any fashion. backup
-	/// needs to be implemented correctly for the correct operation of the lexer.
-	/// Rest of the methods are all used to get information like line number,
-	/// column number and the String that constitutes a token and are not used
-	/// by the lexer. Hence their implementation won't affect the generated lexer's
-	/// operation.
-	/// </summary>
+    /// <summary> This interface describes a character stream that maintains line and
+    /// column number positions of the characters.  It also has the capability
+    /// to backup the stream to some extent.  An implementation of this
+    /// interface is used in the TokenManager implementation generated by
+    /// JavaCCParser.
+    /// 
+    /// All the methods except backup can be implemented in any fashion. backup
+    /// needs to be implemented correctly for the correct operation of the lexer.
+    /// Rest of the methods are all used to get information like line number,
+    /// column number and the String that constitutes a token and are not used
+    /// by the lexer. Hence their implementation won't affect the generated lexer's
+    /// operation.
+    /// </summary>
 	
-	public interface CharStream
-	{
+    public interface CharStream
+    {
 		
-		/// <summary> Returns the next character from the selected input.  The method
-		/// of selecting the input is the responsibility of the class
-		/// implementing this interface.  Can throw any java.io.IOException.
-		/// </summary>
-		char ReadChar();
-		
-		/// <summary> Returns the column number of the last character for current token (being
-		/// matched after the last call to BeginTOken).
-		/// </summary>
-		int GetEndColumn();
-		
-		/// <summary> Returns the line number of the last character for current token (being
-		/// matched after the last call to BeginTOken).
-		/// </summary>
-		int GetEndLine();
-		
-		/// <summary> Returns the column number of the first character for current token (being
-		/// matched after the last call to BeginTOken).
-		/// </summary>
-		int GetBeginColumn();
-		
-		/// <summary> Returns the line number of the first character for current token (being
-		/// matched after the last call to BeginTOken).
-		/// </summary>
-		int GetBeginLine();
-		
-		/// <summary> Backs up the input stream by amount steps. Lexer calls this method if it
-		/// had already read some characters, but could not use them to match a
-		/// (longer) token. So, they will be used again as the prefix of the next
-		/// token and it is the implemetation's responsibility to do this right.
-		/// </summary>
-		void  Backup(int amount);
-		
-		/// <summary> Returns the next character that marks the beginning of the next token.
-		/// All characters must remain in the buffer between two successive calls
-		/// to this method to implement backup correctly.
-		/// </summary>
-		char BeginToken();
-		
-		/// <summary> Returns a string made up of characters from the marked token beginning 
-		/// to the current buffer position. Implementations have the choice of returning
-		/// anything that they want to. For example, for efficiency, one might decide
-		/// to just return null, which is a valid implementation.
-		/// </summary>
-		System.String GetImage();
-		
-		/// <summary> Returns an array of characters that make up the suffix of length 'len' for
-		/// the currently matched token. This is used to build up the matched string
-		/// for use in actions in the case of MORE. A simple and inefficient
-		/// implementation of this is as follows :
-		/// 
-		/// {
-		/// String t = GetImage();
-		/// return t.substring(t.length() - len, t.length()).toCharArray();
-		/// }
-		/// </summary>
-		char[] GetSuffix(int len);
-		
-		/// <summary> The lexer calls this function to indicate that it is done with the stream
-		/// and hence implementations can free any resources held by this class.
-		/// Again, the body of this function can be just empty and it will not
-		/// affect the lexer's operation.
-		/// </summary>
-		void  Done();
-	}
+        /// <summary> Returns the next character from the selected input.  The method
+        /// of selecting the input is the responsibility of the class
+        /// implementing this interface.  Can throw any System.IO.IOException.
+        /// </summary>
+        char ReadChar();
+		
+        /// <summary> Returns the column position of the character last read.</summary>
+        /// <deprecated> 
+        /// </deprecated>
+        /// <seealso cref="GetEndColumn">
+        /// </seealso>
+        int GetColumn();
+		
+        /// <summary> Returns the line number of the character last read.</summary>
+        /// <deprecated> 
+        /// </deprecated>
+        /// <seealso cref="GetEndLine">
+        /// </seealso>
+        int GetLine();
+		
+        /// <summary> Returns the column number of the last character for current token (being
+        /// matched after the last call to BeginToken).
+        /// </summary>
+        int GetEndColumn();
+		
+        /// <summary> Returns the line number of the last character for current token (being
+        /// matched after the last call to BeginToken).
+        /// </summary>
+        int GetEndLine();
+		
+        /// <summary> Returns the column number of the first character for current token (being
+        /// matched after the last call to BeginToken).
+        /// </summary>
+        int GetBeginColumn();
+		
+        /// <summary> Returns the line number of the first character for current token (being
+        /// matched after the last call to BeginToken).
+        /// </summary>
+        int GetBeginLine();
+		
+        /// <summary> Backs up the input stream by amount steps. Lexer calls this method if it
+        /// had already read some characters, but could not use them to match a
+        /// (longer) token. So, they will be used again as the prefix of the next
+        /// token and it is the implementation's responsibility to do this right.
+        /// </summary>
+        void  Backup(int amount);
+		
+        /// <summary> Returns the next character that marks the beginning of the next token.
+        /// All characters must remain in the buffer between two successive calls
+        /// to this method to implement backup correctly.
+        /// </summary>
+        char BeginToken();
+		
+        /// <summary> Returns a string made up of characters from the marked token beginning 
+        /// to the current buffer position. Implementations have the choice of returning
+        /// anything that they want to. For example, for efficiency, one might decide
+        /// to just return null, which is a valid implementation.
+        /// </summary>
+        System.String GetImage();
+		
+        /// <summary> Returns an array of characters that make up the suffix of length 'len' for
+        /// the currently matched token. This is used to build up the matched string
+        /// for use in actions in the case of MORE. A simple and inefficient
+        /// implementation of this is as follows :
+        /// 
+        /// {
+        /// String t = GetImage();
+        /// return t.substring(t.length() - len, t.length()).toCharArray();
+        /// }
+        /// </summary>
+        char[] GetSuffix(int len);
+		
+        /// <summary> The lexer calls this function to indicate that it is done with the stream
+        /// and hence implementations can free any resources held by this class.
+        /// Again, the body of this function can be just empty and it will not
+        /// affect the lexer's operation.
+        /// </summary>
+        void  Done();
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/FastCharStream.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs Tue May  1 11:45:26 2007
@@ -19,132 +19,132 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary>An efficient implementation of JavaCC's CharStream interface.  <p>Note that
-	/// this does not do line-number counting, but instead keeps track of the
-	/// character position of the token in the input, as required by Lucene's {@link
-	/// Lucene.Net.analysis.Token} API. 
-	/// </summary>
-	public sealed class FastCharStream : CharStream
-	{
-		internal char[] buffer = null;
-		
-		internal int bufferLength = 0; // end of valid chars
-		internal int bufferPosition = 0; // next char to read
-		
-		internal int tokenStart = 0; // offset in buffer
-		internal int bufferStart = 0; // position in file of buffer
-		
-		internal System.IO.TextReader input; // source of chars
-		
-		/// <summary>Constructs from a Reader. </summary>
-		public FastCharStream(System.IO.TextReader r)
-		{
-			input = r;
-		}
-		
-		public char ReadChar()
-		{
-			if (bufferPosition >= bufferLength)
-				Refill();
-			return buffer[bufferPosition++];
-		}
-		
-		private void  Refill()
-		{
-			int newPosition = bufferLength - tokenStart;
+    /// <summary>An efficient implementation of JavaCC's CharStream interface.  Note that
+    /// this does not do line-number counting, but instead keeps track of the
+    /// character position of the token in the input, as required by Lucene's
+    /// <see cref="Lucene.Net.Analysis.Token"/> API. 
+    /// </summary>
+    public sealed class FastCharStream : CharStream
+    {
+        internal char[] buffer = null;
+		
+        internal int bufferLength = 0; // end of valid chars
+        internal int bufferPosition = 0; // next char to read
+		
+        internal int tokenStart = 0; // offset in buffer
+        internal int bufferStart = 0; // position in file of buffer
+		
+        internal System.IO.TextReader input; // source of chars
+		
+        /// <summary>Constructs from a Reader. </summary>
+        public FastCharStream(System.IO.TextReader r)
+        {
+            input = r;
+        }
+		
+        public char ReadChar()
+        {
+            if (bufferPosition >= bufferLength)
+                Refill();
+            return buffer[bufferPosition++];
+        }
+		
+        private void  Refill()
+        {
+            int newPosition = bufferLength - tokenStart;
 			
-			if (tokenStart == 0)
-			{
-				// token won't fit in buffer
-				if (buffer == null)
-				{
-					// first time: alloc buffer
-					buffer = new char[2048];
-				}
-				else if (bufferLength == buffer.Length)
-				{
-					// grow buffer
-					char[] newBuffer = new char[buffer.Length * 2];
-					Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
-					buffer = newBuffer;
-				}
-			}
-			else
-			{
-				// shift token to front
-				Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
-			}
+            if (tokenStart == 0)
+            {
+                // token won't fit in buffer
+                if (buffer == null)
+                {
+                    // first time: alloc buffer
+                    buffer = new char[2048];
+                }
+                else if (bufferLength == buffer.Length)
+                {
+                    // grow buffer
+                    char[] newBuffer = new char[buffer.Length * 2];
+                    Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
+                    buffer = newBuffer;
+                }
+            }
+            else
+            {
+                // shift token to front
+                Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
+            }
 			
-			bufferLength = newPosition; // update state
-			bufferPosition = newPosition;
-			bufferStart += tokenStart;
-			tokenStart = 0;
+            bufferLength = newPosition; // update state
+            bufferPosition = newPosition;
+            bufferStart += tokenStart;
+            tokenStart = 0;
 			
-			int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
-			if (charsRead <= 0)
-				throw new System.IO.IOException("read past eof");
-			else
-				bufferLength += charsRead;
-		}
-		
-		public char BeginToken()
-		{
-			tokenStart = bufferPosition;
-			return ReadChar();
-		}
-		
-		public void  Backup(int amount)
-		{
-			bufferPosition -= amount;
-		}
-		
-		public System.String GetImage()
-		{
-			return new System.String(buffer, tokenStart, bufferPosition - tokenStart);
-		}
-		
-		public char[] GetSuffix(int len)
-		{
-			char[] value_Renamed = new char[len];
-			Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len);
-			return value_Renamed;
-		}
-		
-		public void  Done()
-		{
-			try
-			{
-				input.Close();
-			}
-			catch (System.IO.IOException e)
-			{
-				System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
-			}
-		}
-		
-		public int GetColumn()
-		{
-			return bufferStart + bufferPosition;
-		}
-		public int GetLine()
-		{
-			return 1;
-		}
-		public int GetEndColumn()
-		{
-			return bufferStart + bufferPosition;
-		}
-		public int GetEndLine()
-		{
-			return 1;
-		}
-		public int GetBeginColumn()
-		{
-			return bufferStart + tokenStart;
-		}
-		public int GetBeginLine()
-		{
-			return 1;
-		}
-	}
+            int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
+            if (charsRead <= 0)
+                throw new System.IO.IOException("read past eof");
+            else
+                bufferLength += charsRead;
+        }
+		
+        public char BeginToken()
+        {
+            tokenStart = bufferPosition;
+            return ReadChar();
+        }
+		
+        public void  Backup(int amount)
+        {
+            bufferPosition -= amount;
+        }
+		
+        public System.String GetImage()
+        {
+            return new System.String(buffer, tokenStart, bufferPosition - tokenStart);
+        }
+		
+        public char[] GetSuffix(int len)
+        {
+            char[] value_Renamed = new char[len];
+            Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len);
+            return value_Renamed;
+        }
+		
+        public void  Done()
+        {
+            try
+            {
+                input.Close();
+            }
+            catch (System.IO.IOException e)
+            {
+                System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
+            }
+        }
+		
+        public int GetColumn()
+        {
+            return bufferStart + bufferPosition;
+        }
+        public int GetLine()
+        {
+            return 1;
+        }
+        public int GetEndColumn()
+        {
+            return bufferStart + bufferPosition;
+        }
+        public int GetEndLine()
+        {
+            return 1;
+        }
+        public int GetBeginColumn()
+        {
+            return bufferStart + tokenStart;
+        }
+        public int GetBeginLine()
+        {
+            return 1;
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/Package.html?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html Tue May  1 11:45:26 2007
@@ -1,15 +1,15 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<html>
-<head>
-   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-   <meta name="Author" content="Doug Cutting">
-</head>
-<body>
-A grammar-based tokenizer constructed with JavaCC.
-<p>Note that JavaCC defines lots of public classes, methods and fields
-that do not need to be public.&nbsp; These clutter the documentation.&nbsp;
-Sorry.
-<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>org.apache.lucene.analysis.Token</tt>
-must always be fully qualified in source code in this package.
-</body>
-</html>
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+A grammar-based tokenizer constructed with JavaCC.
+<p>Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public.&nbsp; These clutter the documentation.&nbsp;
+Sorry.
+<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>Lucene.Net.Analysis.Token</tt>
+must always be fully qualified in source code in this package.
+</body>
+</html>

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/ParseException.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs Tue May  1 11:45:26 2007
@@ -1,20 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 /* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */
 using System;
 namespace Lucene.Net.Analysis.Standard
@@ -29,7 +12,7 @@
 	/// mechanisms so long as you retain the public fields.
 	/// </summary>
 	[Serializable]
-	public class ParseException : System.IO.IOException
+	public class ParseException:System.IO.IOException
 	{
 		/// <summary> This method has the standard behavior when this object has been
 		/// created using the standard constructors.  Otherwise, it uses
@@ -123,14 +106,14 @@
 		/// these constructors.
 		/// </summary>
 		
-		public ParseException() : base()
+		public ParseException():base()
 		{
-            specialConstructor = false;
+			specialConstructor = false;
 		}
 		
-		public ParseException(System.String message) : base(message)
+		public ParseException(System.String message):base(message)
 		{
-            specialConstructor = false;
+			specialConstructor = false;
 		}
 		
 		/// <summary> This variable determines which constructor was used to create
@@ -158,7 +141,7 @@
 		public System.String[] tokenImage;
 		
 		/// <summary> The end of line string for this machine.</summary>
-		protected internal System.String eol = System.Configuration.ConfigurationSettings.AppSettings.Get("line.separator");
+		protected internal System.String eol = SupportClass.AppSettings.Get("line.separator", "\n");
 		
 		/// <summary> Used to convert raw characters to their escaped version
 		/// when these raw version cannot be used as part of an ASCII

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs Tue May  1 11:45:26 2007
@@ -21,68 +21,68 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary> Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
-	/// LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
-	/// 
-	/// </summary>
-	/// <version>  $Id: StandardAnalyzer.java 219090 2005-07-14 20:36:28Z dnaber $
-	/// </version>
-	public class StandardAnalyzer : Analyzer
-	{
-		private System.Collections.Hashtable stopSet;
-		
-		/// <summary>An array containing some common English words that are usually not
-		/// useful for searching. 
-		/// </summary>
-		public static readonly System.String[] STOP_WORDS;
-		
-		/// <summary>Builds an analyzer with the default stop words ({@link #STOP_WORDS}). </summary>
-		public StandardAnalyzer() : this(STOP_WORDS)
-		{
-		}
-		
-		/// <summary>Builds an analyzer with the given stop words. </summary>
-		public StandardAnalyzer(System.Collections.Hashtable stopWords)
-		{
-			stopSet = stopWords;
-		}
-		
-		/// <summary>Builds an analyzer with the given stop words. </summary>
-		public StandardAnalyzer(System.String[] stopWords)
-		{
-			stopSet = StopFilter.MakeStopSet(stopWords);
-		}
-		
-		/// <summary>Builds an analyzer with the stop words from the given file.</summary>
-		/// <seealso cref="WordlistLoader.GetWordSet(File)">
-		/// </seealso>
-		public StandardAnalyzer(System.IO.FileInfo stopwords)
-		{
-			stopSet = WordlistLoader.GetWordSet(stopwords);
-		}
-		
-		/// <summary>Builds an analyzer with the stop words from the given reader.</summary>
-		/// <seealso cref="WordlistLoader.GetWordSet(Reader)">
-		/// </seealso>
-		public StandardAnalyzer(System.IO.TextReader stopwords)
-		{
-			stopSet = WordlistLoader.GetWordSet(stopwords);
-		}
-		
-		/// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
-		/// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. 
-		/// </summary>
-		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			TokenStream result = new StandardTokenizer(reader);
-			result = new StandardFilter(result);
-			result = new LowerCaseFilter(result);
-			result = new StopFilter(result, stopSet);
-			return result;
-		}
-		static StandardAnalyzer()
-		{
-			STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
-		}
-	}
+    /// <summary> Filters <see cref="StandardTokenizer"/> with <see cref="StandardFilter"/>,
+    /// <see cref="LowerCaseFilter"/> and <see cref="StopFilter"/>, using a list of English stop words.
+    /// 
+    /// </summary>
+    /// <version>  $Id: StandardAnalyzer.java 219090 2005-07-14 20:36:28Z dnaber $
+    /// </version>
+    public class StandardAnalyzer : Analyzer
+    {
+        private System.Collections.Hashtable stopSet;
+		
+        /// <summary>An array containing some common English words that are usually not
+        /// useful for searching. 
+        /// </summary>
+        public static readonly System.String[] STOP_WORDS;
+		
+        /// <summary>Builds an analyzer with the default stop words (<see cref="STOP_WORDS"/>). </summary>
+        public StandardAnalyzer() : this(STOP_WORDS)
+        {
+        }
+		
+        /// <summary>Builds an analyzer with the given stop words. </summary>
+        public StandardAnalyzer(System.Collections.Hashtable stopWords)
+        {
+            stopSet = stopWords;
+        }
+		
+        /// <summary>Builds an analyzer with the given stop words. </summary>
+        public StandardAnalyzer(System.String[] stopWords)
+        {
+            stopSet = StopFilter.MakeStopSet(stopWords);
+        }
+		
+        /// <summary>Builds an analyzer with the stop words from the given file.</summary>
+        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
+        /// </seealso>
+        public StandardAnalyzer(System.IO.FileInfo stopwords)
+        {
+            stopSet = WordlistLoader.GetWordSet(stopwords);
+        }
+		
+        /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
+        /// </seealso>
+        public StandardAnalyzer(System.IO.TextReader stopwords)
+        {
+            stopSet = WordlistLoader.GetWordSet(stopwords);
+        }
+		
+        /// <summary>Constructs a <see cref="StandardTokenizer"/> filtered by a
+        /// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/> and a <see cref="StopFilter"/>. 
+        /// </summary>
+        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            TokenStream result = new StandardTokenizer(reader);
+            result = new StandardFilter(result);
+            result = new LowerCaseFilter(result);
+            result = new StopFilter(result, stopSet);
+            return result;
+        }
+        static StandardAnalyzer()
+        {
+            STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardFilter.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs Tue May  1 11:45:26 2007
@@ -21,54 +21,54 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary>Normalizes tokens extracted with {@link StandardTokenizer}. </summary>
+    /// <summary>Normalizes tokens extracted with <see cref="StandardTokenizer"/>. </summary>
 	
-	public sealed class StandardFilter : TokenFilter
-	{
+    public sealed class StandardFilter : TokenFilter
+    {
 		
 		
-		/// <summary>Construct filtering <i>in</i>. </summary>
-		public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
-		{
-		}
+        /// <summary>Constructs a filter over the token stream <i>in_Renamed</i>. </summary>
+        public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
+        {
+        }
 		
-		private static readonly System.String APOSTROPHE_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];
-		private static readonly System.String ACRONYM_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];
+        private static readonly System.String APOSTROPHE_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];
+        private static readonly System.String ACRONYM_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];
 		
-		/// <summary>Returns the next token in the stream, or null at EOS.
-		/// <p>Removes <tt>'s</tt> from the end of words.
-		/// <p>Removes dots from acronyms.
-		/// </summary>
-		public override Lucene.Net.Analysis.Token Next()
-		{
-			Lucene.Net.Analysis.Token t = input.Next();
+        /// <summary>Returns the next token in the stream, or null at EOS.
+        /// <p>Removes a trailing <tt>'s</tt> or <tt>'S</tt> from the end of words.</p>
+        /// <p>Removes dots from acronyms.</p>
+        /// </summary>
+        public override Lucene.Net.Analysis.Token Next()
+        {
+            Lucene.Net.Analysis.Token t = input.Next();
 			
-			if (t == null)
-				return null;
+            if (t == null)
+                return null;
 			
-			System.String text = t.TermText();
-			System.String type = t.Type();
+            System.String text = t.TermText();
+            System.String type = t.Type();
 			
-			if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
-			{
-				return new Lucene.Net.Analysis.Token(text.Substring(0, (text.Length - 2) - (0)), t.StartOffset(), t.EndOffset(), type);
-			}
-			else if (type == ACRONYM_TYPE)
-			{
-				// remove dots
-				System.Text.StringBuilder trimmed = new System.Text.StringBuilder();
-				for (int i = 0; i < text.Length; i++)
-				{
-					char c = text[i];
-					if (c != '.')
-						trimmed.Append(c);
-				}
-				return new Lucene.Net.Analysis.Token(trimmed.ToString(), t.StartOffset(), t.EndOffset(), type);
-			}
-			else
-			{
-				return t;
-			}
-		}
-	}
+            if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
+            {
+                return new Lucene.Net.Analysis.Token(text.Substring(0, (text.Length - 2) - (0)), t.StartOffset(), t.EndOffset(), type);
+            }
+            else if (type == ACRONYM_TYPE)
+            {
+                // remove dots
+                System.Text.StringBuilder trimmed = new System.Text.StringBuilder();
+                for (int i = 0; i < text.Length; i++)
+                {
+                    char c = text[i];
+                    if (c != '.')
+                        trimmed.Append(c);
+                }
+                return new Lucene.Net.Analysis.Token(trimmed.ToString(), t.StartOffset(), t.EndOffset(), type);
+            }
+            else
+            {
+                return t;
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Tue May  1 11:45:26 2007
@@ -21,99 +21,99 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary>A grammar-based tokenizer constructed with JavaCC.
-	/// 
-	/// <p> This should be a good tokenizer for most European-language documents:
-	/// 
-	/// <ul>
-	/// <li>Splits words at punctuation characters, removing punctuation. However, a 
-	/// dot that's not followed by whitespace is considered part of a token.
-	/// <li>Splits words at hyphens, unless there's a number in the token, in which case
-	/// the whole token is interpreted as a product number and is not split.
-	/// <li>Recognizes email addresses and internet hostnames as one token.
-	/// </ul>
-	/// 
-	/// <p>Many applications have specific tokenizer needs.  If this tokenizer does
-	/// not suit your application, please consider copying this source code
-	/// directory to your project and maintaining your own grammar-based tokenizer.
-	/// </summary>
-	public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
-	{
-		
-		/// <summary>Constructs a tokenizer for this Reader. </summary>
-		public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
-		{
-			this.input = reader;
-		}
-		
-		/// <summary>Returns the next token in the stream, or null at EOS.
-		/// <p>The returned token's type is set to an element of {@link
-		/// StandardTokenizerConstants#tokenImage}.
-		/// </summary>
-		public override Lucene.Net.Analysis.Token Next()
-		{
-			Token token = null;
-			switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
-			{
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
-					break;
-				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
-					break;
-				
-				case 0: 
-					token = Jj_consume_token(0);
-					break;
-				
-				default: 
-					jj_la1[0] = jj_gen;
-					Jj_consume_token(- 1);
-					throw new ParseException();
-				
-			}
-			if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
-			{
-				{
-					if (true)
-						return null;
-				}
-			}
-			else
-			{
-				{
-					if (true)
-						return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
-				}
-			}
-			throw new System.ApplicationException("Missing return statement in function");
-		}
+    /// <summary>A grammar-based tokenizer constructed with JavaCC.
+    /// 
+    /// <p> This should be a good tokenizer for most European-language documents:</p>
+    /// 
+    /// <ul>
+    /// <li>Splits words at punctuation characters, removing punctuation. However, a 
+    /// dot that's not followed by whitespace is considered part of a token.</li>
+    /// <li>Splits words at hyphens, unless there's a number in the token, in which case
+    /// the whole token is interpreted as a product number and is not split.</li>
+    /// <li>Recognizes email addresses and internet hostnames as one token.</li>
+    /// </ul>
+    /// 
+    /// <p>Many applications have specific tokenizer needs.  If this tokenizer does
+    /// not suit your application, please consider copying this source code
+    /// directory to your project and maintaining your own grammar-based tokenizer.</p>
+    /// </summary>
+    public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
+    {
+		
+        /// <summary>Constructs a tokenizer for this Reader. </summary>
+        public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
+        {
+            this.input = reader;
+        }
+		
+        /// <summary>Returns the next token in the stream, or null at EOS.
+        /// <p>The returned token's type is set to an element of
+        /// <see cref="StandardTokenizerConstants.tokenImage"/>.</p>
+        /// </summary>
+        public override Lucene.Net.Analysis.Token Next()
+        {
+            Token token = null;
+            switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+            {
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
+                    break;
+				
+                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ: 
+                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
+                    break;
+				
+                case 0: 
+                    token = Jj_consume_token(0);
+                    break;
+				
+                default: 
+                    jj_la1[0] = jj_gen;
+                    Jj_consume_token(- 1);
+                    throw new ParseException();
+				
+            }
+            if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
+            {
+            {
+                if (true)
+                    return null;
+            }
+            }
+            else
+            {
+            {
+                if (true)
+                    return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
+            }
+            }
+            throw new System.ApplicationException("Missing return statement in function");
+        }
 
         /// <summary>By default, closes the input Reader. </summary>
         public override void Close() 
@@ -123,165 +123,165 @@
         }
 
         public StandardTokenizerTokenManager token_source;
-		public Token token, jj_nt;
-		private int jj_ntk;
-		private int jj_gen;
-		private int[] jj_la1 = new int[1];
-		private static int[] jj_la1_0_Renamed_Field;
-		private static void  jj_la1_0()
-		{
-			jj_la1_0_Renamed_Field = new int[]{0x10ff};
-		}
-		
-		public StandardTokenizer(CharStream stream)
-		{
-			token_source = new StandardTokenizerTokenManager(stream);
-			token = new Token();
-			jj_ntk = - 1;
-			jj_gen = 0;
-			for (int i = 0; i < 1; i++)
-				jj_la1[i] = - 1;
-		}
-		
-		public virtual void  ReInit(CharStream stream)
-		{
-			token_source.ReInit(stream);
-			token = new Token();
-			jj_ntk = - 1;
-			jj_gen = 0;
-			for (int i = 0; i < 1; i++)
-				jj_la1[i] = - 1;
-		}
-		
-		public StandardTokenizer(StandardTokenizerTokenManager tm)
-		{
-			token_source = tm;
-			token = new Token();
-			jj_ntk = - 1;
-			jj_gen = 0;
-			for (int i = 0; i < 1; i++)
-				jj_la1[i] = - 1;
-		}
-		
-		public virtual void  ReInit(StandardTokenizerTokenManager tm)
-		{
-			token_source = tm;
-			token = new Token();
-			jj_ntk = - 1;
-			jj_gen = 0;
-			for (int i = 0; i < 1; i++)
-				jj_la1[i] = - 1;
-		}
-		
-		private Token Jj_consume_token(int kind)
-		{
-			Token oldToken;
-			if ((oldToken = token).next != null)
-				token = token.next;
-			else
-				token = token.next = token_source.GetNextToken();
-			jj_ntk = - 1;
-			if (token.kind == kind)
-			{
-				jj_gen++;
-				return token;
-			}
-			token = oldToken;
-			jj_kind = kind;
-			throw GenerateParseException();
-		}
-		
-		public Token GetNextToken()
-		{
-			if (token.next != null)
-				token = token.next;
-			else
-				token = token.next = token_source.GetNextToken();
-			jj_ntk = - 1;
-			jj_gen++;
-			return token;
-		}
-		
-		public Token GetToken(int index)
-		{
-			Token t = token;
-			for (int i = 0; i < index; i++)
-			{
-				if (t.next != null)
-					t = t.next;
-				else
-					t = t.next = token_source.GetNextToken();
-			}
-			return t;
-		}
-		
-		private int Jj_ntk()
-		{
-			if ((jj_nt = token.next) == null)
-				return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
-			else
-				return (jj_ntk = jj_nt.kind);
-		}
-		
-		private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
-		private int[] jj_expentry;
-		private int jj_kind = - 1;
-		
-		public virtual ParseException GenerateParseException()
-		{
-			jj_expentries.Clear();
-			bool[] la1tokens = new bool[16];
-			for (int i = 0; i < 16; i++)
-			{
-				la1tokens[i] = false;
-			}
-			if (jj_kind >= 0)
-			{
-				la1tokens[jj_kind] = true;
-				jj_kind = - 1;
-			}
-			for (int i = 0; i < 1; i++)
-			{
-				if (jj_la1[i] == jj_gen)
-				{
-					for (int j = 0; j < 32; j++)
-					{
-						if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
-						{
-							la1tokens[j] = true;
-						}
-					}
-				}
-			}
-			for (int i = 0; i < 16; i++)
-			{
-				if (la1tokens[i])
-				{
-					jj_expentry = new int[1];
-					jj_expentry[0] = i;
-					jj_expentries.Add(jj_expentry);
-				}
-			}
-			int[][] exptokseq = new int[jj_expentries.Count][];
-			for (int i = 0; i < jj_expentries.Count; i++)
-			{
-				exptokseq[i] = (int[]) jj_expentries[i];
-			}
-			return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
-		}
-		
-		public void  Enable_tracing()
-		{
-		}
-		
-		public void  Disable_tracing()
-		{
-		}
-		static StandardTokenizer()
-		{
-			{
-				jj_la1_0();
-			}
-		}
-	}
+        public Token token, jj_nt;
+        private int jj_ntk;
+        private int jj_gen;
+        private int[] jj_la1 = new int[1];
+        private static int[] jj_la1_0_Renamed_Field;
+        private static void  jj_la1_0()
+        {
+            jj_la1_0_Renamed_Field = new int[]{0x10ff};
+        }
+		
+        public StandardTokenizer(CharStream stream)
+        {
+            token_source = new StandardTokenizerTokenManager(stream);
+            token = new Token();
+            jj_ntk = - 1;
+            jj_gen = 0;
+            for (int i = 0; i < 1; i++)
+                jj_la1[i] = - 1;
+        }
+		
+        public virtual void  ReInit(CharStream stream)
+        {
+            token_source.ReInit(stream);
+            token = new Token();
+            jj_ntk = - 1;
+            jj_gen = 0;
+            for (int i = 0; i < 1; i++)
+                jj_la1[i] = - 1;
+        }
+		
+        public StandardTokenizer(StandardTokenizerTokenManager tm)
+        {
+            token_source = tm;
+            token = new Token();
+            jj_ntk = - 1;
+            jj_gen = 0;
+            for (int i = 0; i < 1; i++)
+                jj_la1[i] = - 1;
+        }
+		
+        public virtual void  ReInit(StandardTokenizerTokenManager tm)
+        {
+            token_source = tm;
+            token = new Token();
+            jj_ntk = - 1;
+            jj_gen = 0;
+            for (int i = 0; i < 1; i++)
+                jj_la1[i] = - 1;
+        }
+		
+        private Token Jj_consume_token(int kind)
+        {
+            Token oldToken;
+            if ((oldToken = token).next != null)
+                token = token.next;
+            else
+                token = token.next = token_source.GetNextToken();
+            jj_ntk = - 1;
+            if (token.kind == kind)
+            {
+                jj_gen++;
+                return token;
+            }
+            token = oldToken;
+            jj_kind = kind;
+            throw GenerateParseException();
+        }
+		
+        public Token GetNextToken()
+        {
+            if (token.next != null)
+                token = token.next;
+            else
+                token = token.next = token_source.GetNextToken();
+            jj_ntk = - 1;
+            jj_gen++;
+            return token;
+        }
+		
+        public Token GetToken(int index)
+        {
+            Token t = token;
+            for (int i = 0; i < index; i++)
+            {
+                if (t.next != null)
+                    t = t.next;
+                else
+                    t = t.next = token_source.GetNextToken();
+            }
+            return t;
+        }
+		
+        private int Jj_ntk()
+        {
+            if ((jj_nt = token.next) == null)
+                return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
+            else
+                return (jj_ntk = jj_nt.kind);
+        }
+		
+        private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
+        private int[] jj_expentry;
+        private int jj_kind = - 1;
+		
+        public virtual ParseException GenerateParseException()
+        {
+            jj_expentries.Clear();
+            bool[] la1tokens = new bool[16];
+            for (int i = 0; i < 16; i++)
+            {
+                la1tokens[i] = false;
+            }
+            if (jj_kind >= 0)
+            {
+                la1tokens[jj_kind] = true;
+                jj_kind = - 1;
+            }
+            for (int i = 0; i < 1; i++)
+            {
+                if (jj_la1[i] == jj_gen)
+                {
+                    for (int j = 0; j < 32; j++)
+                    {
+                        if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
+                        {
+                            la1tokens[j] = true;
+                        }
+                    }
+                }
+            }
+            for (int i = 0; i < 16; i++)
+            {
+                if (la1tokens[i])
+                {
+                    jj_expentry = new int[1];
+                    jj_expentry[0] = i;
+                    jj_expentries.Add(jj_expentry);
+                }
+            }
+            int[][] exptokseq = new int[jj_expentries.Count][];
+            for (int i = 0; i < jj_expentries.Count; i++)
+            {
+                exptokseq[i] = (int[]) jj_expentries[i];
+            }
+            return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
+        }
+		
+        public void  Enable_tracing()
+        {
+        }
+		
+        public void  Disable_tracing()
+        {
+        }
+        static StandardTokenizer()
+        {
+        {
+            jj_la1_0();
+        }
+        }
+    }
 }