You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ar...@apache.org on 2007/05/01 20:45:35 UTC
svn commit: r534192 [3/19] - in /incubator/lucene.net/trunk/C#: ./ src/
src/Demo/ src/Demo/DeleteFiles/ src/Demo/DemoLib/ src/Demo/DemoLib/HTML/
src/Demo/IndexFiles/ src/Demo/IndexHtml/ src/Demo/SearchFiles/
src/Lucene.Net/ src/Lucene.Net/Analysis/ src...
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/PorterStemmer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs Tue May 1 11:45:26 2007
@@ -45,706 +45,706 @@
namespace Lucene.Net.Analysis
{
- /// <summary>
- /// Stemmer, implementing the Porter Stemming Algorithm
- ///
- /// The Stemmer class transforms a word into its root form. The input
- /// word can be provided a character at time (by calling add()), or at once
- /// by calling one of the various stem(something) methods.
- /// </summary>
+ /// <summary>
+ /// Stemmer, implementing the Porter Stemming Algorithm
+ ///
+ /// The Stemmer class transforms a word into its root form. The input
+ /// word can be provided a character at time (by calling add()), or at once
+ /// by calling one of the various stem(something) methods.
+ /// </summary>
- class PorterStemmer
- {
- private char[] b;
- private int i, j, k, k0;
- private bool dirty = false;
- private const int INC = 50; /* unit of size whereby b is increased */
- private const int EXTRA = 1;
-
- public PorterStemmer()
- {
- b = new char[INC];
- i = 0;
- }
-
- /// <summary> reset() resets the stemmer so it can stem another word. If you invoke
- /// the stemmer by calling add(char) and then Stem(), you must call reset()
- /// before starting another word.
- /// </summary>
- public virtual void Reset()
- {
- i = 0; dirty = false;
- }
-
- /// <summary> Add a character to the word being stemmed. When you are finished
- /// adding characters, you can call Stem(void) to process the word.
- /// </summary>
- public virtual void Add(char ch)
- {
- if (b.Length <= i + EXTRA)
- {
- char[] new_b = new char[b.Length + INC];
- for (int c = 0; c < b.Length; c++)
- new_b[c] = b[c];
- b = new_b;
- }
- b[i++] = ch;
- }
-
- /// <summary> After a word has been stemmed, it can be retrieved by toString(),
- /// or a reference to the internal buffer can be retrieved by getResultBuffer
- /// and getResultLength (which is generally more efficient.)
- /// </summary>
- public override System.String ToString()
- {
- return new System.String(b, 0, i);
- }
-
- /// <summary> Returns the length of the word resulting from the stemming process.</summary>
- public virtual int GetResultLength()
- {
- return i;
- }
-
- /// <summary> Returns a reference to a character buffer containing the results of
- /// the stemming process. You also need to consult getResultLength()
- /// to determine the length of the result.
- /// </summary>
- public virtual char[] GetResultBuffer()
- {
- return b;
- }
-
- /* cons(i) is true <=> b[i] is a consonant. */
-
- private bool Cons(int i)
- {
- switch (b[i])
- {
-
- case 'a':
- case 'e':
- case 'i':
- case 'o':
- case 'u':
- return false;
-
- case 'y':
- return (i == k0)?true:!Cons(i - 1);
-
- default:
- return true;
-
- }
- }
-
- /* m() measures the number of consonant sequences between k0 and j. if c is
- a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- presence,
-
- <c><v> gives 0
- <c>vc<v> gives 1
- <c>vcvc<v> gives 2
- <c>vcvcvc<v> gives 3
- ....
- */
-
- private int M()
- {
- int n = 0;
- int i = k0;
- while (true)
- {
- if (i > j)
- return n;
- if (!Cons(i))
- break;
- i++;
- }
- i++;
- while (true)
- {
- while (true)
- {
- if (i > j)
- return n;
- if (Cons(i))
- break;
- i++;
- }
- i++;
- n++;
- while (true)
- {
- if (i > j)
- return n;
- if (!Cons(i))
- break;
- i++;
- }
- i++;
- }
- }
-
- /* vowelinstem() is true <=> k0,...j contains a vowel */
-
- private bool Vowelinstem()
- {
- int i;
- for (i = k0; i <= j; i++)
- if (!Cons(i))
- return true;
- return false;
- }
-
- /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
-
- private bool Doublec(int j)
- {
- if (j < k0 + 1)
- return false;
- if (b[j] != b[j - 1])
- return false;
- return Cons(j);
- }
-
- /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
- and also if the second c is not w,x or y. this is used when trying to
- restore an e at the end of a short word. e.g.
-
- cav(e), lov(e), hop(e), crim(e), but
- snow, box, tray.
-
- */
-
- private bool Cvc(int i)
- {
- if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
- return false;
- else
- {
- int ch = b[i];
- if (ch == 'w' || ch == 'x' || ch == 'y')
- return false;
- }
- return true;
- }
-
- private bool Ends(System.String s)
- {
- int l = s.Length;
- int o = k - l + 1;
- if (o < k0)
- return false;
- for (int i = 0; i < l; i++)
- if (b[o + i] != s[i])
- return false;
- j = k - l;
- return true;
- }
-
- /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
- k. */
-
- internal virtual void Setto(System.String s)
- {
- int l = s.Length;
- int o = j + 1;
- for (int i = 0; i < l; i++)
- b[o + i] = s[i];
- k = j + l;
- dirty = true;
- }
-
- /* r(s) is used further down. */
-
- internal virtual void R(System.String s)
- {
- if (M() > 0)
- Setto(s);
- }
-
- /* step1() gets rid of plurals and -ed or -ing. e.g.
-
- caresses -> caress
- ponies -> poni
- ties -> ti
- caress -> caress
- cats -> cat
-
- feed -> feed
- agreed -> agree
- disabled -> disable
-
- matting -> mat
- mating -> mate
- meeting -> meet
- milling -> mill
- messing -> mess
-
- meetings -> meet
-
- */
-
- private void Step1()
- {
- if (b[k] == 's')
- {
- if (Ends("sses"))
- k -= 2;
- else if (Ends("ies"))
- Setto("i");
- else if (b[k - 1] != 's')
- k--;
- }
- if (Ends("eed"))
- {
- if (M() > 0)
- k--;
- }
- else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
- {
- k = j;
- if (Ends("at"))
- Setto("ate");
- else if (Ends("bl"))
- Setto("ble");
- else if (Ends("iz"))
- Setto("ize");
- else if (Doublec(k))
- {
- int ch = b[k--];
- if (ch == 'l' || ch == 's' || ch == 'z')
- k++;
- }
- else if (M() == 1 && Cvc(k))
- Setto("e");
- }
- }
-
- /* step2() turns terminal y to i when there is another vowel in the stem. */
-
- private void Step2()
- {
- if (Ends("y") && Vowelinstem())
- {
- b[k] = 'i';
- dirty = true;
- }
- }
-
- /* step3() maps double suffices to single ones. so -ization ( = -ize plus
- -ation) maps to -ize etc. note that the string before the suffix must give
- m() > 0. */
-
- private void Step3()
- {
- if (k == k0)
- return ; /* For Bug 1 */
- switch (b[k - 1])
- {
-
- case 'a':
- if (Ends("ational"))
- {
- R("ate"); break;
- }
- if (Ends("tional"))
- {
- R("tion"); break;
- }
- break;
-
- case 'c':
- if (Ends("enci"))
- {
- R("ence"); break;
- }
- if (Ends("anci"))
- {
- R("ance"); break;
- }
- break;
-
- case 'e':
- if (Ends("izer"))
- {
- R("ize"); break;
- }
- break;
-
- case 'l':
- if (Ends("bli"))
- {
- R("ble"); break;
- }
- if (Ends("alli"))
- {
- R("al"); break;
- }
- if (Ends("entli"))
- {
- R("ent"); break;
- }
- if (Ends("eli"))
- {
- R("e"); break;
- }
- if (Ends("ousli"))
- {
- R("ous"); break;
- }
- break;
-
- case 'o':
- if (Ends("ization"))
- {
- R("ize"); break;
- }
- if (Ends("ation"))
- {
- R("ate"); break;
- }
- if (Ends("ator"))
- {
- R("ate"); break;
- }
- break;
-
- case 's':
- if (Ends("alism"))
- {
- R("al"); break;
- }
- if (Ends("iveness"))
- {
- R("ive"); break;
- }
- if (Ends("fulness"))
- {
- R("ful"); break;
- }
- if (Ends("ousness"))
- {
- R("ous"); break;
- }
- break;
-
- case 't':
- if (Ends("aliti"))
- {
- R("al"); break;
- }
- if (Ends("iviti"))
- {
- R("ive"); break;
- }
- if (Ends("biliti"))
- {
- R("ble"); break;
- }
- break;
-
- case 'g':
- if (Ends("logi"))
- {
- R("log"); break;
- }
- break;
- }
- }
-
- /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
-
- private void Step4()
- {
- switch (b[k])
- {
-
- case 'e':
- if (Ends("icate"))
- {
- R("ic"); break;
- }
- if (Ends("ative"))
- {
- R(""); break;
- }
- if (Ends("alize"))
- {
- R("al"); break;
- }
- break;
-
- case 'i':
- if (Ends("iciti"))
- {
- R("ic"); break;
- }
- break;
-
- case 'l':
- if (Ends("ical"))
- {
- R("ic"); break;
- }
- if (Ends("ful"))
- {
- R(""); break;
- }
- break;
-
- case 's':
- if (Ends("ness"))
- {
- R(""); break;
- }
- break;
- }
- }
-
- /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
-
- private void Step5()
- {
- if (k == k0)
- return ; /* for Bug 1 */
- switch (b[k - 1])
- {
-
- case 'a':
- if (Ends("al"))
- break;
- return ;
-
- case 'c':
- if (Ends("ance"))
- break;
- if (Ends("ence"))
- break;
- return ;
-
- case 'e':
- if (Ends("er"))
- break; return ;
-
- case 'i':
- if (Ends("ic"))
- break; return ;
-
- case 'l':
- if (Ends("able"))
- break;
- if (Ends("ible"))
- break; return ;
-
- case 'n':
- if (Ends("ant"))
- break;
- if (Ends("ement"))
- break;
- if (Ends("ment"))
- break;
- /* element etc. not stripped before the m */
- if (Ends("ent"))
- break;
- return ;
-
- case 'o':
- if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
- break;
- /* j >= 0 fixes Bug 2 */
- if (Ends("ou"))
- break;
- return ;
- /* takes care of -ous */
-
- case 's':
- if (Ends("ism"))
- break;
- return ;
-
- case 't':
- if (Ends("ate"))
- break;
- if (Ends("iti"))
- break;
- return ;
-
- case 'u':
- if (Ends("ous"))
- break;
- return ;
-
- case 'v':
- if (Ends("ive"))
- break;
- return ;
-
- case 'z':
- if (Ends("ize"))
- break;
- return ;
-
- default:
- return ;
-
- }
- if (M() > 1)
- k = j;
- }
-
- /* step6() removes a final -e if m() > 1. */
-
- private void Step6()
- {
- j = k;
- if (b[k] == 'e')
- {
- int a = M();
- if (a > 1 || a == 1 && !Cvc(k - 1))
- k--;
- }
- if (b[k] == 'l' && Doublec(k) && M() > 1)
- k--;
- }
-
-
- /// <summary> Stem a word provided as a String. Returns the result as a String.</summary>
- public virtual System.String Stem(System.String s)
- {
- if (Stem(s.ToCharArray(), s.Length))
- {
- return ToString();
- }
- else
- return s;
- }
-
- /// <summary>Stem a word contained in a char[]. Returns true if the stemming process
- /// resulted in a word different from the input. You can retrieve the
- /// result with getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] word)
- {
- return Stem(word, word.Length);
- }
-
- /// <summary>Stem a word contained in a portion of a char[] array. Returns
- /// true if the stemming process resulted in a word different from
- /// the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
- {
- Reset();
- if (b.Length < wordLen)
- {
- char[] new_b = new char[wordLen + EXTRA];
- b = new_b;
- }
- for (int j = 0; j < wordLen; j++)
- b[j] = wordBuffer[offset + j];
- i = wordLen;
- return Stem(0);
- }
-
- /// <summary>Stem a word contained in a leading portion of a char[] array.
- /// Returns true if the stemming process resulted in a word different
- /// from the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem(char[] word, int wordLen)
- {
- return Stem(word, 0, wordLen);
- }
-
- /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
- /// Returns true if the stemming process resulted in a word different
- /// from the input. You can retrieve the result with
- /// getResultLength()/getResultBuffer() or toString().
- /// </summary>
- public virtual bool Stem()
- {
- return Stem(0);
- }
-
- public virtual bool Stem(int i0)
- {
- k = i - 1;
- k0 = i0;
- if (k > k0 + 1)
- {
- Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
- }
- // Also, a word is considered dirty if we lopped off letters
- // Thanks to Ifigenia Vairelles for pointing this out.
- if (i != k + 1)
- dirty = true;
- i = k + 1;
- return dirty;
- }
-
- /// <summary>Test program for demonstrating the Stemmer. It reads a file and
- /// stems each word, writing the result to standard out.
- /// Usage: Stemmer file-name
- /// </summary>
- [STAThread]
- public static void Main(System.String[] args)
- {
- PorterStemmer s = new PorterStemmer();
+ class PorterStemmer
+ {
+ private char[] b;
+ private int i, j, k, k0;
+ private bool dirty = false;
+ private const int INC = 50; /* unit of size whereby b is increased */
+ private const int EXTRA = 1;
+
+ public PorterStemmer()
+ {
+ b = new char[INC];
+ i = 0;
+ }
+
+ /// <summary> reset() resets the stemmer so it can stem another word. If you invoke
+ /// the stemmer by calling add(char) and then Stem(), you must call reset()
+ /// before starting another word.
+ /// </summary>
+ public virtual void Reset()
+ {
+ i = 0; dirty = false;
+ }
+
+ /// <summary> Add a character to the word being stemmed. When you are finished
+ /// adding characters, you can call Stem(void) to process the word.
+ /// </summary>
+ public virtual void Add(char ch)
+ {
+ if (b.Length <= i + EXTRA)
+ {
+ char[] new_b = new char[b.Length + INC];
+ for (int c = 0; c < b.Length; c++)
+ new_b[c] = b[c];
+ b = new_b;
+ }
+ b[i++] = ch;
+ }
+
+ /// <summary> After a word has been stemmed, it can be retrieved by toString(),
+ /// or a reference to the internal buffer can be retrieved by getResultBuffer
+ /// and getResultLength (which is generally more efficient.)
+ /// </summary>
+ public override System.String ToString()
+ {
+ return new System.String(b, 0, i);
+ }
+
+ /// <summary> Returns the length of the word resulting from the stemming process.</summary>
+ public virtual int GetResultLength()
+ {
+ return i;
+ }
+
+ /// <summary> Returns a reference to a character buffer containing the results of
+ /// the stemming process. You also need to consult getResultLength()
+ /// to determine the length of the result.
+ /// </summary>
+ public virtual char[] GetResultBuffer()
+ {
+ return b;
+ }
+
+ /* cons(i) is true <=> b[i] is a consonant. */
+
+ private bool Cons(int i)
+ {
+ switch (b[i])
+ {
+
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ return false;
+
+ case 'y':
+ return (i == k0) ? true : !Cons(i - 1);
+
+ default:
+ return true;
+
+ }
+ }
+
+ /* m() measures the number of consonant sequences between k0 and j. if c is
+ a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+ presence,
+
+ <c><v> gives 0
+ <c>vc<v> gives 1
+ <c>vcvc<v> gives 2
+ <c>vcvcvc<v> gives 3
+ ....
+ */
+
+ private int M()
+ {
+ int n = 0;
+ int i = k0;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ while (true)
+ {
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ n++;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ }
+ }
+
+ /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+ private bool Vowelinstem()
+ {
+ int i;
+ for (i = k0; i <= j; i++)
+ if (!Cons(i))
+ return true;
+ return false;
+ }
+
+ /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+ private bool Doublec(int j)
+ {
+ if (j < k0 + 1)
+ return false;
+ if (b[j] != b[j - 1])
+ return false;
+ return Cons(j);
+ }
+
+ /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+ and also if the second c is not w,x or y. this is used when trying to
+ restore an e at the end of a short word. e.g.
+
+ cav(e), lov(e), hop(e), crim(e), but
+ snow, box, tray.
+
+ */
+
+ private bool Cvc(int i)
+ {
+ if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
+ return false;
+ else
+ {
+ int ch = b[i];
+ if (ch == 'w' || ch == 'x' || ch == 'y')
+ return false;
+ }
+ return true;
+ }
+
+ private bool Ends(System.String s)
+ {
+ int l = s.Length;
+ int o = k - l + 1;
+ if (o < k0)
+ return false;
+ for (int i = 0; i < l; i++)
+ if (b[o + i] != s[i])
+ return false;
+ j = k - l;
+ return true;
+ }
+
+ /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+ k. */
+
+ internal virtual void Setto(System.String s)
+ {
+ int l = s.Length;
+ int o = j + 1;
+ for (int i = 0; i < l; i++)
+ b[o + i] = s[i];
+ k = j + l;
+ dirty = true;
+ }
+
+ /* r(s) is used further down. */
+
+ internal virtual void R(System.String s)
+ {
+ if (M() > 0)
+ Setto(s);
+ }
+
+ /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+ caresses -> caress
+ ponies -> poni
+ ties -> ti
+ caress -> caress
+ cats -> cat
+
+ feed -> feed
+ agreed -> agree
+ disabled -> disable
+
+ matting -> mat
+ mating -> mate
+ meeting -> meet
+ milling -> mill
+ messing -> mess
+
+ meetings -> meet
+
+ */
+
+ private void Step1()
+ {
+ if (b[k] == 's')
+ {
+ if (Ends("sses"))
+ k -= 2;
+ else if (Ends("ies"))
+ Setto("i");
+ else if (b[k - 1] != 's')
+ k--;
+ }
+ if (Ends("eed"))
+ {
+ if (M() > 0)
+ k--;
+ }
+ else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
+ {
+ k = j;
+ if (Ends("at"))
+ Setto("ate");
+ else if (Ends("bl"))
+ Setto("ble");
+ else if (Ends("iz"))
+ Setto("ize");
+ else if (Doublec(k))
+ {
+ int ch = b[k--];
+ if (ch == 'l' || ch == 's' || ch == 'z')
+ k++;
+ }
+ else if (M() == 1 && Cvc(k))
+ Setto("e");
+ }
+ }
+
+ /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+ private void Step2()
+ {
+ if (Ends("y") && Vowelinstem())
+ {
+ b[k] = 'i';
+ dirty = true;
+ }
+ }
+
+ /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+ -ation) maps to -ize etc. note that the string before the suffix must give
+ m() > 0. */
+
+ private void Step3()
+ {
+ if (k == k0)
+ return ; /* For Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("ational"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("tional"))
+ {
+ R("tion"); break;
+ }
+ break;
+
+ case 'c':
+ if (Ends("enci"))
+ {
+ R("ence"); break;
+ }
+ if (Ends("anci"))
+ {
+ R("ance"); break;
+ }
+ break;
+
+ case 'e':
+ if (Ends("izer"))
+ {
+ R("ize"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("bli"))
+ {
+ R("ble"); break;
+ }
+ if (Ends("alli"))
+ {
+ R("al"); break;
+ }
+ if (Ends("entli"))
+ {
+ R("ent"); break;
+ }
+ if (Ends("eli"))
+ {
+ R("e"); break;
+ }
+ if (Ends("ousli"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 'o':
+ if (Ends("ization"))
+ {
+ R("ize"); break;
+ }
+ if (Ends("ation"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("ator"))
+ {
+ R("ate"); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("alism"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iveness"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("fulness"))
+ {
+ R("ful"); break;
+ }
+ if (Ends("ousness"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 't':
+ if (Ends("aliti"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iviti"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("biliti"))
+ {
+ R("ble"); break;
+ }
+ break;
+
+ case 'g':
+ if (Ends("logi"))
+ {
+ R("log"); break;
+ }
+ break;
+ }
+ }
+
+ /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+ private void Step4()
+ {
+ switch (b[k])
+ {
+
+ case 'e':
+ if (Ends("icate"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ative"))
+ {
+ R(""); break;
+ }
+ if (Ends("alize"))
+ {
+ R("al"); break;
+ }
+ break;
+
+ case 'i':
+ if (Ends("iciti"))
+ {
+ R("ic"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("ical"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ful"))
+ {
+ R(""); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("ness"))
+ {
+ R(""); break;
+ }
+ break;
+ }
+ }
+
+ /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+ private void Step5()
+ {
+ if (k == k0)
+ return ; /* for Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("al"))
+ break;
+ return ;
+
+ case 'c':
+ if (Ends("ance"))
+ break;
+ if (Ends("ence"))
+ break;
+ return ;
+
+ case 'e':
+ if (Ends("er"))
+ break; return ;
+
+ case 'i':
+ if (Ends("ic"))
+ break; return ;
+
+ case 'l':
+ if (Ends("able"))
+ break;
+ if (Ends("ible"))
+ break; return ;
+
+ case 'n':
+ if (Ends("ant"))
+ break;
+ if (Ends("ement"))
+ break;
+ if (Ends("ment"))
+ break;
+ /* element etc. not stripped before the m */
+ if (Ends("ent"))
+ break;
+ return ;
+
+ case 'o':
+ if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+ break;
+ /* j >= 0 fixes Bug 2 */
+ if (Ends("ou"))
+ break;
+ return ;
+ /* takes care of -ous */
+
+ case 's':
+ if (Ends("ism"))
+ break;
+ return ;
+
+ case 't':
+ if (Ends("ate"))
+ break;
+ if (Ends("iti"))
+ break;
+ return ;
+
+ case 'u':
+ if (Ends("ous"))
+ break;
+ return ;
+
+ case 'v':
+ if (Ends("ive"))
+ break;
+ return ;
+
+ case 'z':
+ if (Ends("ize"))
+ break;
+ return ;
+
+ default:
+ return ;
+
+ }
+ if (M() > 1)
+ k = j;
+ }
+
+ /* step6() removes a final -e if m() > 1. */
+
+ private void Step6()
+ {
+ j = k;
+ if (b[k] == 'e')
+ {
+ int a = M();
+ if (a > 1 || a == 1 && !Cvc(k - 1))
+ k--;
+ }
+ if (b[k] == 'l' && Doublec(k) && M() > 1)
+ k--;
+ }
+
+
+ /// <summary> Stem a word provided as a String. Returns the result as a String.</summary>
+ public virtual System.String Stem(System.String s)
+ {
+ if (Stem(s.ToCharArray(), s.Length))
+ {
+ return ToString();
+ }
+ else
+ return s;
+ }
+
+ /// <summary>Stem a word contained in a char[]. Returns true if the stemming process
+ /// resulted in a word different from the input. You can retrieve the
+ /// result with getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word)
+ {
+ return Stem(word, word.Length);
+ }
+
+ /// <summary>Stem a word contained in a portion of a char[] array. Returns
+ /// true if the stemming process resulted in a word different from
+ /// the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
+ {
+ Reset();
+ if (b.Length < wordLen)
+ {
+ char[] new_b = new char[wordLen + EXTRA];
+ b = new_b;
+ }
+ for (int j = 0; j < wordLen; j++)
+ b[j] = wordBuffer[offset + j];
+ i = wordLen;
+ return Stem(0);
+ }
+
+ /// <summary>Stem a word contained in a leading portion of a char[] array.
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem(char[] word, int wordLen)
+ {
+ return Stem(word, 0, wordLen);
+ }
+
+ /// <summary>Stem the word placed into the Stemmer buffer through calls to add().
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ /// </summary>
+ public virtual bool Stem()
+ {
+ return Stem(0);
+ }
+
+ public virtual bool Stem(int i0)
+ {
+ k = i - 1;
+ k0 = i0;
+ if (k > k0 + 1)
+ {
+ Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
+ }
+ // Also, a word is considered dirty if we lopped off letters
+ // Thanks to Ifigenia Vairelles for pointing this out.
+ if (i != k + 1)
+ dirty = true;
+ i = k + 1;
+ return dirty;
+ }
+
+ /// <summary>Test program for demonstrating the Stemmer. It reads a file and
+ /// stems each word, writing the result to standard out.
+ /// Usage: Stemmer file-name
+ /// </summary>
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+ PorterStemmer s = new PorterStemmer();
- for (int i = 0; i < args.Length; i++)
- {
- try
- {
+ for (int i = 0; i < args.Length; i++)
+ {
+ try
+ {
System.IO.BinaryReader in_Renamed = new System.IO.BinaryReader(System.IO.File.Open(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read));
- byte[] buffer = new byte[1024];
- int bufferLen, offset, ch;
+ byte[] buffer = new byte[1024];
+ int bufferLen, offset, ch;
- bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
- offset = 0;
- s.Reset();
+ bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ offset = 0;
+ s.Reset();
- while (true)
- {
- if (offset < bufferLen)
- ch = buffer[offset++];
- else
- {
- bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
- offset = 0;
- if (bufferLen <= 0)
- ch = - 1;
- else
- ch = buffer[offset++];
- }
+ while (true)
+ {
+ if (offset < bufferLen)
+ ch = buffer[offset++];
+ else
+ {
+ bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ offset = 0;
+ if (bufferLen <= 0)
+ ch = - 1;
+ else
+ ch = buffer[offset++];
+ }
- if (System.Char.IsLetter((char) ch))
- {
- s.Add(System.Char.ToLower((char) ch));
- }
- else
- {
- s.Stem();
- System.Console.Out.Write(s.ToString());
- s.Reset();
- if (ch < 0)
- break;
- else
- {
- System.Console.Out.Write((char) ch);
- }
- }
- }
+ if (System.Char.IsLetter((char) ch))
+ {
+ s.Add(System.Char.ToLower((char) ch));
+ }
+ else
+ {
+ s.Stem();
+ System.Console.Out.Write(s.ToString());
+ s.Reset();
+ if (ch < 0)
+ break;
+ else
+ {
+ System.Console.Out.Write((char) ch);
+ }
+ }
+ }
- in_Renamed.Close();
- }
- catch (System.IO.IOException)
- {
- System.Console.Out.WriteLine("error reading " + args[i]);
- }
- }
- }
- }
+ in_Renamed.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ System.Console.Out.WriteLine("error reading " + args[i]);
+ }
+ }
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/SimpleAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs Tue May 1 11:45:26 2007
@@ -20,13 +20,13 @@
namespace Lucene.Net.Analysis
{
- /// <summary>An Analyzer that filters LetterTokenizer with LowerCaseFilter. </summary>
+ /// <summary>An Analyzer that filters LetterTokenizer with LowerCaseFilter. </summary>
- public sealed class SimpleAnalyzer : Analyzer
- {
- public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- return new LowerCaseTokenizer(reader);
- }
- }
+ public sealed class SimpleAnalyzer : Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new LowerCaseTokenizer(reader);
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/CharStream.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs Tue May 1 11:45:26 2007
@@ -20,86 +20,100 @@
namespace Lucene.Net.Analysis.Standard
{
- /// <summary> This interface describes a character stream that maintains line and
- /// column number positions of the characters. It also has the capability
- /// to backup the stream to some extent. An implementation of this
- /// interface is used in the TokenManager implementation generated by
- /// JavaCCParser.
- ///
- /// All the methods except backup can be implemented in any fashion. backup
- /// needs to be implemented correctly for the correct operation of the lexer.
- /// Rest of the methods are all used to get information like line number,
- /// column number and the String that constitutes a token and are not used
- /// by the lexer. Hence their implementation won't affect the generated lexer's
- /// operation.
- /// </summary>
+ /// <summary> This interface describes a character stream that maintains line and
+ /// column number positions of the characters. It also has the capability
+ /// to backup the stream to some extent. An implementation of this
+ /// interface is used in the TokenManager implementation generated by
+ /// JavaCCParser.
+ ///
+ /// All the methods except backup can be implemented in any fashion. backup
+ /// needs to be implemented correctly for the correct operation of the lexer.
+ /// Rest of the methods are all used to get information like line number,
+ /// column number and the String that constitutes a token and are not used
+ /// by the lexer. Hence their implementation won't affect the generated lexer's
+ /// operation.
+ /// </summary>
- public interface CharStream
- {
+ public interface CharStream
+ {
- /// <summary> Returns the next character from the selected input. The method
- /// of selecting the input is the responsibility of the class
- /// implementing this interface. Can throw any java.io.IOException.
- /// </summary>
- char ReadChar();
-
- /// <summary> Returns the column number of the last character for current token (being
- /// matched after the last call to BeginTOken).
- /// </summary>
- int GetEndColumn();
-
- /// <summary> Returns the line number of the last character for current token (being
- /// matched after the last call to BeginTOken).
- /// </summary>
- int GetEndLine();
-
- /// <summary> Returns the column number of the first character for current token (being
- /// matched after the last call to BeginTOken).
- /// </summary>
- int GetBeginColumn();
-
- /// <summary> Returns the line number of the first character for current token (being
- /// matched after the last call to BeginTOken).
- /// </summary>
- int GetBeginLine();
-
- /// <summary> Backs up the input stream by amount steps. Lexer calls this method if it
- /// had already read some characters, but could not use them to match a
- /// (longer) token. So, they will be used again as the prefix of the next
- /// token and it is the implemetation's responsibility to do this right.
- /// </summary>
- void Backup(int amount);
-
- /// <summary> Returns the next character that marks the beginning of the next token.
- /// All characters must remain in the buffer between two successive calls
- /// to this method to implement backup correctly.
- /// </summary>
- char BeginToken();
-
- /// <summary> Returns a string made up of characters from the marked token beginning
- /// to the current buffer position. Implementations have the choice of returning
- /// anything that they want to. For example, for efficiency, one might decide
- /// to just return null, which is a valid implementation.
- /// </summary>
- System.String GetImage();
-
- /// <summary> Returns an array of characters that make up the suffix of length 'len' for
- /// the currently matched token. This is used to build up the matched string
- /// for use in actions in the case of MORE. A simple and inefficient
- /// implementation of this is as follows :
- ///
- /// {
- /// String t = GetImage();
- /// return t.substring(t.length() - len, t.length()).toCharArray();
- /// }
- /// </summary>
- char[] GetSuffix(int len);
-
- /// <summary> The lexer calls this function to indicate that it is done with the stream
- /// and hence implementations can free any resources held by this class.
- /// Again, the body of this function can be just empty and it will not
- /// affect the lexer's operation.
- /// </summary>
- void Done();
- }
+ /// <summary> Returns the next character from the selected input. The method
+ /// of selecting the input is the responsibility of the class
+ /// implementing this interface. Can throw any java.io.IOException.
+ /// </summary>
+ char ReadChar();
+
+ /// <summary> Returns the column position of the character last read.</summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="#getEndColumn">
+ /// </seealso>
+ int GetColumn();
+
+ /// <summary> Returns the line number of the character last read.</summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="#getEndLine">
+ /// </seealso>
+ int GetLine();
+
+ /// <summary> Returns the column number of the last character for current token (being
+ /// matched after the last call to BeginTOken).
+ /// </summary>
+ int GetEndColumn();
+
+ /// <summary> Returns the line number of the last character for current token (being
+ /// matched after the last call to BeginTOken).
+ /// </summary>
+ int GetEndLine();
+
+ /// <summary> Returns the column number of the first character for current token (being
+ /// matched after the last call to BeginTOken).
+ /// </summary>
+ int GetBeginColumn();
+
+ /// <summary> Returns the line number of the first character for current token (being
+ /// matched after the last call to BeginTOken).
+ /// </summary>
+ int GetBeginLine();
+
+ /// <summary> Backs up the input stream by amount steps. Lexer calls this method if it
+ /// had already read some characters, but could not use them to match a
+ /// (longer) token. So, they will be used again as the prefix of the next
+ /// token and it is the implemetation's responsibility to do this right.
+ /// </summary>
+ void Backup(int amount);
+
+ /// <summary> Returns the next character that marks the beginning of the next token.
+ /// All characters must remain in the buffer between two successive calls
+ /// to this method to implement backup correctly.
+ /// </summary>
+ char BeginToken();
+
+ /// <summary> Returns a string made up of characters from the marked token beginning
+ /// to the current buffer position. Implementations have the choice of returning
+ /// anything that they want to. For example, for efficiency, one might decide
+ /// to just return null, which is a valid implementation.
+ /// </summary>
+ System.String GetImage();
+
+ /// <summary> Returns an array of characters that make up the suffix of length 'len' for
+ /// the currently matched token. This is used to build up the matched string
+ /// for use in actions in the case of MORE. A simple and inefficient
+ /// implementation of this is as follows :
+ ///
+ /// {
+ /// String t = GetImage();
+ /// return t.substring(t.length() - len, t.length()).toCharArray();
+ /// }
+ /// </summary>
+ char[] GetSuffix(int len);
+
+ /// <summary> The lexer calls this function to indicate that it is done with the stream
+ /// and hence implementations can free any resources held by this class.
+ /// Again, the body of this function can be just empty and it will not
+ /// affect the lexer's operation.
+ /// </summary>
+ void Done();
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/FastCharStream.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs Tue May 1 11:45:26 2007
@@ -19,132 +19,132 @@
namespace Lucene.Net.Analysis.Standard
{
- /// <summary>An efficient implementation of JavaCC's CharStream interface. <p>Note that
- /// this does not do line-number counting, but instead keeps track of the
- /// character position of the token in the input, as required by Lucene's {@link
- /// Lucene.Net.analysis.Token} API.
- /// </summary>
- public sealed class FastCharStream : CharStream
- {
- internal char[] buffer = null;
-
- internal int bufferLength = 0; // end of valid chars
- internal int bufferPosition = 0; // next char to read
-
- internal int tokenStart = 0; // offset in buffer
- internal int bufferStart = 0; // position in file of buffer
-
- internal System.IO.TextReader input; // source of chars
-
- /// <summary>Constructs from a Reader. </summary>
- public FastCharStream(System.IO.TextReader r)
- {
- input = r;
- }
-
- public char ReadChar()
- {
- if (bufferPosition >= bufferLength)
- Refill();
- return buffer[bufferPosition++];
- }
-
- private void Refill()
- {
- int newPosition = bufferLength - tokenStart;
+ /// <summary>An efficient implementation of JavaCC's CharStream interface. <p>Note that
+ /// this does not do line-number counting, but instead keeps track of the
+ /// character position of the token in the input, as required by Lucene's {@link
+ /// Lucene.Net.analysis.Token} API.
+ /// </summary>
+ public sealed class FastCharStream : CharStream
+ {
+ internal char[] buffer = null;
+
+ internal int bufferLength = 0; // end of valid chars
+ internal int bufferPosition = 0; // next char to read
+
+ internal int tokenStart = 0; // offset in buffer
+ internal int bufferStart = 0; // position in file of buffer
+
+ internal System.IO.TextReader input; // source of chars
+
+ /// <summary>Constructs from a Reader. </summary>
+ public FastCharStream(System.IO.TextReader r)
+ {
+ input = r;
+ }
+
+ public char ReadChar()
+ {
+ if (bufferPosition >= bufferLength)
+ Refill();
+ return buffer[bufferPosition++];
+ }
+
+ private void Refill()
+ {
+ int newPosition = bufferLength - tokenStart;
- if (tokenStart == 0)
- {
- // token won't fit in buffer
- if (buffer == null)
- {
- // first time: alloc buffer
- buffer = new char[2048];
- }
- else if (bufferLength == buffer.Length)
- {
- // grow buffer
- char[] newBuffer = new char[buffer.Length * 2];
- Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
- buffer = newBuffer;
- }
- }
- else
- {
- // shift token to front
- Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
- }
+ if (tokenStart == 0)
+ {
+ // token won't fit in buffer
+ if (buffer == null)
+ {
+ // first time: alloc buffer
+ buffer = new char[2048];
+ }
+ else if (bufferLength == buffer.Length)
+ {
+ // grow buffer
+ char[] newBuffer = new char[buffer.Length * 2];
+ Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ }
+ else
+ {
+ // shift token to front
+ Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
+ }
- bufferLength = newPosition; // update state
- bufferPosition = newPosition;
- bufferStart += tokenStart;
- tokenStart = 0;
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
- int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
- if (charsRead <= 0)
- throw new System.IO.IOException("read past eof");
- else
- bufferLength += charsRead;
- }
-
- public char BeginToken()
- {
- tokenStart = bufferPosition;
- return ReadChar();
- }
-
- public void Backup(int amount)
- {
- bufferPosition -= amount;
- }
-
- public System.String GetImage()
- {
- return new System.String(buffer, tokenStart, bufferPosition - tokenStart);
- }
-
- public char[] GetSuffix(int len)
- {
- char[] value_Renamed = new char[len];
- Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len);
- return value_Renamed;
- }
-
- public void Done()
- {
- try
- {
- input.Close();
- }
- catch (System.IO.IOException e)
- {
- System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
- }
- }
-
- public int GetColumn()
- {
- return bufferStart + bufferPosition;
- }
- public int GetLine()
- {
- return 1;
- }
- public int GetEndColumn()
- {
- return bufferStart + bufferPosition;
- }
- public int GetEndLine()
- {
- return 1;
- }
- public int GetBeginColumn()
- {
- return bufferStart + tokenStart;
- }
- public int GetBeginLine()
- {
- return 1;
- }
- }
+ int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
+ if (charsRead <= 0)
+ throw new System.IO.IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public char BeginToken()
+ {
+ tokenStart = bufferPosition;
+ return ReadChar();
+ }
+
+ public void Backup(int amount)
+ {
+ bufferPosition -= amount;
+ }
+
+ public System.String GetImage()
+ {
+ return new System.String(buffer, tokenStart, bufferPosition - tokenStart);
+ }
+
+ public char[] GetSuffix(int len)
+ {
+ char[] value_Renamed = new char[len];
+ Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len);
+ return value_Renamed;
+ }
+
+ public void Done()
+ {
+ try
+ {
+ input.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
+ }
+ }
+
+ public int GetColumn()
+ {
+ return bufferStart + bufferPosition;
+ }
+ public int GetLine()
+ {
+ return 1;
+ }
+ public int GetEndColumn()
+ {
+ return bufferStart + bufferPosition;
+ }
+ public int GetEndLine()
+ {
+ return 1;
+ }
+ public int GetBeginColumn()
+ {
+ return bufferStart + tokenStart;
+ }
+ public int GetBeginLine()
+ {
+ return 1;
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/Package.html?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Package.html Tue May 1 11:45:26 2007
@@ -1,15 +1,15 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
- <meta name="Author" content="Doug Cutting">
-</head>
-<body>
-A grammar-based tokenizer constructed with JavaCC.
-<p>Note that JavaCC defines lots of public classes, methods and fields
-that do not need to be public. These clutter the documentation.
-Sorry.
-<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>org.apache.lucene.analysis.Token</tt>
-must always be fully qualified in source code in this package.
-</body>
-</html>
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+ <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+A grammar-based tokenizer constructed with JavaCC.
+<p>Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public. These clutter the documentation.
+Sorry.
+<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>Lucene.Net.Analysis.Token</tt>
+must always be fully qualified in source code in this package.
+</body>
+</html>
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/ParseException.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs Tue May 1 11:45:26 2007
@@ -1,20 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */
using System;
namespace Lucene.Net.Analysis.Standard
@@ -29,7 +12,7 @@
/// mechanisms so long as you retain the public fields.
/// </summary>
[Serializable]
- public class ParseException : System.IO.IOException
+ public class ParseException:System.IO.IOException
{
/// <summary> This method has the standard behavior when this object has been
/// created using the standard constructors. Otherwise, it uses
@@ -123,14 +106,14 @@
/// these constructors.
/// </summary>
- public ParseException() : base()
+ public ParseException():base()
{
- specialConstructor = false;
+ specialConstructor = false;
}
- public ParseException(System.String message) : base(message)
+ public ParseException(System.String message):base(message)
{
- specialConstructor = false;
+ specialConstructor = false;
}
/// <summary> This variable determines which constructor was used to create
@@ -158,7 +141,7 @@
public System.String[] tokenImage;
/// <summary> The end of line string for this machine.</summary>
- protected internal System.String eol = System.Configuration.ConfigurationSettings.AppSettings.Get("line.separator");
+ protected internal System.String eol = SupportClass.AppSettings.Get("line.separator", "\n");
/// <summary> Used to convert raw characters to their escaped version
/// when these raw version cannot be used as part of an ASCII
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs Tue May 1 11:45:26 2007
@@ -21,68 +21,68 @@
namespace Lucene.Net.Analysis.Standard
{
- /// <summary> Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- /// LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
- ///
- /// </summary>
- /// <version> $Id: StandardAnalyzer.java 219090 2005-07-14 20:36:28Z dnaber $
- /// </version>
- public class StandardAnalyzer : Analyzer
- {
- private System.Collections.Hashtable stopSet;
-
- /// <summary>An array containing some common English words that are usually not
- /// useful for searching.
- /// </summary>
- public static readonly System.String[] STOP_WORDS;
-
- /// <summary>Builds an analyzer with the default stop words ({@link #STOP_WORDS}). </summary>
- public StandardAnalyzer() : this(STOP_WORDS)
- {
- }
-
- /// <summary>Builds an analyzer with the given stop words. </summary>
- public StandardAnalyzer(System.Collections.Hashtable stopWords)
- {
- stopSet = stopWords;
- }
-
- /// <summary>Builds an analyzer with the given stop words. </summary>
- public StandardAnalyzer(System.String[] stopWords)
- {
- stopSet = StopFilter.MakeStopSet(stopWords);
- }
-
- /// <summary>Builds an analyzer with the stop words from the given file.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(File)">
- /// </seealso>
- public StandardAnalyzer(System.IO.FileInfo stopwords)
- {
- stopSet = WordlistLoader.GetWordSet(stopwords);
- }
-
- /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
- /// <seealso cref="WordlistLoader.GetWordSet(Reader)">
- /// </seealso>
- public StandardAnalyzer(System.IO.TextReader stopwords)
- {
- stopSet = WordlistLoader.GetWordSet(stopwords);
- }
-
- /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
- /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
- /// </summary>
- public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
- {
- TokenStream result = new StandardTokenizer(reader);
- result = new StandardFilter(result);
- result = new LowerCaseFilter(result);
- result = new StopFilter(result, stopSet);
- return result;
- }
- static StandardAnalyzer()
- {
- STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
- }
- }
+ /// <summary> Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ /// LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
+ ///
+ /// </summary>
+ /// <version> $Id: StandardAnalyzer.java 219090 2005-07-14 20:36:28Z dnaber $
+ /// </version>
+ public class StandardAnalyzer : Analyzer
+ {
+ private System.Collections.Hashtable stopSet;
+
+ /// <summary>An array containing some common English words that are usually not
+ /// useful for searching.
+ /// </summary>
+ public static readonly System.String[] STOP_WORDS;
+
+ /// <summary>Builds an analyzer with the default stop words ({@link #STOP_WORDS}). </summary>
+ public StandardAnalyzer() : this(STOP_WORDS)
+ {
+ }
+
+ /// <summary>Builds an analyzer with the given stop words. </summary>
+ public StandardAnalyzer(System.Collections.Hashtable stopWords)
+ {
+ stopSet = stopWords;
+ }
+
+ /// <summary>Builds an analyzer with the given stop words. </summary>
+ public StandardAnalyzer(System.String[] stopWords)
+ {
+ stopSet = StopFilter.MakeStopSet(stopWords);
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given file.</summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(File)">
+ /// </seealso>
+ public StandardAnalyzer(System.IO.FileInfo stopwords)
+ {
+ stopSet = WordlistLoader.GetWordSet(stopwords);
+ }
+
+ /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+ /// <seealso cref="WordlistLoader.GetWordSet(Reader)">
+ /// </seealso>
+ public StandardAnalyzer(System.IO.TextReader stopwords)
+ {
+ stopSet = WordlistLoader.GetWordSet(stopwords);
+ }
+
+ /// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
+ /// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
+ /// </summary>
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ TokenStream result = new StandardTokenizer(reader);
+ result = new StandardFilter(result);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(result, stopSet);
+ return result;
+ }
+ static StandardAnalyzer()
+ {
+ STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardFilter.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs Tue May 1 11:45:26 2007
@@ -21,54 +21,54 @@
namespace Lucene.Net.Analysis.Standard
{
- /// <summary>Normalizes tokens extracted with {@link StandardTokenizer}. </summary>
+ /// <summary>Normalizes tokens extracted with {@link StandardTokenizer}. </summary>
- public sealed class StandardFilter : TokenFilter
- {
+ public sealed class StandardFilter : TokenFilter
+ {
- /// <summary>Construct filtering <i>in</i>. </summary>
- public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
- {
- }
+ /// <summary>Construct filtering <i>in</i>. </summary>
+ public StandardFilter(TokenStream in_Renamed) : base(in_Renamed)
+ {
+ }
- private static readonly System.String APOSTROPHE_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];
- private static readonly System.String ACRONYM_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];
+ private static readonly System.String APOSTROPHE_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE];
+ private static readonly System.String ACRONYM_TYPE = Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM];
- /// <summary>Returns the next token in the stream, or null at EOS.
- /// <p>Removes <tt>'s</tt> from the end of words.
- /// <p>Removes dots from acronyms.
- /// </summary>
- public override Lucene.Net.Analysis.Token Next()
- {
- Lucene.Net.Analysis.Token t = input.Next();
+ /// <summary>Returns the next token in the stream, or null at EOS.
+ /// <p>Removes <tt>'s</tt> from the end of words.
+ /// <p>Removes dots from acronyms.
+ /// </summary>
+ public override Lucene.Net.Analysis.Token Next()
+ {
+ Lucene.Net.Analysis.Token t = input.Next();
- if (t == null)
- return null;
+ if (t == null)
+ return null;
- System.String text = t.TermText();
- System.String type = t.Type();
+ System.String text = t.TermText();
+ System.String type = t.Type();
- if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
- {
- return new Lucene.Net.Analysis.Token(text.Substring(0, (text.Length - 2) - (0)), t.StartOffset(), t.EndOffset(), type);
- }
- else if (type == ACRONYM_TYPE)
- {
- // remove dots
- System.Text.StringBuilder trimmed = new System.Text.StringBuilder();
- for (int i = 0; i < text.Length; i++)
- {
- char c = text[i];
- if (c != '.')
- trimmed.Append(c);
- }
- return new Lucene.Net.Analysis.Token(trimmed.ToString(), t.StartOffset(), t.EndOffset(), type);
- }
- else
- {
- return t;
- }
- }
- }
+ if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
+ {
+ return new Lucene.Net.Analysis.Token(text.Substring(0, (text.Length - 2) - (0)), t.StartOffset(), t.EndOffset(), type);
+ }
+ else if (type == ACRONYM_TYPE)
+ {
+ // remove dots
+ System.Text.StringBuilder trimmed = new System.Text.StringBuilder();
+ for (int i = 0; i < text.Length; i++)
+ {
+ char c = text[i];
+ if (c != '.')
+ trimmed.Append(c);
+ }
+ return new Lucene.Net.Analysis.Token(trimmed.ToString(), t.StartOffset(), t.EndOffset(), type);
+ }
+ else
+ {
+ return t;
+ }
+ }
+ }
}
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Tue May 1 11:45:26 2007
@@ -21,99 +21,99 @@
namespace Lucene.Net.Analysis.Standard
{
- /// <summary>A grammar-based tokenizer constructed with JavaCC.
- ///
- /// <p> This should be a good tokenizer for most European-language documents:
- ///
- /// <ul>
- /// <li>Splits words at punctuation characters, removing punctuation. However, a
- /// dot that's not followed by whitespace is considered part of a token.
- /// <li>Splits words at hyphens, unless there's a number in the token, in which case
- /// the whole token is interpreted as a product number and is not split.
- /// <li>Recognizes email addresses and internet hostnames as one token.
- /// </ul>
- ///
- /// <p>Many applications have specific tokenizer needs. If this tokenizer does
- /// not suit your application, please consider copying this source code
- /// directory to your project and maintaining your own grammar-based tokenizer.
- /// </summary>
- public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
- {
-
- /// <summary>Constructs a tokenizer for this Reader. </summary>
- public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
- {
- this.input = reader;
- }
-
- /// <summary>Returns the next token in the stream, or null at EOS.
- /// <p>The returned token's type is set to an element of {@link
- /// StandardTokenizerConstants#tokenImage}.
- /// </summary>
- public override Lucene.Net.Analysis.Token Next()
- {
- Token token = null;
- switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
- {
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
- break;
-
- case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ:
- token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
- break;
-
- case 0:
- token = Jj_consume_token(0);
- break;
-
- default:
- jj_la1[0] = jj_gen;
- Jj_consume_token(- 1);
- throw new ParseException();
-
- }
- if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
- {
- {
- if (true)
- return null;
- }
- }
- else
- {
- {
- if (true)
- return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
- }
- }
- throw new System.ApplicationException("Missing return statement in function");
- }
+ /// <summary>A grammar-based tokenizer constructed with JavaCC.
+ ///
+ /// <p> This should be a good tokenizer for most European-language documents:
+ ///
+ /// <ul>
+ /// <li>Splits words at punctuation characters, removing punctuation. However, a
+ /// dot that's not followed by whitespace is considered part of a token.
+ /// <li>Splits words at hyphens, unless there's a number in the token, in which case
+ /// the whole token is interpreted as a product number and is not split.
+ /// <li>Recognizes email addresses and internet hostnames as one token.
+ /// </ul>
+ ///
+ /// <p>Many applications have specific tokenizer needs. If this tokenizer does
+ /// not suit your application, please consider copying this source code
+ /// directory to your project and maintaining your own grammar-based tokenizer.
+ /// </summary>
+ public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
+ {
+
+ /// <summary>Constructs a tokenizer for this Reader. </summary>
+ public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
+ {
+ this.input = reader;
+ }
+
+ /// <summary>Returns the next token in the stream, or null at EOS.
+ /// <p>The returned token's type is set to an element of {@link
+ /// StandardTokenizerConstants#tokenImage}.
+ /// </summary>
+ public override Lucene.Net.Analysis.Token Next()
+ {
+ Token token = null;
+ switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
+ {
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
+ break;
+
+ case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ:
+ token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
+ break;
+
+ case 0:
+ token = Jj_consume_token(0);
+ break;
+
+ default:
+ jj_la1[0] = jj_gen;
+ Jj_consume_token(- 1);
+ throw new ParseException();
+
+ }
+ if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
+ {
+ {
+ if (true)
+ return null;
+ }
+ }
+ else
+ {
+ {
+ if (true)
+ return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
+ }
+ }
+ throw new System.ApplicationException("Missing return statement in function");
+ }
/// <summary>By default, closes the input Reader. </summary>
public override void Close()
@@ -123,165 +123,165 @@
}
public StandardTokenizerTokenManager token_source;
- public Token token, jj_nt;
- private int jj_ntk;
- private int jj_gen;
- private int[] jj_la1 = new int[1];
- private static int[] jj_la1_0_Renamed_Field;
- private static void jj_la1_0()
- {
- jj_la1_0_Renamed_Field = new int[]{0x10ff};
- }
-
- public StandardTokenizer(CharStream stream)
- {
- token_source = new StandardTokenizerTokenManager(stream);
- token = new Token();
- jj_ntk = - 1;
- jj_gen = 0;
- for (int i = 0; i < 1; i++)
- jj_la1[i] = - 1;
- }
-
- public virtual void ReInit(CharStream stream)
- {
- token_source.ReInit(stream);
- token = new Token();
- jj_ntk = - 1;
- jj_gen = 0;
- for (int i = 0; i < 1; i++)
- jj_la1[i] = - 1;
- }
-
- public StandardTokenizer(StandardTokenizerTokenManager tm)
- {
- token_source = tm;
- token = new Token();
- jj_ntk = - 1;
- jj_gen = 0;
- for (int i = 0; i < 1; i++)
- jj_la1[i] = - 1;
- }
-
- public virtual void ReInit(StandardTokenizerTokenManager tm)
- {
- token_source = tm;
- token = new Token();
- jj_ntk = - 1;
- jj_gen = 0;
- for (int i = 0; i < 1; i++)
- jj_la1[i] = - 1;
- }
-
- private Token Jj_consume_token(int kind)
- {
- Token oldToken;
- if ((oldToken = token).next != null)
- token = token.next;
- else
- token = token.next = token_source.GetNextToken();
- jj_ntk = - 1;
- if (token.kind == kind)
- {
- jj_gen++;
- return token;
- }
- token = oldToken;
- jj_kind = kind;
- throw GenerateParseException();
- }
-
- public Token GetNextToken()
- {
- if (token.next != null)
- token = token.next;
- else
- token = token.next = token_source.GetNextToken();
- jj_ntk = - 1;
- jj_gen++;
- return token;
- }
-
- public Token GetToken(int index)
- {
- Token t = token;
- for (int i = 0; i < index; i++)
- {
- if (t.next != null)
- t = t.next;
- else
- t = t.next = token_source.GetNextToken();
- }
- return t;
- }
-
- private int Jj_ntk()
- {
- if ((jj_nt = token.next) == null)
- return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
- else
- return (jj_ntk = jj_nt.kind);
- }
-
- private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
- private int[] jj_expentry;
- private int jj_kind = - 1;
-
- public virtual ParseException GenerateParseException()
- {
- jj_expentries.Clear();
- bool[] la1tokens = new bool[16];
- for (int i = 0; i < 16; i++)
- {
- la1tokens[i] = false;
- }
- if (jj_kind >= 0)
- {
- la1tokens[jj_kind] = true;
- jj_kind = - 1;
- }
- for (int i = 0; i < 1; i++)
- {
- if (jj_la1[i] == jj_gen)
- {
- for (int j = 0; j < 32; j++)
- {
- if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
- {
- la1tokens[j] = true;
- }
- }
- }
- }
- for (int i = 0; i < 16; i++)
- {
- if (la1tokens[i])
- {
- jj_expentry = new int[1];
- jj_expentry[0] = i;
- jj_expentries.Add(jj_expentry);
- }
- }
- int[][] exptokseq = new int[jj_expentries.Count][];
- for (int i = 0; i < jj_expentries.Count; i++)
- {
- exptokseq[i] = (int[]) jj_expentries[i];
- }
- return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
- }
-
- public void Enable_tracing()
- {
- }
-
- public void Disable_tracing()
- {
- }
- static StandardTokenizer()
- {
- {
- jj_la1_0();
- }
- }
- }
+ public Token token, jj_nt;
+ private int jj_ntk;
+ private int jj_gen;
+ private int[] jj_la1 = new int[1];
+ private static int[] jj_la1_0_Renamed_Field;
+ private static void jj_la1_0()
+ {
+ jj_la1_0_Renamed_Field = new int[]{0x10ff};
+ }
+
+ public StandardTokenizer(CharStream stream)
+ {
+ token_source = new StandardTokenizerTokenManager(stream);
+ token = new Token();
+ jj_ntk = - 1;
+ jj_gen = 0;
+ for (int i = 0; i < 1; i++)
+ jj_la1[i] = - 1;
+ }
+
+ public virtual void ReInit(CharStream stream)
+ {
+ token_source.ReInit(stream);
+ token = new Token();
+ jj_ntk = - 1;
+ jj_gen = 0;
+ for (int i = 0; i < 1; i++)
+ jj_la1[i] = - 1;
+ }
+
+ public StandardTokenizer(StandardTokenizerTokenManager tm)
+ {
+ token_source = tm;
+ token = new Token();
+ jj_ntk = - 1;
+ jj_gen = 0;
+ for (int i = 0; i < 1; i++)
+ jj_la1[i] = - 1;
+ }
+
+ public virtual void ReInit(StandardTokenizerTokenManager tm)
+ {
+ token_source = tm;
+ token = new Token();
+ jj_ntk = - 1;
+ jj_gen = 0;
+ for (int i = 0; i < 1; i++)
+ jj_la1[i] = - 1;
+ }
+
+ private Token Jj_consume_token(int kind)
+ {
+ Token oldToken;
+ if ((oldToken = token).next != null)
+ token = token.next;
+ else
+ token = token.next = token_source.GetNextToken();
+ jj_ntk = - 1;
+ if (token.kind == kind)
+ {
+ jj_gen++;
+ return token;
+ }
+ token = oldToken;
+ jj_kind = kind;
+ throw GenerateParseException();
+ }
+
+ public Token GetNextToken()
+ {
+ if (token.next != null)
+ token = token.next;
+ else
+ token = token.next = token_source.GetNextToken();
+ jj_ntk = - 1;
+ jj_gen++;
+ return token;
+ }
+
+ public Token GetToken(int index)
+ {
+ Token t = token;
+ for (int i = 0; i < index; i++)
+ {
+ if (t.next != null)
+ t = t.next;
+ else
+ t = t.next = token_source.GetNextToken();
+ }
+ return t;
+ }
+
+ private int Jj_ntk()
+ {
+ if ((jj_nt = token.next) == null)
+ return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
+ else
+ return (jj_ntk = jj_nt.kind);
+ }
+
+ private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
+ private int[] jj_expentry;
+ private int jj_kind = - 1;
+
+ public virtual ParseException GenerateParseException()
+ {
+ jj_expentries.Clear();
+ bool[] la1tokens = new bool[16];
+ for (int i = 0; i < 16; i++)
+ {
+ la1tokens[i] = false;
+ }
+ if (jj_kind >= 0)
+ {
+ la1tokens[jj_kind] = true;
+ jj_kind = - 1;
+ }
+ for (int i = 0; i < 1; i++)
+ {
+ if (jj_la1[i] == jj_gen)
+ {
+ for (int j = 0; j < 32; j++)
+ {
+ if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
+ {
+ la1tokens[j] = true;
+ }
+ }
+ }
+ }
+ for (int i = 0; i < 16; i++)
+ {
+ if (la1tokens[i])
+ {
+ jj_expentry = new int[1];
+ jj_expentry[0] = i;
+ jj_expentries.Add(jj_expentry);
+ }
+ }
+ int[][] exptokseq = new int[jj_expentries.Count][];
+ for (int i = 0; i < jj_expentries.Count; i++)
+ {
+ exptokseq[i] = (int[]) jj_expentries[i];
+ }
+ return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
+ }
+
+ public void Enable_tracing()
+ {
+ }
+
+ public void Disable_tracing()
+ {
+ }
+ static StandardTokenizer()
+ {
+ {
+ jj_la1_0();
+ }
+ }
+ }
}