Posted to commits@lucenenet.apache.org by ar...@apache.org on 2008/06/25 04:53:12 UTC

svn commit: r671406 [2/3] - in /incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis: ./ Standard/

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Tue Jun 24 19:53:11 2008
@@ -15,273 +15,194 @@
  * limitations under the License.
  */
 
-/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
 using System;
 
+using Token = Lucene.Net.Analysis.Token;
+using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+
 namespace Lucene.Net.Analysis.Standard
 {
 	
-    /// <summary>A grammar-based tokenizer constructed with JavaCC.
-    /// 
-    /// <p> This should be a good tokenizer for most European-language documents:
-    /// 
-    /// <ul>
-    /// <li>Splits words at punctuation characters, removing punctuation. However, a 
-    /// dot that's not followed by whitespace is considered part of a token.
-    /// <li>Splits words at hyphens, unless there's a number in the token, in which case
-    /// the whole token is interpreted as a product number and is not split.
-    /// <li>Recognizes email addresses and internet hostnames as one token.
-    /// </ul>
-    /// 
-    /// <p>Many applications have specific tokenizer needs.  If this tokenizer does
-    /// not suit your application, please consider copying this source code
-    /// directory to your project and maintaining your own grammar-based tokenizer.
-    /// </summary>
-    public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
-    {
-		
-        /// <summary>Constructs a tokenizer for this Reader. </summary>
-        public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
-        {
-            this.input = reader;
-        }
-		
-        /// <summary>Returns the next token in the stream, or null at EOS.
-        /// <p>The returned token's type is set to an element of {@link
-        /// StandardTokenizerConstants#tokenImage}.
-        /// </summary>
-        public override Lucene.Net.Analysis.Token Next()
-        {
-            Token token = null;
-            switch ((jj_ntk == - 1) ? Jj_ntk() : jj_ntk)
-            {
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
-                    break;
-				
-                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ: 
-                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
-                    break;
-				
-                case 0: 
-                    token = Jj_consume_token(0);
-                    break;
-				
-                default: 
-                    jj_la1[0] = jj_gen;
-                    Jj_consume_token(- 1);
-                    throw new ParseException();
-				
-            }
-            if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
-            {
-            {
-                if (true)
-                    return null;
-            }
-            }
-            else
-            {
-            {
-                if (true)
-                    return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
-            }
-            }
-            throw new System.ApplicationException("Missing return statement in function");
-        }
-
-        /// <summary>By default, closes the input Reader. </summary>
-        public override void Close() 
-        { 
-            token_source.Close(); 
-            base.Close(); 
-        }
-
-        public StandardTokenizerTokenManager token_source;
-        public Token token, jj_nt;
-        private int jj_ntk;
-        private int jj_gen;
-        private int[] jj_la1 = new int[1];
-        private static int[] jj_la1_0_Renamed_Field;
-        private static void  jj_la1_0()
-        {
-            jj_la1_0_Renamed_Field = new int[]{0x10ff};
-        }
-		
-        public StandardTokenizer(CharStream stream)
-        {
-            token_source = new StandardTokenizerTokenManager(stream);
-            token = new Token();
-            jj_ntk = - 1;
-            jj_gen = 0;
-            for (int i = 0; i < 1; i++)
-                jj_la1[i] = - 1;
-        }
-		
-        public virtual void  ReInit(CharStream stream)
-        {
-            token_source.ReInit(stream);
-            token = new Token();
-            jj_ntk = - 1;
-            jj_gen = 0;
-            for (int i = 0; i < 1; i++)
-                jj_la1[i] = - 1;
-        }
-		
-        public StandardTokenizer(StandardTokenizerTokenManager tm)
-        {
-            token_source = tm;
-            token = new Token();
-            jj_ntk = - 1;
-            jj_gen = 0;
-            for (int i = 0; i < 1; i++)
-                jj_la1[i] = - 1;
-        }
-		
-        public virtual void  ReInit(StandardTokenizerTokenManager tm)
-        {
-            token_source = tm;
-            token = new Token();
-            jj_ntk = - 1;
-            jj_gen = 0;
-            for (int i = 0; i < 1; i++)
-                jj_la1[i] = - 1;
-        }
-		
-        private Token Jj_consume_token(int kind)
-        {
-            Token oldToken;
-            if ((oldToken = token).next != null)
-                token = token.next;
-            else
-                token = token.next = token_source.GetNextToken();
-            jj_ntk = - 1;
-            if (token.kind == kind)
-            {
-                jj_gen++;
-                return token;
-            }
-            token = oldToken;
-            jj_kind = kind;
-            throw GenerateParseException();
-        }
-		
-        public Token GetNextToken()
-        {
-            if (token.next != null)
-                token = token.next;
-            else
-                token = token.next = token_source.GetNextToken();
-            jj_ntk = - 1;
-            jj_gen++;
-            return token;
-        }
-		
-        public Token GetToken(int index)
-        {
-            Token t = token;
-            for (int i = 0; i < index; i++)
-            {
-                if (t.next != null)
-                    t = t.next;
-                else
-                    t = t.next = token_source.GetNextToken();
-            }
-            return t;
-        }
-		
-        private int Jj_ntk()
-        {
-            if ((jj_nt = token.next) == null)
-                return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
-            else
-                return (jj_ntk = jj_nt.kind);
-        }
-		
-        private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
-        private int[] jj_expentry;
-        private int jj_kind = - 1;
-		
-        public virtual ParseException GenerateParseException()
-        {
-            jj_expentries.Clear();
-            bool[] la1tokens = new bool[16];
-            for (int i = 0; i < 16; i++)
-            {
-                la1tokens[i] = false;
-            }
-            if (jj_kind >= 0)
-            {
-                la1tokens[jj_kind] = true;
-                jj_kind = - 1;
-            }
-            for (int i = 0; i < 1; i++)
-            {
-                if (jj_la1[i] == jj_gen)
-                {
-                    for (int j = 0; j < 32; j++)
-                    {
-                        if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
-                        {
-                            la1tokens[j] = true;
-                        }
-                    }
-                }
-            }
-            for (int i = 0; i < 16; i++)
-            {
-                if (la1tokens[i])
-                {
-                    jj_expentry = new int[1];
-                    jj_expentry[0] = i;
-                    jj_expentries.Add(jj_expentry);
-                }
-            }
-            int[][] exptokseq = new int[jj_expentries.Count][];
-            for (int i = 0; i < jj_expentries.Count; i++)
-            {
-                exptokseq[i] = (int[]) jj_expentries[i];
-            }
-            return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
-        }
-		
-        public void  Enable_tracing()
-        {
-        }
-		
-        public void  Disable_tracing()
-        {
-        }
-        static StandardTokenizer()
-        {
-        {
-            jj_la1_0();
-        }
-        }
-    }
+	/// <summary>A grammar-based tokenizer constructed with JFlex.
+	/// 
+	/// <p> This should be a good tokenizer for most European-language documents:
+	/// 
+	/// <ul>
+	/// <li>Splits words at punctuation characters, removing punctuation. However, a 
+	/// dot that's not followed by whitespace is considered part of a token.
+	/// <li>Splits words at hyphens, unless there's a number in the token, in which case
+	/// the whole token is interpreted as a product number and is not split.
+	/// <li>Recognizes email addresses and internet hostnames as one token.
+	/// </ul>
+	/// 
+	/// <p>Many applications have specific tokenizer needs.  If this tokenizer does
+	/// not suit your application, please consider copying this source code
+	/// directory to your project and maintaining your own grammar-based tokenizer.
+	/// </summary>
+	
+	public class StandardTokenizer : Tokenizer
+	{
+		private void  InitBlock()
+		{
+			maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+		}
+		/// <summary>A private instance of the JFlex-constructed scanner </summary>
+		private StandardTokenizerImpl scanner;
+		
+		/// <summary> Specifies whether deprecated acronyms should be replaced with HOST type.
+		/// This is false by default to support backward compatibility.
+		/// <p/>
+		/// See http://issues.apache.org/jira/browse/LUCENE-1068
+		/// 
+		/// </summary>
+		/// <deprecated> this should be removed in the next release (3.0).
+		/// </deprecated>
+		private bool replaceInvalidAcronym = false;
+		
+		internal virtual void  SetInput(System.IO.TextReader reader)
+		{
+			this.input = reader;
+		}
+		
+		private int maxTokenLength;
+		
+		/// <summary>Set the max allowed token length.  Any token longer
+		/// than this is skipped. 
+		/// </summary>
+		public virtual void  SetMaxTokenLength(int length)
+		{
+			this.maxTokenLength = length;
+		}
+		
+		/// <seealso cref="setMaxTokenLength">
+		/// </seealso>
+		public virtual int GetMaxTokenLength()
+		{
+			return maxTokenLength;
+		}
+		
+		/// <summary> Creates a new instance of the {@link StandardTokenizer}. Attaches the
+		/// <code>input</code> to a newly created JFlex scanner.
+		/// </summary>
+		public StandardTokenizer(System.IO.TextReader input)
+		{
+			InitBlock();
+			this.input = input;
+			this.scanner = new StandardTokenizerImpl(input);
+		}
+		
+		/// <summary> Creates a new instance of the {@link Lucene.Net.Analysis.Standard.StandardTokenizer}.  Attaches
+		/// the <code>input</code> to the newly created JFlex scanner.
+		/// 
+		/// </summary>
+		/// <param name="input">The input reader
+		/// </param>
+		/// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms with HOST.
+		/// 
+		/// See http://issues.apache.org/jira/browse/LUCENE-1068
+		/// </param>
+		public StandardTokenizer(System.IO.TextReader input, bool replaceInvalidAcronym)
+		{
+			InitBlock();
+			this.replaceInvalidAcronym = replaceInvalidAcronym;
+			this.input = input;
+			this.scanner = new StandardTokenizerImpl(input);
+		}
+		
+		/*
+		* (non-Javadoc)
+		*
+		* @see Lucene.Net.Analysis.TokenStream#next()
+		*/
+		public override Token Next(Token result)
+		{
+			int posIncr = 1;
+			
+			while (true)
+			{
+				int tokenType = scanner.GetNextToken();
+				
+				if (tokenType == StandardTokenizerImpl.YYEOF)
+				{
+					return null;
+				}
+				
+				if (scanner.Yylength() <= maxTokenLength)
+				{
+					result.Clear();
+					result.SetPositionIncrement(posIncr);
+					scanner.GetText(result);
+					int start = scanner.Yychar();
+					result.SetStartOffset(start);
+					result.SetEndOffset(start + result.TermLength());
+					// This 'if' should be removed in the next release. For now, it converts
+					// invalid acronyms to HOST. When removed, only the 'else' part should
+					// remain.
+					if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
+					{
+						if (replaceInvalidAcronym)
+						{
+							result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
+							result.SetTermLength(result.TermLength() - 1); // remove extra '.'
+						}
+						else
+						{
+							result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
+						}
+					}
+					else
+					{
+						result.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
+					}
+					return result;
+				}
+				// When we skip a too-long term, we still increment the
+				// position increment
+				else
+					posIncr++;
+			}
+		}
+		
+		/*
+		* (non-Javadoc)
+		*
+		* @see Lucene.Net.Analysis.TokenStream#reset()
+		*/
+		public override void  Reset()
+		{
+			base.Reset();
+			scanner.Yyreset(input);
+		}
+		
+		public override void  Reset(System.IO.TextReader reader)
+		{
+			input = reader;
+			Reset();
+		}
+		
+		/// <summary> Prior to https://issues.apache.org/jira/browse/LUCENE-1068, StandardTokenizer mischaracterized tokens such as www.abc.com
+		/// as acronyms when they should have been labeled as hosts instead.
+		/// </summary>
+		/// <returns> true if StandardTokenizer now returns these tokens as Hosts, otherwise false
+		/// 
+		/// </returns>
+		/// <deprecated> Remove in 3.X and make true the only valid value
+		/// </deprecated>
+		public virtual bool IsReplaceInvalidAcronym()
+		{
+			return replaceInvalidAcronym;
+		}
+		
+		/// <summary> Sets whether mischaracterized acronyms are replaced with HOST tokens. </summary>
+		/// <param name="replaceInvalidAcronym">Set to true to replace mischaracterized acronyms with HOST.
+		/// </param>
+		/// <deprecated> Remove in 3.X and make true the only valid value
+		/// 
+		/// See https://issues.apache.org/jira/browse/LUCENE-1068
+		/// </deprecated>
+		public virtual void  SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
+		{
+			this.replaceInvalidAcronym = replaceInvalidAcronym;
+		}
+	}
 }
\ No newline at end of file
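
The reworked StandardTokenizer above swaps the JavaCC parser for a JFlex-generated scanner and adds a reusable-token Next(Token) API, a configurable maximum token length, and the LUCENE-1068 replaceInvalidAcronym flag. A minimal sketch of driving it follows; it is not part of the commit, the sample text and class name are made up, and it assumes the Token accessors (TermText(), Type(), StartOffset(), EndOffset()) present in the 2.3 codebase:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;

public class StandardTokenizerSketch
{
    public static void Main()
    {
        // Illustrative input; "lucene.apache.org" should come back typed as
        // <HOST> rather than <ACRONYM> when the flag below is true.
        TextReader reader = new StringReader("Visit lucene.apache.org today");
        StandardTokenizer tokenizer = new StandardTokenizer(reader, true);
        tokenizer.SetMaxTokenLength(255); // longer tokens are skipped, bumping the position increment

        // One Token instance is reused across the whole stream.
        Token reusable = new Token();
        for (Token t = tokenizer.Next(reusable); t != null; t = tokenizer.Next(reusable))
        {
            Console.WriteLine(t.TermText() + " [" + t.StartOffset() + "," + t.EndOffset() + "] " + t.Type());
        }
        tokenizer.Close();
    }
}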

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj Tue Jun 24 19:53:11 2008
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+// {{Aroush-2.3.1}} remove this file from SVN
 options {
   STATIC = false;
 //IGNORE_CASE = true;

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs Tue Jun 24 19:53:11 2008
@@ -14,10 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+// {{Aroush-2.3.1}} remove this file from SVN
 /* Generated By:JavaCC: Do not edit this line. StandardTokenizerConstants.java */
 using System;
-
+/*
 namespace Lucene.Net.Analysis.Standard
 {
 	
@@ -42,4 +42,5 @@
         public const int DEFAULT = 0;
         public static System.String[] tokenImage = new System.String[]{"<EOF>", "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<P>", "<HAS_DIGIT>", "<ALPHA>", "<LETTER>", "<CJ>", "<KOREAN>", "<DIGIT>", "<NOISE>"};
     }
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs?rev=671406&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs Tue Jun 24 19:53:11 2008
@@ -0,0 +1,662 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */
+using System;
+
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Analysis.Standard
+{
+	
+	
+	/// <summary> This class is a scanner generated by 
+	/// <a href="http://www.jflex.de/">JFlex</a> 1.4.1
+	/// on 12/18/07 9:22 PM from the specification file
+	/// <tt>/Volumes/User/grantingersoll/projects/lucene/java/lucene-clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
+	/// </summary>
+	class StandardTokenizerImpl
+	{
+		
+		/// <summary>This character denotes the end of file </summary>
+		public const int YYEOF = - 1;
+		
+		/// <summary>initial size of the lookahead buffer </summary>
+		private const int ZZ_BUFFERSIZE = 16384;
+		
+		/// <summary>lexical states </summary>
+		public const int YYINITIAL = 0;
+		
+		/// <summary> Translates characters to character classes</summary>
+		private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000E\x0001\x0000\x0001\x0000\x0001\x000D\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0003" + "\x0001\x0001\x0004\x0000\x0001\x0007\x0001\x0005\x0001\x0002\x0001\x0007\x000A\x0009\x0006\x0000\x0001\x0004\x001A\x0008" + "\x0004\x0000\x0001\x0006\x0001\x0000\x001A\x0008\x0045\x0000\x0017\x0008\x0001\x0000\x001F\x0008\x0001\x0000\u0568\x0008" + "\x000A\x000A\x0086\x0008\x000A\x000A\u026c\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" + "\x000A\x000A\x0076\x0008\x000A\x000A\x0077\x0008\x0009\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" + "\x000A\x000A\x00E0\x0008\x000A\x000A\x0076\x0008\x000A\x000A\u0166\x0008\x000A\x000A\x00B6\x0008\u0100\x0008\u0e00\x0008" + "\u1040\x0000\u0150\x000C\x0060\x0000\x0010\x000C\u0100\x0000\x0080\x000C\x0080\x0000\u19c0\x000C\x0040\x0000\u5200\x000C" + "\u0c00\x0000\u2bb0\x000B\u2150\x0000\u0200\x000C\u0465\x0000\x003B\x000C\x003D\x0008\x0023\x0000";
+		
+		/// <summary> Translates characters to character classes</summary>
+		private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
+		
+		/// <summary> Translates DFA states to action switch labels.</summary>
+		private static readonly int[] ZZ_ACTION = ZzUnpackAction();
+		
+		private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0004\x0002\x0001\x0003\x0001\x0001\x0006\x0000\x0002\x0002\x0006\x0000" + "\x0001\x0004\x0004\x0005\x0002\x0006\x0002\x0000\x0001\x0007\x0001\x0000\x0001\x0007\x0003\x0005" + "\x0006\x0007\x0003\x0005\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x0000\x0001\x0008\x0001\x0009" + "\x0001\x0000\x0002\x0009\x0002\x0008\x0002\x0005\x0001\x000A";
+		
+		private static int[] ZzUnpackAction()
+		{
+			int[] result = new int[61];
+			int offset = 0;
+			offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+			return result;
+		}
+		
+		private static int ZzUnpackAction(System.String packed, int offset, int[] result)
+		{
+			int i = 0; /* index in packed string  */
+			int j = offset; /* index in unpacked array */
+			int l = packed.Length;
+			while (i < l)
+			{
+				int count = packed[i++];
+				int value_Renamed = packed[i++];
+				do 
+					result[j++] = value_Renamed;
+				while (--count > 0);
+			}
+			return j;
+		}
+		
+		
+		/// <summary> Translates a state to a row index in the transition table</summary>
+		private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
+		
+		private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000F\x0000\x001E\x0000\x002D\x0000\x003C\x0000\x004B\x0000\x000F\x0000\x005A" + "\x0000\x0069\x0000\x0078\x0000\x0087\x0000\x0096\x0000\x00A5\x0000\x00B4\x0000\x00C3\x0000\x00D2" + "\x0000\x00E1\x0000\x00F0\x0000\x00FF\x0000\u010e\x0000\u011d\x0000\u012c\x0000\u013b\x0000\u014a" + "\x0000\u0159\x0000\u0168\x0000\u0177\x0000\x0087\x0000\u0186\x0000\u0195\x0000\u01a4\x0000\u01b3" + "\x0000\u01c2\x0000\u01d1\x0000\u01e0\x0000\u01ef\x0000\u01fe\x0000\u020d\x0000\u021c\x0000\u022b" + "\x0000\u023a\x0000\u0249\x0000\u0258\x0000\u0267\x0000\u0276\x0000\u0285\x0000\u0294\x0000\u02a3" + "\x0000\u02b2\x0000\u02c1\x0000\u02d0\x0000\u02df\x0000\u02ee\x0000\u02fd\x0000\u012c\x0000\x00E1" + "\x0000\x0078\x0000\u011d\x0000\u030c\x0000\u031b\x0000\u032a";
+		
+		private static int[] ZzUnpackRowMap()
+		{
+			int[] result = new int[61];
+			int offset = 0;
+			offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+			return result;
+		}
+		
+		private static int ZzUnpackRowMap(System.String packed, int offset, int[] result)
+		{
+			int i = 0; /* index in packed string  */
+			int j = offset; /* index in unpacked array */
+			int l = packed.Length;
+			while (i < l)
+			{
+				int high = packed[i++] << 16;
+				result[j++] = high | packed[i++];
+			}
+			return j;
+		}
+		
+		/// <summary> The transition table of the DFA</summary>
+		private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
+		
+		private const System.String ZZ_TRANS_PACKED_0 = "\x0008\x0002\x0001\x0003\x0001\x0004\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0008\x0001\x0002" + "\x0010\x0000\x0001\x0009\x0001\x000A\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" + "\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0011\x0001\x0000\x0001\x0012\x0002\x0013" + "\x0001\x0014\x0003\x0004\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0015\x0001\x000B\x0001\x000C" + "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0016" + "\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0006\x0011\x0000\x0001\x0002\x0008\x0000" + "\x0001\x0017\x0001\x0000\x0001\x0017\x000C\x0000\x0001\x0018\x0001\x0019\x0001\x001A\x0001\x001B" + "\x000B\x0000\x0001\x001C\x0001\x0000\x0001\x001C\x000C\x0000\x0001\x001D\x0001\x001E\x0001\x001D" + "\x0001\x001E\x000B\x0000\x0001\x001F\x0002\x0020\x0001\x0021\x000B\x0000\x0001\x000E\x0002\x0022" + "\x0005\x0000\x0001\x0009\x0001\x0016\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" + "\x0001\x0004\x0001\x0010\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0011\x0001\x000B\x0001\x000C" + "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x000B\x0000\x0001\x0023" + "\x0002\x0024\x0001\x0025\x000B\x0000\x0004\x001E\x000B\x0000\x0001\x0026\x0002\x0027\x0001\x0028" + "\x000B\x0000\x0001\x0029\x0002\x002A\x0001\x002B\x000B\x0000\x0001\x002C\x0001\x0024\x0001\x002D" + "\x0001\x0025\x000B\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0004\x0000\x0001\x0009\x0006\x0000" + "\x0001\x0017\x0001\x0000\x0001\x0017\x0006\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030" + "\x0001\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0005\x0000\x0001\x0031\x0001\x0000\x0001\x0012" + "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0034\x0001\x0000\x0001\x0012" + "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012" + 
+			"\x0002\x0030\x0001\x0000\x0004\x001B\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000" + "\x0001\x001D\x0001\x001E\x0001\x001D\x0001\x001E\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036" + "\x0002\x0000\x0004\x001E\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" + "\x0001\x001F\x0002\x0020\x0001\x0021\x0005\x0000\x0001\x0032\x0001\x0000\x0001\x0012\x0002\x0032" + "\x0001\x0033\x0003\x0020\x0001\x0021\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030" + "\x0001\x0000\x0004\x0021\x0005\x0000\x0001\x0033\x0002\x0000\x0003\x0033\x0003\x0022\x0006\x0000" + "\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023\x0002\x0024\x0001\x0025" + "\x0005\x0000\x0001\x0038\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0024\x0001\x0025" + "\x0005\x0000\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" + "\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0026\x0002\x0027\x0001\x0028" + "\x0005\x0000\x0001\x0013\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0027\x0001\x0028" + "\x0005\x0000\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0028\x0005\x0000" + "\x0001\x000E\x0002\x0000\x0003\x000E\x0001\x0029\x0002\x002A\x0001\x002B\x0005\x0000\x0001\x0014" + "\x0002\x0000\x0003\x0014\x0003\x002A\x0001\x002B\x0005\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0004\x002B\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023" + "\x0002\x0024\x0001\x0025\x0005\x0000\x0001\x003A\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014" + "\x0003\x0024\x0001\x0025\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" + "\x0001\x002E\x0002\x0019\x0001\x001B\x000B\x0000\x0001\x003B\x0001\x001B\x0001\x003B\x0001\x001B" + "\x000B\x0000\x0004\x0021\x000B\x0000\x0004\x0025\x000B\x0000\x0004\x0028\x000B\x0000\x0004\x002B" + "\x000B\x0000\x0001\x003C\x0001\x0025\x0001\x003C\x0001\x0025\x000B\x0000\x0004\x001B\x000B\x0000" + 
+			"\x0004\x003D\x0005\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000\x0004\x001B" + "\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" + "\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000\x0004\x003D\x0003\x0000";
+		
+		private static int[] ZzUnpackTrans()
+		{
+			int[] result = new int[825];
+			int offset = 0;
+			offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+			return result;
+		}
+		
+		private static int ZzUnpackTrans(System.String packed, int offset, int[] result)
+		{
+			int i = 0; /* index in packed string  */
+			int j = offset; /* index in unpacked array */
+			int l = packed.Length;
+			while (i < l)
+			{
+				int count = packed[i++];
+				int value_Renamed = packed[i++];
+				value_Renamed--;
+				do 
+					result[j++] = value_Renamed;
+				while (--count > 0);
+			}
+			return j;
+		}
+		
+		
+		/* error codes */
+		private const int ZZ_UNKNOWN_ERROR = 0;
+		private const int ZZ_NO_MATCH = 1;
+		private const int ZZ_PUSHBACK_2BIG = 2;
+		
+		/* error messages for the codes above */
+		private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unknown internal scanner error", "Error: could not match input", "Error: pushback value was too large"};
+		
+		/// <summary> ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code></summary>
+		private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
+		
+		private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0004\x0001\x0001\x0009\x0001\x0001\x0006\x0000\x0002\x0001\x0006\x0000" + "\x0007\x0001\x0002\x0000\x0001\x0001\x0001\x0000\x000E\x0001\x0001\x0000\x0001\x0001\x0002\x0000" + "\x0002\x0001\x0001\x0000\x0007\x0001";
+		
+		private static int[] ZzUnpackAttribute()
+		{
+			int[] result = new int[61];
+			int offset = 0;
+			offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+			return result;
+		}
+		
+		private static int ZzUnpackAttribute(System.String packed, int offset, int[] result)
+		{
+			int i = 0; /* index in packed string  */
+			int j = offset; /* index in unpacked array */
+			int l = packed.Length;
+			while (i < l)
+			{
+				int count = packed[i++];
+				int value_Renamed = packed[i++];
+				do 
+					result[j++] = value_Renamed;
+				while (--count > 0);
+			}
+			return j;
+		}
+		
+		/// <summary>the input device </summary>
+		private System.IO.TextReader zzReader;
+		
+		/// <summary>the current state of the DFA </summary>
+		private int zzState;
+		
+		/// <summary>the current lexical state </summary>
+		private int zzLexicalState = YYINITIAL;
+		
+		/// <summary>this buffer contains the current text to be matched and is
+		/// the source of the yytext() string 
+		/// </summary>
+		private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
+		
+		/// <summary>the textposition at the last accepting state </summary>
+		private int zzMarkedPos;
+		
+		/// <summary>the textposition at the last state to be included in yytext </summary>
+		private int zzPushbackPos;
+		
+		/// <summary>the current text position in the buffer </summary>
+		private int zzCurrentPos;
+		
+		/// <summary>startRead marks the beginning of the yytext() string in the buffer </summary>
+		private int zzStartRead;
+		
+		/// <summary>endRead marks the last character in the buffer, that has been read
+		/// from input 
+		/// </summary>
+		private int zzEndRead;
+		
+		/// <summary>number of newlines encountered up to the start of the matched text </summary>
+		private int yyline;
+		
+		/// <summary>the number of characters up to the start of the matched text </summary>
+		private int yychar;
+		
+		/// <summary> the number of characters from the last newline up to the start of the 
+		/// matched text
+		/// </summary>
+		private int yycolumn;
+		
+		/// <summary> zzAtBOL == true <=> the scanner is currently at the beginning of a line</summary>
+		private bool zzAtBOL = true;
+		
+		/// <summary>zzAtEOF == true <=> the scanner is at the EOF </summary>
+		private bool zzAtEOF;
+		
+		/* user code: */
+		
+		public const int ALPHANUM = 0;
+		public const int APOSTROPHE = 1;
+		public const int ACRONYM = 2;
+		public const int COMPANY = 3;
+		public const int EMAIL = 4;
+		public const int HOST = 5;
+		public const int NUM = 6;
+		public const int CJ = 7;
+		/// <deprecated> this solves a bug where HOSTs that end with '.' are identified
+		/// as ACRONYMs. It is deprecated and will be removed in the next
+		/// release.
+		/// </deprecated>
+		public const int ACRONYM_DEP = 8;
+		
+		public static readonly System.String[] TOKEN_TYPES = new System.String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
+		
+		public int Yychar()
+		{
+			return yychar;
+		}
+		
+		/// <summary> Fills Lucene token with the current token text.</summary>
+		internal void  GetText(Token t)
+		{
+			t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+		}
+		
+		
+		/// <summary> Creates a new scanner
+		/// There is also a System.IO.Stream version of this constructor.
+		/// 
+		/// </summary>
+		/// <param name="in"> the java.io.Reader to read input from.
+		/// </param>
+		internal StandardTokenizerImpl(System.IO.TextReader in_Renamed)
+		{
+			this.zzReader = in_Renamed;
+		}
+		
+		/// <summary> Creates a new scanner.
+		/// There is also a System.IO.TextReader version of this constructor.
+		/// 
+		/// </summary>
+		/// <param name="in"> the java.io.Inputstream to read input from.
+		/// </param>
+		internal StandardTokenizerImpl(System.IO.Stream in_Renamed) : this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default))
+		{
+		}
+		
+		/// <summary> Unpacks the compressed character translation table.
+		/// 
+		/// </summary>
+		/// <param name="packed">  the packed character translation table
+		/// </param>
+		/// <returns>         the unpacked character translation table
+		/// </returns>
+		private static char[] ZzUnpackCMap(System.String packed)
+		{
+			char[] map = new char[0x10000];
+			int i = 0; /* index in packed string  */
+			int j = 0; /* index in unpacked array */
+			while (i < 156)
+			{
+				int count = packed[i++];
+				char value_Renamed = packed[i++];
+				do 
+					map[j++] = value_Renamed;
+				while (--count > 0);
+			}
+			return map;
+		}
+		
+		
+		/// <summary> Refills the input buffer.
+		/// 
+		/// </summary>
+		/// <returns>      <code>false</code>, iff there was new input.
+		/// 
+		/// </returns>
+		/// <exception cref="java.io.IOException"> if any I/O-Error occurs
+		/// </exception>
+		private bool ZzRefill()
+		{
+			
+			/* first: make room (if you can) */
+			if (zzStartRead > 0)
+			{
+				Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+				
+				/* translate stored positions */
+				zzEndRead -= zzStartRead;
+				zzCurrentPos -= zzStartRead;
+				zzMarkedPos -= zzStartRead;
+				zzPushbackPos -= zzStartRead;
+				zzStartRead = 0;
+			}
+			
+			/* is the buffer big enough? */
+			if (zzCurrentPos >= zzBuffer.Length)
+			{
+				/* if not: blow it up */
+				char[] newBuffer = new char[zzCurrentPos * 2];
+				Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
+				zzBuffer = newBuffer;
+			}
+			
+			/* finally: fill the buffer with new input */
+			int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+			
+			if (numRead < 1)
+			{
+				return true;
+			}
+			else
+			{
+				zzEndRead += numRead;
+				return false;
+			}
+		}
+		
+		
+		/// <summary> Closes the input stream.</summary>
+		public void  Yyclose()
+		{
+			zzAtEOF = true; /* indicate end of file */
+			zzEndRead = zzStartRead; /* invalidate buffer    */
+			
+			if (zzReader != null)
+				zzReader.Close();
+		}
+		
+		
+		/// <summary> Resets the scanner to read from a new input stream.
+		/// Does not close the old reader.
+		/// 
+		/// All internal variables are reset, the old input stream 
+		/// <b>cannot</b> be reused (internal buffer is discarded and lost).
+		/// Lexical state is set to <tt>YYINITIAL</tt>.
+		/// 
+		/// </summary>
+		/// <param name="reader">  the new input stream 
+		/// </param>
+		public void  Yyreset(System.IO.TextReader reader)
+		{
+			zzReader = reader;
+			zzAtBOL = true;
+			zzAtEOF = false;
+			zzEndRead = zzStartRead = 0;
+			zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
+			yyline = yychar = yycolumn = 0;
+			zzLexicalState = YYINITIAL;
+		}
+		
+		
+		/// <summary> Returns the current lexical state.</summary>
+		public int Yystate()
+		{
+			return zzLexicalState;
+		}
+		
+		
+		/// <summary> Enters a new lexical state
+		/// 
+		/// </summary>
+		/// <param name="newState">the new lexical state
+		/// </param>
+		public void  Yybegin(int newState)
+		{
+			zzLexicalState = newState;
+		}
+		
+		
+		/// <summary> Returns the text matched by the current regular expression.</summary>
+		public System.String Yytext()
+		{
+			return new System.String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+		}
+		
+		
+		/// <summary> Returns the character at position <tt>pos</tt> from the 
+		/// matched text. 
+		/// 
+		/// It is equivalent to Yytext()[pos], but faster.
+		/// 
+		/// </summary>
+		/// <param name="pos">the position of the character to fetch. 
+		/// A value from 0 to Yylength()-1.
+		/// 
+		/// </param>
+		/// <returns> the character at position pos
+		/// </returns>
+		public char Yycharat(int pos)
+		{
+			return zzBuffer[zzStartRead + pos];
+		}
+		
+		
+		/// <summary> Returns the length of the matched text region.</summary>
+		public int Yylength()
+		{
+			return zzMarkedPos - zzStartRead;
+		}
+		
+		
+		/// <summary> Reports an error that occurred while scanning.
+		/// 
+		/// In a well-formed scanner (no or only correct usage of 
+		/// yypushback(int) and a match-all fallback rule) this method 
+		/// will only be called with things that "Can't Possibly Happen".
+		/// If this method is called, something is seriously wrong
+		/// (e.g. a JFlex bug producing a faulty scanner etc.).
+		/// 
+		/// Usual syntax/scanner level error handling should be done
+		/// in error fallback rules.
+		/// 
+		/// </summary>
+		/// <param name="errorCode"> the code of the errormessage to display
+		/// </param>
+		private void  ZzScanError(int errorCode)
+		{
+			System.String message;
+			try
+			{
+				message = ZZ_ERROR_MSG[errorCode];
+			}
+			catch (System.IndexOutOfRangeException)
+			{
+				message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+			}
+			
+			throw new System.ApplicationException(message);
+		}
+		
+		
+		/// <summary> Pushes the specified amount of characters back into the input stream.
+		/// 
+		/// They will be read again by the next call of the scanning method.
+		/// 
+		/// </summary>
+		/// <param name="number"> the number of characters to be read again.
+		/// This number must not be greater than Yylength()!
+		/// </param>
+		public virtual void  Yypushback(int number)
+		{
+			if (number > Yylength())
+				ZzScanError(ZZ_PUSHBACK_2BIG);
+			
+			zzMarkedPos -= number;
+		}
+		
+		
+		/// <summary> Resumes scanning until the next regular expression is matched,
+		/// the end of input is encountered or an I/O-Error occurs.
+		/// 
+		/// </summary>
+		/// <returns>      the next token
+		/// </returns>
+		/// <exception cref="java.io.IOException"> if any I/O-Error occurs
+		/// </exception>
+		public virtual int GetNextToken()
+		{
+			int zzInput;
+			int zzAction;
+			
+			// cached fields:
+			int zzCurrentPosL;
+			int zzMarkedPosL;
+			int zzEndReadL = zzEndRead;
+			char[] zzBufferL = zzBuffer;
+			char[] zzCMapL = ZZ_CMAP;
+			
+			int[] zzTransL = ZZ_TRANS;
+			int[] zzRowMapL = ZZ_ROWMAP;
+			int[] zzAttrL = ZZ_ATTRIBUTE;
+			
+			while (true)
+			{
+				zzMarkedPosL = zzMarkedPos;
+				
+				yychar += zzMarkedPosL - zzStartRead;
+				
+				zzAction = - 1;
+				
+				zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+				
+				zzState = zzLexicalState;
+				
+				
+				{
+					while (true)
+					{
+						
+						if (zzCurrentPosL < zzEndReadL)
+							zzInput = zzBufferL[zzCurrentPosL++];
+						else if (zzAtEOF)
+						{
+							zzInput = YYEOF;
+							goto zzForAction_brk;
+						}
+						else
+						{
+							// store back cached positions
+							zzCurrentPos = zzCurrentPosL;
+							zzMarkedPos = zzMarkedPosL;
+							bool eof = ZzRefill();
+							// get translated positions and possibly new buffer
+							zzCurrentPosL = zzCurrentPos;
+							zzMarkedPosL = zzMarkedPos;
+							zzBufferL = zzBuffer;
+							zzEndReadL = zzEndRead;
+							if (eof)
+							{
+								zzInput = YYEOF;
+								goto zzForAction_brk;
+							}
+							else
+							{
+								zzInput = zzBufferL[zzCurrentPosL++];
+							}
+						}
+						int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
+						if (zzNext == - 1)
+						{
+							goto zzForAction_brk;
+						}
+						zzState = zzNext;
+						
+						int zzAttributes = zzAttrL[zzState];
+						if ((zzAttributes & 1) == 1)
+						{
+							zzAction = zzState;
+							zzMarkedPosL = zzCurrentPosL;
+							if ((zzAttributes & 8) == 8)
+							{
+								goto zzForAction_brk;
+							}
+						}
+					}
+				}
+
+zzForAction_brk: ;
+				
+				
+				// store back cached position
+				zzMarkedPos = zzMarkedPosL;
+				
+				switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
+				{
+					
+					case 5: 
+					{
+						return HOST;
+					}
+					
+					case 11:  break;
+					
+					case 9: 
+					{
+						return ACRONYM_DEP;
+					}
+					
+					case 12:  break;
+					
+					case 8: 
+					{
+						return ACRONYM;
+					}
+					
+					case 13:  break;
+					
+					case 1: 
+						{
+							/* ignore */
+						}
+						goto case 14;
+					
+					case 14:  break;
+					
+					case 7: 
+					{
+						return NUM;
+					}
+					
+					case 15:  break;
+					
+					case 3: 
+					{
+						return CJ;
+					}
+					
+					case 16:  break;
+					
+					case 2: 
+					{
+						return ALPHANUM;
+					}
+					
+					case 17:  break;
+					
+					case 6: 
+					{
+						return COMPANY;
+					}
+					
+					case 18:  break;
+					
+					case 4: 
+					{
+						return APOSTROPHE;
+					}
+					
+					case 19:  break;
+					
+					case 10: 
+					{
+						return EMAIL;
+					}
+					
+					case 20:  break;
+					
+					default: 
+						if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
+						{
+							zzAtEOF = true;
+							return YYEOF;
+						}
+						else
+						{
+							ZzScanError(ZZ_NO_MATCH);
+						}
+						break;
+					
+				}
+			}
+		}
+	}
+}
\ No newline at end of file
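
The scanner above stores its DFA tables as run-length packed strings: each pair of characters in a packed constant is a (count, value) run, and ZzUnpackTrans additionally decrements every value so that -1 can mark a missing transition. A self-contained sketch of the same decoding scheme, not part of the commit and using made-up packed data:

using System;

public class PackedTableSketch
{
    // Mirrors the (count, value) pair scheme of the ZzUnpack* helpers above.
    static int[] Unpack(string packed, int size, bool decrement)
    {
        int[] result = new int[size];
        int i = 0; // index into the packed string
        int j = 0; // index into the unpacked array
        while (i < packed.Length)
        {
            int count = packed[i++]; // how many times to repeat...
            int value = packed[i++]; // ...this value
            if (decrement)
                value--;             // ZzUnpackTrans shifts values down so -1 means "no transition"
            do
                result[j++] = value;
            while (--count > 0);
        }
        return result;
    }

    public static void Main()
    {
        // Made-up packed data: a run of three 5s followed by a run of two 1s.
        int[] table = Unpack("\x0003\x0005\x0002\x0001", 5, false);
        foreach (int v in table)
            Console.Write(v + " "); // prints: 5 5 5 1 1
        Console.WriteLine();
    }
}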

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex?rev=671406&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex Tue Jun 24 19:53:11 2008
@@ -0,0 +1,140 @@
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Token;
+
+%%
+
+%class StandardTokenizerImpl
+%unicode
+%integer
+%function getNextToken
+%pack
+%char
+
+%{
+
+public static final int ALPHANUM          = 0;
+public static final int APOSTROPHE        = 1;
+public static final int ACRONYM           = 2;
+public static final int COMPANY           = 3;
+public static final int EMAIL             = 4;
+public static final int HOST              = 5;
+public static final int NUM               = 6;
+public static final int CJ                = 7;
+/**
+ * @deprecated this solves a bug where HOSTs that end with '.' are identified
+ *             as ACRONYMs. It is deprecated and will be removed in the next
+ *             release.
+ */
+public static final int ACRONYM_DEP       = 8;
+
+public static final String [] TOKEN_TYPES = new String [] {
+    "<ALPHANUM>",
+    "<APOSTROPHE>",
+    "<ACRONYM>",
+    "<COMPANY>",
+    "<EMAIL>",
+    "<HOST>",
+    "<NUM>",
+    "<CJ>",
+    "<ACRONYM_DEP>"
+};
+
+public final int yychar()
+{
+    return yychar;
+}
+
+/**
+ * Fills Lucene token with the current token text.
+ */
+final void getText(Token t) {
+  t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+}
+%}
+
+// basic word: a sequence of digits & letters
+ALPHANUM   = ({LETTER}|{DIGIT}|{KOREAN})+
+
+// internal apostrophes: O'Reilly, you're, O'Reilly's
+// use a post-filter to remove possessives
+APOSTROPHE =  {ALPHA} ("'" {ALPHA})+
+
+// acronyms: U.S.A., I.B.M., etc.
+// use a post-filter to remove dots
+ACRONYM    =  {LETTER} "." ({LETTER} ".")+
+
+ACRONYM_DEP	= {ALPHANUM} "." ({ALPHANUM} ".")+
+
+// company names like AT&T and Excite@Home.
+COMPANY    =  {ALPHA} ("&"|"@") {ALPHA}
+
+// email addresses
+EMAIL      =  {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
+
+// hostname
+HOST       =  {ALPHANUM} ((".") {ALPHANUM})+
+
+// floating point, serial, model numbers, ip addresses, etc.
+// every other segment must have at least one digit
+NUM        = ({ALPHANUM} {P} {HAS_DIGIT}
+           | {HAS_DIGIT} {P} {ALPHANUM}
+           | {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
+           | {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
+           | {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
+           | {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
+
+// punctuation
+P	         = ("_"|"-"|"/"|"."|",")
+
+// at least one digit
+HAS_DIGIT  =
+    ({LETTER}|{DIGIT})*
+    {DIGIT}
+    ({LETTER}|{DIGIT})*
+
+ALPHA      = ({LETTER})+
+
+
+LETTER     = [\u0041-\u005a\u0061-\u007a\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0100-\u1fff\uffa0-\uffdc]
+
+DIGIT      = [\u0030-\u0039\u0660-\u0669\u06f0-\u06f9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be7-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0e50-\u0e59\u0ed0-\u0ed9\u1040-\u1049]
+
+KOREAN     = [\uac00-\ud7af\u1100-\u11ff]
+
+// Chinese, Japanese
+CJ         = [\u3040-\u318f\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
+
+WHITESPACE = \r\n | [ \r\n\t\f]
+
+%%
+
+{ALPHANUM}                                                     { return ALPHANUM; }
+{APOSTROPHE}                                                   { return APOSTROPHE; }
+{ACRONYM}                                                      { return ACRONYM; }
+{COMPANY}                                                      { return COMPANY; }
+{EMAIL}                                                        { return EMAIL; }
+{HOST}                                                         { return HOST; }
+{NUM}                                                          { return NUM; }
+{CJ}                                                           { return CJ; }
+{ACRONYM_DEP}                                                  { return ACRONYM_DEP; }
+
+/** Ignore the rest */
+. | {WHITESPACE}                                               { /* ignore */ }
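
For reference, a short sketch (not part of the commit) of how the rules above classify a few made-up samples; the expected types in the comments assume the default tokenizer configuration:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;

public class GrammarSketch
{
    public static void Main()
    {
        // Each sample is chosen to exercise one rule from the grammar above.
        string[] samples = {
            "O'Reilly",           // <APOSTROPHE> (internal apostrophes)
            "I.B.M.",             // <ACRONYM>    (letter-dot sequences)
            "AT&T",               // <COMPANY>    (ALPHA & ALPHA)
            "wsmith@example.com", // <EMAIL>
            "lucene.apache.org",  // <HOST>
            "R2-D2"               // <NUM>        (every other segment has a digit)
        };
        foreach (string s in samples)
        {
            StandardTokenizer tokenizer = new StandardTokenizer(new StringReader(s));
            Token t = tokenizer.Next(new Token());
            Console.WriteLine(s + " -> " + (t == null ? "(none)" : t.Type()));
            tokenizer.Close();
        }
    }
}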

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs Tue Jun 24 19:53:11 2008
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+// {{Aroush-2.3.1}} remove this file from SVN
 /* Generated By:JavaCC: Do not edit this line. StandardTokenizerTokenManager.java */
+/*
 using System;
 
 namespace Lucene.Net.Analysis.Standard
@@ -113,7 +114,8 @@
 					ReInitRounds();
 				if (curChar < 64)
 				{
-					ulong l = ((ulong) 1L) << curChar;
+					ulong l = ((ulong) 1L) << curChar;
+
 MatchLoop: 
 					do 
 					{
@@ -463,7 +465,8 @@
 				}
 				else if (curChar < 128)
 				{
-					ulong l = ((ulong) 1L) << (curChar & 63);
+					ulong l = ((ulong) 1L) << (curChar & 63);
+
 MatchLoop1: 
 					do 
 					{
@@ -821,7 +824,8 @@
 					int i1 = hiByte >> 6;
 					ulong l1 = ((ulong) 1L) << (hiByte & 63);
 					int i2 = (curChar & 0xff) >> 6;
-					ulong l2 = ((ulong) 1L) << (curChar & 63);
+					ulong l2 = ((ulong) 1L) << (curChar & 63);
+
 MatchLoop1: 
 					do 
 					{
@@ -1421,7 +1425,7 @@
 				catch (System.IO.IOException)
 				{
 					EOFSeen = true;
-					error_after = curPos <= 1?"":input_stream.GetImage();
+					error_after = curPos <= 1 ? "" : input_stream.GetImage();
 					if (curChar == '\n' || curChar == '\r')
 					{
 						error_line++;
@@ -1433,7 +1437,7 @@
 				if (!EOFSeen)
 				{
 					input_stream.Backup(1);
-					error_after = curPos <= 1?"":input_stream.GetImage();
+					error_after = curPos <= 1 ? "" : input_stream.GetImage();
 				}
 				throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
 
@@ -1441,4 +1445,4 @@
 			}
 		}
 	}
-}
\ No newline at end of file
+}*/
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/Token.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs Tue Jun 24 19:53:11 2008
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+// {{Aroush-2.3.1}} remove this file from SVN
 /* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
+/*
 using System;
 
 namespace Lucene.Net.Analysis.Standard
@@ -90,4 +91,5 @@
             }
         }
     }
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs Tue Jun 24 19:53:11 2008
@@ -14,8 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+// {{Aroush-2.3.1}} remove this file from SVN
 /* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
+/*
 using System;
 
 namespace Lucene.Net.Analysis.Standard
@@ -138,7 +139,7 @@
         /// </summary>
         protected internal static System.String LexicalError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar)
         {
-            return ("Lexical error at line " + errorLine + ", column " + errorColumn + ".  Encountered: " + (EOFSeen?"<EOF> ":("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\"");
+            return ("Lexical error at line " + errorLine + ", column " + errorColumn + ".  Encountered: " + (EOFSeen ? "<EOF> " : ("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\"");
         }
 		
         /*
@@ -158,4 +159,5 @@
         {
         }
     }
-}
\ No newline at end of file
+}
+*/
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/StopAnalyzer.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs Tue Jun 24 19:53:11 2008
@@ -70,5 +70,44 @@
         {
             return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
         }
+
+		/// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
+		private class SavedStreams
+		{
+			public SavedStreams(StopAnalyzer enclosingInstance)
+			{
+				InitBlock(enclosingInstance);
+			}
+			private void  InitBlock(StopAnalyzer enclosingInstance)
+			{
+				this.enclosingInstance = enclosingInstance;
+			}
+			private StopAnalyzer enclosingInstance;
+			public StopAnalyzer Enclosing_Instance
+			{
+				get
+				{
+					return enclosingInstance;
+				}
+				
+			}
+			internal Tokenizer source;
+			internal TokenStream result;
+		}
+		
+		public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+		{
+			SavedStreams streams = (SavedStreams) GetPreviousTokenStream();
+			if (streams == null)
+			{
+				streams = new SavedStreams(this);
+				streams.source = new LowerCaseTokenizer(reader);
+				streams.result = new StopFilter(streams.source, stopWords);
+				SetPreviousTokenStream(streams);
+			}
+			else
+				streams.source.Reset(reader);
+			return streams.result;
+		}
     }
 }
\ No newline at end of file
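
The SavedStreams/ReusableTokenStream addition above caches one LowerCaseTokenizer/StopFilter chain per analyzer (per thread, via Get/SetPreviousTokenStream) and merely resets the tokenizer's reader for each new document, instead of allocating a fresh chain every time. A brief usage sketch, not part of the commit, with a made-up field name and documents:

using System;
using System.IO;
using Lucene.Net.Analysis;

public class ReusableStreamSketch
{
    public static void Main()
    {
        StopAnalyzer analyzer = new StopAnalyzer();
        string[] docs = { "the quick brown fox", "a very lazy dog" };
        foreach (string doc in docs)
        {
            // The same tokenizer/filter pair comes back on every call here;
            // only the underlying reader is reset between documents.
            TokenStream stream = analyzer.ReusableTokenStream("body", new StringReader(doc));
            for (Token t = stream.Next(); t != null; t = stream.Next())
                Console.Write(t.TermText() + " "); // stop words are dropped
            Console.WriteLine();
        }
    }
}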

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/StopFilter.cs?rev=671406&r1=671405&r2=671406&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs Tue Jun 24 19:53:11 2008
@@ -25,92 +25,159 @@
     public sealed class StopFilter : TokenFilter
     {
 		
-        private System.Collections.Hashtable stopWords;
-        private bool ignoreCase;
+		private static bool ENABLE_POSITION_INCREMENTS_DEFAULT = false;
+		
+		private CharArraySet stopWords;
+		private bool enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
 		
         /// <summary> Construct a token stream filtering the given input.</summary>
         public StopFilter(TokenStream input, System.String[] stopWords) : this(input, stopWords, false)
         {
         }
-		
-        /// <summary> Constructs a filter which removes words from the input
-        /// TokenStream that are named in the array of words.
-        /// </summary>
-        public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : base(in_Renamed)
-        {
-            this.ignoreCase = ignoreCase;
-            this.stopWords = MakeStopSet(stopWords, ignoreCase);
-        }
-		
 
-        /// <summary> Construct a token stream filtering the given input.</summary>
-        /// <param name="input">
-        /// </param>
-        /// <param name="stopWords">The set of Stop Words, as Strings.  If ignoreCase is true, all strings should be lower cased
-        /// </param>
-        /// <param name="ignoreCase">-Ignore case when stopping.  The stopWords set must be setup to contain only lower case words 
-        /// </param>
-        public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : base(input)
-        {
-            this.ignoreCase = ignoreCase;
-            this.stopWords = stopWords;
-        }
-		
         /// <summary> Constructs a filter which removes words from the input
-        /// TokenStream that are named in the Set.
-        /// It is crucial that an efficient Set implementation is used
-        /// for maximum performance.
-        /// 
-        /// </summary>
-        /// <seealso cref="MakeStopSet(String[])">
-        /// </seealso>
-        public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(in_Renamed, stopWords, false)
-        {
-        }
-		
-        /// <summary> Builds a Set from an array of stop words,
-        /// appropriate for passing into the StopFilter constructor.
-        /// This permits this stopWords construction to be cached once when
-        /// an Analyzer is constructed.
-        /// 
+        /// TokenStream that are named in the array of words.
         /// </summary>
-        /// <seealso cref="MakeStopSet(String[], boolean) passing false to ignoreCase">
-        /// </seealso>
-        public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
-        {
-            return MakeStopSet(stopWords, false);
-        }
-		
-        /// <summary> </summary>
-        /// <param name="stopWords">
-        /// </param>
-        /// <param name="ignoreCase">If true, all words are lower cased first.  
-        /// </param>
-        /// <returns> a Set containing the words
-        /// </returns>
-        public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
-        {
-            System.Collections.Hashtable stopTable = new System.Collections.Hashtable(stopWords.Length);
-            for (int i = 0; i < stopWords.Length; i++)
-            {
-                System.String tmp = ignoreCase ? stopWords[i].ToLower() : stopWords[i];
-                stopTable.Add(tmp, tmp);
-            }
-            return stopTable;
-        }
-		
-        /// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
-        public override Token Next()
-        {
-            // return the first non-stop word found
-            for (Token token = input.Next(); token != null; token = input.Next())
-            {
-                System.String termText = ignoreCase ? token.termText.ToLower() : token.termText;
-                if (!stopWords.Contains(termText))
-                    return token;
-            }
-            // reached EOS -- return null
-            return null;
-        }
-    }
+		public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : base(in_Renamed)
+		{
+			this.stopWords = (CharArraySet) MakeStopSet(stopWords, ignoreCase);
+		}
+		
+		
+		/// <summary> Construct a token stream filtering the given input.
+		/// If <code>stopWords</code> is an instance of {@link CharArraySet} (true if
+		/// <code>MakeStopSet()</code> was used to construct the set) it will be directly used
+		/// and <code>ignoreCase</code> will be ignored since <code>CharArraySet</code>
+		/// directly controls case sensitivity.
+		/// <p/>
+		/// If <code>stopWords</code> is not an instance of {@link CharArraySet},
+		/// a new CharArraySet will be constructed and <code>ignoreCase</code> will be
+		/// used to specify the case sensitivity of that set.
+		/// 
+		/// </summary>
+		/// <param name="input">
+		/// </param>
+		/// <param name="stopWords">The set of Stop Words.
+		/// </param>
+		/// <param name="ignoreCase">Ignore case when stopping.
+		/// </param>
+		public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : base(input)
+		{
+			if (stopWords is CharArraySet)
+			{
+				this.stopWords = (CharArraySet) stopWords;
+			}
+			else
+			{
+				this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
+				// Hashtable has no positional indexer; iterate the keys, which hold the words themselves.
+				foreach (System.String stopWord in stopWords.Keys)
+				{
+					this.stopWords.Add(stopWord);
+				}
+			}
+		}
+		
+		/// <summary> Constructs a filter which removes words from the input
+		/// TokenStream that are named in the Set.
+		/// 
+		/// </summary>
+		/// <seealso cref="MakeStopSet(String[])">
+		/// </seealso>
+		public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(in_Renamed, stopWords, false)
+		{
+		}
+		
+		/// <summary> Builds a Set from an array of stop words,
+		/// appropriate for passing into the StopFilter constructor.
+		/// This permits this stopWords construction to be cached once when
+		/// an Analyzer is constructed.
+		/// 
+		/// </summary>
+		/// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase
+		/// </seealso>
+		public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
+		{
+			return MakeStopSet(stopWords, false);
+		}
+		
+		/// <summary>Builds a stop word set from the given array of words. </summary>
+		/// <param name="stopWords">
+		/// </param>
+		/// <param name="ignoreCase">If true, the resulting set matches case insensitively.
+		/// </param>
+		/// <returns> a Set containing the words
+		/// </returns>
+		public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
+		{
+			CharArraySet stopSet = new CharArraySet(stopWords.Length, ignoreCase);
+			for (int i = 0; i < stopWords.Length; i++)
+			{
+				stopSet.Add(stopWords[i]);
+			}
+			return stopSet;
+		}
+		
+		/// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
+		public override Token Next(Token result)
+		{
+			// return the first non-stop word found
+			int skippedPositions = 0;
+			while ((result = input.Next(result)) != null)
+			{
+				if (!stopWords.Contains(result.TermBuffer(), 0, result.termLength))
+				{
+					if (enablePositionIncrements)
+					{
+						result.SetPositionIncrement(result.GetPositionIncrement() + skippedPositions);
+					}
+					return result;
+				}
+				skippedPositions += result.GetPositionIncrement();
+			}
+			// reached EOS -- return null
+			return null;
+		}
+		
+		/// <seealso cref="SetEnablePositionIncrementsDefault(bool)">
+		/// </seealso>
+		public static bool GetEnablePositionIncrementsDefault()
+		{
+			return ENABLE_POSITION_INCREMENTS_DEFAULT;
+		}
+		
+		/// <summary> Set the default position increments behavior of every StopFilter created from now on.
+		/// <p>
+		/// Note: behavior of a single StopFilter instance can be modified 
+		/// with {@link #SetEnablePositionIncrements(boolean)}.
+		/// This static method allows control over behavior of classes using StopFilters internally, 
+		/// for example {@link Lucene.Net.Analysis.Standard.StandardAnalyzer StandardAnalyzer}. 
+		/// <p>
+		/// Default: false.
+		/// </summary>
+		/// <seealso cref="SetEnablePositionIncrements(bool)">
+		/// </seealso>
+		public static void  SetEnablePositionIncrementsDefault(bool defaultValue)
+		{
+			ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
+		}
+		
+		/// <seealso cref="SetEnablePositionIncrements(bool)">
+		/// </seealso>
+		public bool GetEnablePositionIncrements()
+		{
+			return enablePositionIncrements;
+		}
+		
+		/// <summary> Set to <code>true</code> to make <b>this</b> StopFilter enable position increments to result tokens.
+		/// <p>
+		/// When enabled, each time a token is stopped (omitted) the position increment of
+		/// the following token is increased by the number of positions skipped.
+		/// <p>
+		/// Default: see {@link #SetEnablePositionIncrementsDefault(boolean)}.
+		/// </summary>
+		public void  SetEnablePositionIncrements(bool enable)
+		{
+			this.enablePositionIncrements = enable;
+		}
+	}
 }
\ No newline at end of file
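
Two details of the new StopFilter are worth illustrating. First, MakeStopSet now returns a
CharArraySet, so a set built with ignoreCase=true stays case insensitive no matter which
constructor it is later passed to. Second, the skippedPositions bookkeeping in Next(Token)
makes stopped tokens visible to downstream consumers as position gaps. A hedged sketch,
assuming a WhitespaceTokenizer front end and the Token.TermText() accessor (the sample
words are illustrative):

    using System;
    using Lucene.Net.Analysis;

    public class StopFilterDemo
    {
        public static void Main(System.String[] args)
        {
            // Case-insensitive stop set; MakeStopSet actually returns a CharArraySet.
            System.Collections.Hashtable stopSet =
                StopFilter.MakeStopSet(new System.String[] { "the" }, true);

            StopFilter filter = new StopFilter(
                new WhitespaceTokenizer(new System.IO.StringReader("The quick fox")),
                stopSet);
            filter.SetEnablePositionIncrements(true);

            // "The" is stopped; its skipped position is added to the next token,
            // so "quick" reports an increment of 2 and "fox" an increment of 1.
            for (Token t = filter.Next(); t != null; t = filter.Next())
                Console.WriteLine(t.TermText() + " +" + t.GetPositionIncrement());
        }
    }

The same behavior can be switched on for every StopFilter created afterwards (including
those inside StandardAnalyzer) with the static StopFilter.SetEnablePositionIncrementsDefault(true).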

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TeeTokenFilter.cs?rev=671406&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs Tue Jun 24 19:53:11 2008
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis
+{
+	
+	
+	/// <summary> Works in conjunction with the SinkTokenizer to provide the ability to set aside tokens
+	/// that have already been analyzed.  This is useful in situations where multiple fields share
+	/// many common analysis steps and then go their separate ways.
+	/// <p/>
+	/// It is also useful for doing things like entity extraction or proper noun analysis as
+	/// part of the analysis workflow and saving off those tokens for use in another field.
+	/// 
+	/// <pre>
+	/// SinkTokenizer sink1 = new SinkTokenizer(null);
+	/// SinkTokenizer sink2 = new SinkTokenizer(null);
+	/// TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
+	/// TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);
+	/// TokenStream final1 = new LowerCaseFilter(source1);
+	/// TokenStream final2 = source2;
+	/// TokenStream final3 = new EntityDetect(sink1);
+	/// TokenStream final4 = new URLDetect(sink2);
+	/// d.Add(new Field("f1", final1));
+	/// d.Add(new Field("f2", final2));
+	/// d.Add(new Field("f3", final3));
+	/// d.Add(new Field("f4", final4));
+	/// </pre>
+	/// In this example, sink1 and sink2 each receive the tokens from both reader1 and reader2 after whitespace
+	/// tokenization; any of these streams can then be wrapped in further analysis, and more "sources" can be
+	/// inserted if desired. Note that the EntityDetect and URLDetect TokenStreams are illustrative only and do not currently exist in Lucene.
+	/// <p/>
+	/// 
+	/// See http://issues.apache.org/jira/browse/LUCENE-1058
+	/// </summary>
+	/// <seealso cref="SinkTokenizer">
+	/// </seealso>
+	public class TeeTokenFilter : TokenFilter
+	{
+		internal SinkTokenizer sink;
+		
+		public TeeTokenFilter(TokenStream input, SinkTokenizer sink) : base(input)
+		{
+			this.sink = sink;
+		}
+		
+		public override Token Next(Token result)
+		{
+			Token t = input.Next(result);
+			// Forward every token to the sink as well; the sink is expected to ignore the final null (end of stream).
+			sink.Add(t);
+			return t;
+		}
+	}
+}
\ No newline at end of file
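
One subtlety of the tee/sink arrangement that the class comment leaves implicit: a
SinkTokenizer can only replay tokens that have already flowed through its TeeTokenFilter,
so each tee'd source must be fully consumed before its sink is read. A minimal sketch
under that assumption (Token.TermText() again assumed for output):

    using System;
    using Lucene.Net.Analysis;

    public class TeeSinkDemo
    {
        public static void Main(System.String[] args)
        {
            SinkTokenizer sink = new SinkTokenizer(null);
            TokenStream source = new TeeTokenFilter(
                new WhitespaceTokenizer(new System.IO.StringReader("alpha beta")), sink);

            // Drain the source first; TeeTokenFilter copies each token into the sink.
            for (Token t = source.Next(); t != null; t = source.Next())
                Console.WriteLine("source: " + t.TermText());

            // The sink now replays the cached tokens for a second consumer or field.
            for (Token t = sink.Next(); t != null; t = sink.Next())
                Console.WriteLine("sink:   " + t.TermText());
        }
    }

When the streams are handed to Field objects instead, the same ordering applies: the
fields wrapping the sources must be added and indexed before the fields wrapping the
sinks are consumed.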