You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/21 05:44:59 UTC
[Lucene.Net] svn commit: r1204353 [6/9] - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src:
contrib/Analyzers/ contrib/Analyzers/AR/ contrib/Analyzers/BR/
contrib/Analyzers/CJK/ contrib/Analyzers/Cn/ contrib/Analyzers/Compound/
contrib/Analyzers/Compoun...
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -32,7 +32,7 @@ namespace Lucene.Net.Analysis.NGram
* MaxGram can't be larger than 1024 because of limitation.
* </p>
*/
- public class EdgeNGramTokenizer : Tokenizer
+ public sealed class EdgeNGramTokenizer : Tokenizer
{
public static Side DEFAULT_SIDE = Side.FRONT;
public static int DEFAULT_MAX_GRAM_SIZE = 1;
@@ -41,38 +41,8 @@ namespace Lucene.Net.Analysis.NGram
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
- // Replace this with an enum when the Java 1.5 upgrade is made, the impl will be simplified
/** Specifies which side of the input the n-gram should be generated from */
- public class Side
- {
- private string label;
-
- /** Get the n-gram from the front of the input */
- public static Side FRONT = new Side("front");
-
- /** Get the n-gram from the end of the input */
- public static Side BACK = new Side("back");
-
- // Private ctor
- private Side(string label) { this.label = label; }
-
-
- public string getLabel() { return label; }
-
- // Get the appropriate Side from a string
- public static Side getSide(string sideName)
- {
- if (FRONT.getLabel().Equals(sideName))
- {
- return FRONT;
- }
- else if (BACK.getLabel().Equals(sideName))
- {
- return BACK;
- }
- return null;
- }
- }
+ // Moved Side enum from this class to external definition
private int minGram;
private int maxGram;
@@ -138,7 +108,7 @@ namespace Lucene.Net.Analysis.NGram
* <param name="maxGram">the largest n-gram to generate</param>
*/
public EdgeNGramTokenizer(TextReader input, string sideLabel, int minGram, int maxGram)
- : this(input, Side.getSide(sideLabel), minGram, maxGram)
+ : this(input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
{
}
@@ -153,7 +123,7 @@ namespace Lucene.Net.Analysis.NGram
* <param name="maxGram">the largest n-gram to generate</param>
*/
public EdgeNGramTokenizer(AttributeSource source, TextReader input, string sideLabel, int minGram, int maxGram)
- : this(source, input, Side.getSide(sideLabel), minGram, maxGram)
+ : this(source, input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
{
}
@@ -168,7 +138,7 @@ namespace Lucene.Net.Analysis.NGram
* <param name="maxGram">the largest n-gram to generate</param>
*/
public EdgeNGramTokenizer(AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram) :
- this(factory, input, Side.getSide(sideLabel), minGram, maxGram)
+ this(factory, input, SideExtensions.GetSide(sideLabel), minGram, maxGram)
{
}
@@ -193,8 +163,8 @@ namespace Lucene.Net.Analysis.NGram
this.maxGram = maxGram;
this.side = side;
- this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
- this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+ this.termAtt = AddAttribute<TermAttribute>();
+ this.offsetAtt = AddAttribute<OffsetAttribute>();
}
@@ -240,22 +210,6 @@ namespace Lucene.Net.Analysis.NGram
this.offsetAtt.SetOffset(finalOffset, finalOffset);
}
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next(Token reusableToken)
- {
- return base.Next(reusableToken);
- }
-
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next()
- {
- return base.Next();
- }
-
public override void Reset(TextReader input)
{
base.Reset(input);
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,11 +24,10 @@ using Lucene.Net.Util;
namespace Lucene.Net.Analysis.NGram
{
-
/**
* Tokenizes the input into n-grams of the given size(s).
*/
- public class NGramTokenFilter : TokenFilter
+ public sealed class NGramTokenFilter : TokenFilter
{
public static int DEFAULT_MIN_NGRAM_SIZE = 1;
public static int DEFAULT_MAX_NGRAM_SIZE = 2;
@@ -65,8 +64,8 @@ namespace Lucene.Net.Analysis.NGram
this.minGram = minGram;
this.maxGram = maxGram;
- this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
- this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+ this.termAtt = AddAttribute<TermAttribute>();
+ this.offsetAtt = AddAttribute<OffsetAttribute>();
}
/**
@@ -116,22 +115,6 @@ namespace Lucene.Net.Analysis.NGram
}
}
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next(Token reusableToken)
- {
- return base.Next(reusableToken);
- }
-
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next()
- {
- return base.Next();
- }
-
public override void Reset()
{
base.Reset();
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenizer.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenizer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/NGram/NGramTokenizer.cs Mon Nov 21 04:44:55 2011
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.NGram
/**
* Tokenizes the input into n-grams of the given size(s).
*/
- public class NGramTokenizer : Tokenizer
+ public sealed class NGramTokenizer : Tokenizer
{
public static int DEFAULT_MIN_NGRAM_SIZE = 1;
public static int DEFAULT_MAX_NGRAM_SIZE = 2;
@@ -104,8 +104,8 @@ namespace Lucene.Net.Analysis.NGram
this.minGram = minGram;
this.maxGram = maxGram;
- this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
- this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+ this.termAtt = AddAttribute<TermAttribute>();
+ this.offsetAtt = AddAttribute<OffsetAttribute>();
}
/** Returns the next token in the stream, or null at EOS. */
@@ -145,22 +145,6 @@ namespace Lucene.Net.Analysis.NGram
this.offsetAtt.SetOffset(finalOffset, finalOffset);
}
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next(Token reusableToken)
- {
- return base.Next(reusableToken);
- }
-
- /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
- * not be overridden. Delegates to the backwards compatibility layer. */
- [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
- public override Token Next()
- {
- return base.Next();
- }
-
public override void Reset(TextReader input)
{
base.Reset(input);
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchAnalyzer.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchAnalyzer.cs Mon Nov 21 04:44:55 2011
@@ -20,198 +20,269 @@
*/
using System;
+using System.Collections.Generic;
using System.IO;
using System.Collections;
using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Support;
+using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis.Nl
{
-
- /* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
- /// <summary>
- /// Analyzer for Dutch language. Supports an external list of stopwords (words that
- /// will not be indexed at all), an external list of exclusions (word that will
- /// not be stemmed, but indexed) and an external list of word-stem pairs that overrule
- /// the algorithm (dictionary stemming).
- /// A default set of stopwords is used unless an alternative list is specified, the
- /// exclusion list is empty by default.
- /// <version>$Id: DutchAnalyzer.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
- /// </summary>
- /// <author>Edwin de Jonge</author>
- public class DutchAnalyzer : Analyzer
- {
- /// <summary>
- /// List of typical german stopwords.
- /// </summary>
- public static string[] DUTCH_STOP_WORDS =
- {
- "de","en","van","ik","te","dat","die","in","een",
- "hij","het","niet","zijn","is","was","op","aan","met","als","voor","had",
- "er","maar","om","hem","dan","zou","of","wat","mijn","men","dit","zo",
- "door","over","ze","zich","bij","ook","tot","je","mij","uit","der","daar",
- "haar","naar","heb","hoe","heeft","hebben","deze","u","want","nog","zal",
- "me","zij","nu","ge","geen","omdat","iets","worden","toch","al","waren",
- "veel","meer","doen","toen","moet","ben","zonder","kan","hun","dus",
- "alles","onder","ja","eens","hier","wie","werd","altijd","doch","wordt",
- "wezen","kunnen","ons","zelf","tegen","na","reeds","wil","kon","niets",
- "uw","iemand","geweest","andere"
- };
- /// <summary>
- /// Contains the stopwords used with the StopFilter.
- /// </summary>
- private Hashtable stoptable = new Hashtable();
-
- /// <summary>
- /// Contains words that should be indexed but not stemmed.
- /// </summary>
- private Hashtable excltable = new Hashtable();
-
- private Hashtable _stemdict = new Hashtable();
-
- /// <summary>
- /// Builds an analyzer.
- /// </summary>
- public DutchAnalyzer()
- {
- stoptable = StopFilter.MakeStopSet( DUTCH_STOP_WORDS );
- _stemdict.Add("fiets","fiets"); //otherwise fiet
- _stemdict.Add("bromfiets","bromfiets"); //otherwise bromfiet
- _stemdict.Add("ei","eier");
- _stemdict.Add("kind","kinder");
- }
-
- /// <summary>
- /// Builds an analyzer with the given stop words.
- /// </summary>
- /// <param name="stopwords"></param>
- public DutchAnalyzer( String[] stopwords )
- {
- stoptable = StopFilter.MakeStopSet( stopwords );
- }
-
- /// <summary>
- /// Builds an analyzer with the given stop words.
- /// </summary>
- /// <param name="stopwords"></param>
- public DutchAnalyzer( Hashtable stopwords )
- {
- stoptable = stopwords;
- }
-
- /// <summary>
- /// Builds an analyzer with the given stop words.
- /// </summary>
- /// <param name="stopwords"></param>
- public DutchAnalyzer( FileInfo stopwords )
- {
- stoptable = WordlistLoader.GetWordtable( stopwords );
- }
-
- /// <summary>
- /// Builds an exclusionlist from an array of Strings.
- /// </summary>
- /// <param name="exclusionlist"></param>
- public void SetStemExclusionTable( String[] exclusionlist )
- {
- excltable = StopFilter.MakeStopSet( exclusionlist );
- }
-
- /// <summary>
- /// Builds an exclusionlist from a Hashtable.
- /// </summary>
- /// <param name="exclusionlist"></param>
- public void SetStemExclusionTable( Hashtable exclusionlist )
- {
- excltable = exclusionlist;
- }
-
- /// <summary>
- /// Builds an exclusionlist from the words contained in the given file.
- /// </summary>
- /// <param name="exclusionlist"></param>
- public void SetStemExclusionTable(FileInfo exclusionlist)
- {
- excltable = WordlistLoader.GetWordtable(exclusionlist);
- }
-
- /// <summary>
- /// Reads a stemdictionary file , that overrules the stemming algorithm
- /// This is a textfile that contains per line
- /// word\tstem
- /// i.e: tabseperated
- /// </summary>
- /// <param name="stemdict"></param>
- public void SetStemDictionary(FileInfo stemdict)
- {
- _stemdict = WordlistLoader.GetStemDict(stemdict);
- }
-
- /// <summary>
- /// Creates a TokenStream which tokenizes all the text in the provided TextReader.
- /// </summary>
- /// <param name="fieldName"></param>
- /// <param name="reader"></param>
- /// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
- public override TokenStream TokenStream(String fieldName, TextReader reader)
- {
- TokenStream result = new StandardTokenizer( reader );
- result = new StandardFilter( result );
- result = new StopFilter( result, stoptable );
- result = new DutchStemFilter( result, excltable, _stemdict);
- return result;
- }
- }
+ /**
+ * {@link Analyzer} for Dutch language.
+ * <p>
+ * Supports an external list of stopwords (words that
+ * will not be indexed at all), an external list of exclusions (word that will
+ * not be stemmed, but indexed) and an external list of word-stem pairs that overrule
+ * the algorithm (dictionary stemming).
+ * A default set of stopwords is used unless an alternative list is specified, but the
+ * exclusion list is empty by default.
+ * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
+ */
+ public class DutchAnalyzer : Analyzer
+ {
+ /**
+ * List of typical Dutch stopwords.
+ * @deprecated use {@link #getDefaultStopSet()} instead
+ */
+ public static readonly String[] DUTCH_STOP_WORDS =
+ {
+ "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
+ "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
+ "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
+ "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
+ "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
+ "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
+ "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
+ "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
+ "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
+ "uw", "iemand", "geweest", "andere"
+ };
+ /**
+ * Returns an unmodifiable instance of the default stop-words set.
+ * @return an unmodifiable instance of the default stop-words set.
+ */
+ public static ISet<string> getDefaultStopSet()
+ {
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ static class DefaultSetHolder
+ {
+ internal static readonly ISet<string> DEFAULT_STOP_SET = CharArraySet
+ .UnmodifiableSet(new CharArraySet(DUTCH_STOP_WORDS, false));
+ }
+
+
+ /**
+ * Contains the stopwords used with the StopFilter.
+ */
+ private readonly ISet<string> stoptable;
+
+ /**
+ * Contains words that should be indexed but not stemmed.
+ */
+ private ISet<string> excltable = new HashSet<string>();
+
+ private IDictionary<String, String> stemdict = new HashMap<String, String>();
+ private readonly Version matchVersion;
+
+ /**
+ * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
+ * and a few default entries for the stem exclusion table.
+ *
+ */
+ public DutchAnalyzer(Version matchVersion)
+ : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+ {
+ stemdict.Add("fiets", "fiets"); //otherwise fiet
+ stemdict.Add("bromfiets", "bromfiets"); //otherwise bromfiet
+ stemdict.Add("ei", "eier");
+ stemdict.Add("kind", "kinder");
+ }
+
+ public DutchAnalyzer(Version matchVersion, ISet<string> stopwords)
+ : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+ {
+
+ }
+
+ public DutchAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionTable)
+ {
+ stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
+ excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionTable));
+ this.matchVersion = matchVersion;
+ SetOverridesTokenStreamMethod(typeof(DutchAnalyzer));
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion
+ * @param stopwords
+ * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
+ */
+ public DutchAnalyzer(Version matchVersion, params string[] stopwords)
+ : this(matchVersion, StopFilter.MakeStopSet(stopwords))
+ {
+
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param stopwords
+ * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
+ */
+ public DutchAnalyzer(Version matchVersion, HashSet<string> stopwords)
+ : this(matchVersion, (ISet<string>)stopwords)
+ {
+
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param stopwords
+ * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
+ */
+ public DutchAnalyzer(Version matchVersion, FileInfo stopwords)
+ {
+ // this is completely broken!
+ SetOverridesTokenStreamMethod(typeof(DutchAnalyzer));
+ try
+ {
+ stoptable = WordlistLoader.GetWordSet(stopwords);
+ }
+ catch (IOException e)
+ {
+ // TODO: throw IOException
+ throw new Exception("", e);
+ }
+ this.matchVersion = matchVersion;
+ }
+
+ /**
+ * Builds an exclusionlist from an array of Strings.
+ *
+ * @param exclusionlist
+ * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
+ */
+ public void setStemExclusionTable(params string[] exclusionlist)
+ {
+ excltable = StopFilter.MakeStopSet(exclusionlist);
+ SetPreviousTokenStream(null); // force a new stemmer to be created
+ }
+
+ /**
+ * Builds an exclusionlist from a Hashtable.
+ * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
+ */
+ public void setStemExclusionTable(HashSet<string> exclusionlist)
+ {
+ excltable = exclusionlist;
+ SetPreviousTokenStream(null); // force a new stemmer to be created
+ }
+
+ /**
+ * Builds an exclusionlist from the words contained in the given file.
+ * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
+ */
+ public void setStemExclusionTable(FileInfo exclusionlist)
+ {
+ try
+ {
+ excltable = WordlistLoader.GetWordSet(exclusionlist);
+ SetPreviousTokenStream(null); // force a new stemmer to be created
+ }
+ catch (IOException e)
+ {
+ // TODO: throw IOException
+ throw new Exception("", e);
+ }
+ }
+
+ /**
+ * Reads a stemdictionary file , that overrules the stemming algorithm
+ * This is a textfile that contains per line
+ * <tt>word<b>\t</b>stem</tt>, i.e: two tab seperated words
+ */
+ public void setStemDictionary(FileInfo stemdictFile)
+ {
+ try
+ {
+ stemdict = WordlistLoader.GetStemDict(stemdictFile);
+ SetPreviousTokenStream(null); // force a new stemmer to be created
+ }
+ catch (IOException e)
+ {
+ // TODO: throw IOException
+ throw new Exception(string.Empty, e);
+ }
+ }
+
+ /**
+ * Creates a {@link TokenStream} which tokenizes all the text in the
+ * provided {@link Reader}.
+ *
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * and {@link DutchStemFilter}
+ */
+ public override TokenStream TokenStream(String fieldName, TextReader reader)
+ {
+ TokenStream result = new StandardTokenizer(matchVersion, reader);
+ result = new StandardFilter(result);
+ result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+ result, stoptable);
+ result = new DutchStemFilter(result, excltable, stemdict);
+ return result;
+ }
+
+ class SavedStreams
+ {
+ protected internal Tokenizer source;
+ protected internal TokenStream result;
+ };
+
+ /**
+ * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
+ * text in the provided {@link Reader}.
+ *
+ * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link StopFilter},
+ * and {@link DutchStemFilter}
+ */
+ public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+
+ SavedStreams streams = (SavedStreams)GetPreviousTokenStream();
+ if (streams == null)
+ {
+ streams = new SavedStreams();
+ streams.source = new StandardTokenizer(matchVersion, reader);
+ streams.result = new StandardFilter(streams.source);
+ streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
+ streams.result, stoptable);
+ streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
+ SetPreviousTokenStream(streams);
+ }
+ else
+ {
+ streams.source.Reset(reader);
+ }
+ return streams.result;
+ }
+ }
}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemFilter.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemFilter.cs Mon Nov 21 04:44:55 2011
@@ -20,167 +20,113 @@
*/
using System;
+using System.Collections.Generic;
using System.IO;
using System.Collections;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
namespace Lucene.Net.Analysis.Nl
{
-
- /* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
- /// <summary>
- /// A filter that stems Dutch words. It supports a table of words that should
- /// not be stemmed at all. The stemmer used can be changed at runtime after the
- /// filter object is created (as long as it is a DutchStemmer).
- ///
- /// <version>$Id: DutchStemFilter.java,v 1.1 2004/03/09 14:55:08 otis Exp $</version>
- /// </summary>
- /// <author>Edwin de Jonge</author>
- public sealed class DutchStemFilter : TokenFilter
- {
- /// <summary>
- /// The actual token in the input stream.
- /// </summary>
- private Token token = null;
- private DutchStemmer stemmer = null;
- private Hashtable exclusions = null;
-
- public DutchStemFilter( TokenStream _in ) : base(_in)
- {
- stemmer = new DutchStemmer();
- }
-
- /// <summary>
- /// Builds a DutchStemFilter that uses an exclusiontable.
- /// </summary>
- /// <param name="_in"></param>
- /// <param name="exclusiontable"></param>
- public DutchStemFilter( TokenStream _in, Hashtable exclusiontable ): this(_in)
- {
- exclusions = exclusiontable;
- }
-
- /// <summary>
- ///
- /// </summary>
- /// <param name="_in"></param>
- /// <param name="exclusiontable"></param>
- /// <param name="stemdictionary">Dictionary of word stem pairs, that overrule the algorithm</param>
- public DutchStemFilter( TokenStream _in, Hashtable exclusiontable , Hashtable stemdictionary): this(_in, exclusiontable)
- {
- stemmer.SetStemDictionary(stemdictionary);
- }
-
- /// <summary>
- /// </summary>
- /// <returns>Returns the next token in the stream, or null at EOS</returns>
- public override Token Next()
-
- {
- if ( ( token = input.Next() ) == null )
- {
- return null;
- }
- // Check the exclusiontable
- else if ( exclusions != null && exclusions.Contains( token.TermText() ) )
- {
- return token;
- }
- else
- {
- String s = stemmer.Stem( token.TermText() );
- // If not stemmed, dont waste the time creating a new token
- if ( !s.Equals( token.TermText() ) )
- {
- return new Token( s, token.StartOffset(),
- token.EndOffset(), token.Type() );
- }
- return token;
- }
- }
-
- /// <summary>
- /// Set a alternative/custom DutchStemmer for this filter.
- /// </summary>
- /// <param name="stemmer"></param>
- public void SetStemmer( DutchStemmer stemmer )
- {
- if ( stemmer != null )
- {
- this.stemmer = stemmer;
- }
- }
-
- /// <summary>
- /// Set an alternative exclusion list for this filter.
- /// </summary>
- /// <param name="exclusiontable"></param>
- public void SetExclusionTable( Hashtable exclusiontable )
- {
- exclusions = exclusiontable;
- }
-
- /// <summary>
- /// Set dictionary for stemming, this dictionary overrules the algorithm,
- /// so you can correct for a particular unwanted word-stem pair.
- /// </summary>
- /// <param name="dict"></param>
- public void SetStemDictionary(Hashtable dict)
- {
- if (stemmer != null)
- stemmer.SetStemDictionary(dict);
- }
- }
+ /**
+ * A {@link TokenFilter} that stems Dutch words.
+ * <p>
+ * It supports a table of words that should
+ * not be stemmed at all. The stemmer used can be changed at runtime after the
+ * filter object is created (as long as it is a {@link DutchStemmer}).
+ * </p>
+ * NOTE: This stemmer does not implement the Snowball algorithm correctly,
+ * specifically doubled consonants. It is recommended that you consider using
+ * the "Dutch" stemmer in the snowball package instead. This stemmer will likely
+ * be deprecated in a future release.
+ */
+ public sealed class DutchStemFilter : TokenFilter
+ {
+ /**
+ * The actual token in the input stream.
+ */
+ private DutchStemmer stemmer = null;
+ private ISet<string> exclusions = null;
+
+ private TermAttribute termAtt;
+
+ public DutchStemFilter(TokenStream _in)
+ : base(_in)
+ {
+ stemmer = new DutchStemmer();
+ termAtt = AddAttribute<TermAttribute>();
+ }
+
+ /**
+ * Builds a DutchStemFilter that uses an exclusion table.
+ */
+ public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable)
+ : this(_in)
+ {
+ exclusions = exclusiontable;
+ }
+
+ /**
+ * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
+ */
+ public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable, IDictionary<string, string> stemdictionary)
+ : this(_in, exclusiontable)
+ {
+ stemmer.SetStemDictionary(stemdictionary);
+ }
+
+ /**
+ * Returns the next token in the stream, or null at EOS
+ */
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ String term = termAtt.Term();
+
+ // Check the exclusion table.
+ if (exclusions == null || !exclusions.Contains(term))
+ {
+ String s = stemmer.Stem(term);
+ // If not stemmed, don't waste the time adjusting the token.
+ if ((s != null) && !s.Equals(term))
+ termAtt.SetTermBuffer(s);
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ /**
+ * Set a alternative/custom {@link DutchStemmer} for this filter.
+ */
+ public void SetStemmer(DutchStemmer stemmer)
+ {
+ if (stemmer != null)
+ {
+ this.stemmer = stemmer;
+ }
+ }
+
+ /**
+ * Set an alternative exclusion list for this filter.
+ */
+ public void SetExclusionTable(HashSet<string> exclusiontable)
+ {
+ exclusions = exclusiontable;
+ }
+
+ /**
+ * Set dictionary for stemming, this dictionary overrules the algorithm,
+ * so you can correct for a particular unwanted word-stem pair.
+ */
+ public void SetStemDictionary(IDictionary<string, string> dict)
+ {
+ if (stemmer != null)
+ stemmer.SetStemDictionary(dict);
+ }
+ }
}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemmer.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Nl/DutchStemmer.cs Mon Nov 21 04:44:55 2011
@@ -23,484 +23,439 @@ using System;
using System.IO;
using System.Text;
using System.Collections;
+using System.Collections.Generic;
namespace Lucene.Net.Analysis.Nl
{
-
- /* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
- /// <summary>
- /// A stemmer for Dutch words. The algorithm is an implementation of
- /// the <see c="http://snowball.tartarus.org/dutch/stemmer.html">dutch stemming</see>
- /// algorithm in snowball. Snowball is a project of Martin Porter (does Porter Stemmer ring a bell?):
- ///
- /// @version $Id: DutchStemmer.java,v 1.1 2004/03/09 14:55:08 otis Exp $
- /// </summary>
- /// <author>Edwin de Jonge (ejne@cbs.nl)</author>
- public class DutchStemmer
- {
- /// <summary>
- /// Buffer for the terms while stemming them.
- /// </summary>
- private StringBuilder sb = new StringBuilder();
- private bool _removedE;
- private Hashtable _stemDict;
-
-
- private int _R1;
- private int _R2;
-
- /// <summary>
- /// Stemms the given term to an unique <tt>discriminator</tt>.
- /// </summary>
- /// <param name="term">The term that should be stemmed.</param>
- /// <returns>Discriminator for <tt>term</tt></returns>
- //TODO convert to internal
- public string Stem( String term )
- {
- term = term.ToLower();
- if ( !IsStemmable( term ) )
- return term;
- if (_stemDict != null && _stemDict.Contains(term))
- return _stemDict[term] as string;
- // Reset the StringBuilder.
- sb.Remove(0, sb.Length);
- sb.Insert(0, term);
- // Stemming starts here...
- Substitute(sb);
- StoreYandI(sb);
- _R1 = GetRIndex(sb, 0);
- _R1 = Math.Max(3,_R1);
- Step1(sb);
- Step2(sb);
- _R2 = GetRIndex(sb, _R1);
- Step3a(sb);
- Step3b(sb);
- Step4(sb);
- ReStoreYandI(sb);
- return sb.ToString();
- }
-
- private bool enEnding(StringBuilder sb)
- {
- string[] enend = new string[]{"ene","en"};
- foreach(string end in enend)
- {
- string s = sb.ToString();
- int index = s.Length - end.Length;
- if ( s.EndsWith(end) &&
- index >= _R1 &&
- IsValidEnEnding(sb,index-1)
- )
- {
- sb.Remove(index, end.Length);
- UnDouble(sb,index);
- return true;
- }
- }
- return false;
- }
-
-
- private void Step1(StringBuilder sb)
- {
- if (_R1 >= sb.Length)
- return;
-
- string s = sb.ToString();
- int lengthR1 = sb.Length - _R1;
- int index;
-
- if (s.EndsWith("heden"))
- {
- sb.Replace("heden","heid", _R1, lengthR1);
- return;
- }
-
- if (enEnding(sb))
- return;
-
- if (s.EndsWith("se") &&
- (index = s.Length - 2) >= _R1 &&
- IsValidSEnding(sb, index -1)
- )
- {
- sb.Remove(index, 2);
- return;
- }
- if (s.EndsWith("s") &&
- (index = s.Length - 1) >= _R1 &&
- IsValidSEnding(sb, index - 1))
- {
- sb.Remove(index, 1);
- }
- }
-
- /// <summary>
- /// Delete suffix e if in R1 and
- /// preceded by a non-vowel, and then undouble the ending
- /// </summary>
- /// <param name="sb">string being stemmed</param>
- private void Step2(StringBuilder sb)
- {
- _removedE = false;
- if (_R1 >= sb.Length)
- return;
- string s = sb.ToString();
- int index = s.Length - 1;
- if ( index >= _R1 &&
- s.EndsWith("e") &&
- !IsVowel(sb[index-1]))
- {
- sb.Remove(index,1);
- UnDouble(sb);
- _removedE = true;
- }
- }
-
- /// <summary>
- /// Delete "heid"
- /// </summary>
- /// <param name="sb">string being stemmed</param>
- private void Step3a(StringBuilder sb)
- {
- if (_R2 >= sb.Length)
- return;
- string s = sb.ToString();
- int index = s.Length - 4;
- if (s.EndsWith("heid")&& index >= _R2 && sb[index - 1] != 'c')
- {
- sb.Remove(index,4); //remove heid
- enEnding(sb);
- }
- }
-
- /// <summary>
- /// <p>A d-suffix, or derivational suffix, enables a new word,
- /// often with a different grammatical category, or with a different
- /// sense, to be built from another word. Whether a d-suffix can be
- /// attached is discovered not from the rules of grammar, but by
- /// referring to a dictionary. So in English, ness can be added to
- /// certain adjectives to form corresponding nouns (littleness,
- /// kindness, foolishness ...) but not to all adjectives
- /// (not for example, to big, cruel, wise ...) d-suffixes can be
- /// used to change meaning, often in rather exotic ways.</p>
- /// Remove "ing", "end", "ig", "lijk", "baar" and "bar"
- /// </summary>
- /// <param name="sb">string being stemmed</param>
- private void Step3b(StringBuilder sb)
- {
- if (_R2 >= sb.Length)
- return;
- string s = sb.ToString();
- int index;
-
- if ((s.EndsWith("end") || s.EndsWith("ing")) &&
- (index = s.Length - 3) >= _R2
- )
- {
- sb.Remove(index,3);
- if (sb[index - 2] == 'i' &&
- sb[index - 1] == 'g')
- {
- if (sb[index - 3] != 'e' & index-2 >= _R2)
- {
- index -= 2;
- sb.Remove(index,2);
- }
- }
- else
- {
- UnDouble(sb,index);
- }
- return;
- }
- if ( s.EndsWith("ig") &&
- (index = s.Length - 2) >= _R2
- )
- {
- if (sb[index - 1] != 'e')
- sb.Remove(index, 2);
- return;
- }
- if (s.EndsWith("lijk") &&
- (index = s.Length - 4) >= _R2
- )
- {
- sb.Remove(index, 4);
- Step2(sb);
- return;
- }
- if (s.EndsWith("baar") &&
- (index = s.Length - 4) >= _R2
- )
- {
- sb.Remove(index, 4);
- return;
- }
- if (s.EndsWith("bar") &&
- (index = s.Length - 3) >= _R2
- )
- {
- if (_removedE)
- sb.Remove(index, 3);
- return;
- }
- }
-
- /// <summary>
- /// undouble vowel
- /// If the words ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
- /// </summary>
- /// <param name="sb">string being stemmed</param>
- private void Step4(StringBuilder sb)
- {
- if (sb.Length < 4)
- return;
- string end = sb.ToString(sb.Length - 4,4);
- char c = end[0];
- char v1 = end[1];
- char v2 = end[2];
- char d = end[3];
- if (v1 == v2 &&
- d != 'I' &&
- v1 != 'i' &&
- IsVowel(v1) &&
- !IsVowel(d) &&
- !IsVowel(c))
- {
- sb.Remove(sb.Length - 2, 1);
- }
- }
-
- /// <summary>
- /// Checks if a term could be stemmed.
- /// </summary>
- /// <param name="term"></param>
- /// <returns>true if, and only if, the given term consists in letters.</returns>
- private bool IsStemmable( String term )
- {
- for ( int c = 0; c < term.Length; c++ )
- {
- if ( !Char.IsLetter(term[c])) return false;
- }
- return true;
- }
-
- /// <summary>
- /// Substitute ä, ë, ï, ö, ü, á , é, Ã, ó, ú
- /// </summary>
- /// <param name="buffer"></param>
- private void Substitute( StringBuilder buffer )
- {
- for ( int i = 0; i < buffer.Length; i++ )
- {
- switch (buffer[i])
- {
- case 'ä':
- case 'á':
- {
- buffer[i] = 'a';
- break;
- }
- case 'ë':
- case 'é':
- {
- buffer[i] = 'e';
- break;
- }
- case 'ü':
- case 'ú':
- {
- buffer[i] = 'u';
- break;
- }
- case 'ï':
- case 'i':
- {
- buffer[i] = 'i';
- break;
- }
- case 'ö':
- case 'ó':
- {
- buffer[i] = 'o';
- break;
- }
- }
- }
- }
-
-// private bool IsValidSEnding(StringBuilder sb)
-// {
-// return IsValidSEnding(sb,sb.Length - 1);
-// }
-
- private bool IsValidSEnding(StringBuilder sb, int index)
- {
- char c = sb[index];
- if (IsVowel(c) || c == 'j')
- return false;
- return true;
- }
-
-// private bool IsValidEnEnding(StringBuilder sb)
-// {
-// return IsValidEnEnding(sb,sb.Length - 1);
-// }
-
- private bool IsValidEnEnding(StringBuilder sb, int index)
- {
- char c = sb[index];
- if (IsVowel(c))
- return false;
- if (c < 3)
- return false;
- // ends with "gem"?
- if (c == 'm' && sb[index - 2] == 'g' && sb[index-1] == 'e')
- return false;
- return true;
- }
-
- private void UnDouble(StringBuilder sb)
- {
- UnDouble(sb, sb.Length);
- }
-
- private void UnDouble(StringBuilder sb, int endIndex)
- {
- string s = sb.ToString(0, endIndex);
- if (s.EndsWith("kk") || s.EndsWith("tt") || s.EndsWith("dd") || s.EndsWith("nn") || s.EndsWith("mm") || s.EndsWith("ff"))
- {
- sb.Remove(endIndex-1,1);
- }
- }
-
- private int GetRIndex(StringBuilder sb, int start)
- {
- if (start == 0)
- start = 1;
- int i = start;
- for (; i < sb.Length; i++)
- {
- //first non-vowel preceded by a vowel
- if (!IsVowel(sb[i]) && IsVowel(sb[i-1]))
- {
- return i + 1;
- }
- }
- return i + 1;
- }
-
- private void StoreYandI(StringBuilder sb)
- {
- if (sb[0] == 'y')
- sb[0] = 'Y';
- //char c;
- int last = sb.Length - 1;
- for (int i = 1; i < last; i++)
- {
- switch (sb[i])
- {
- case 'i':
- {
- if (IsVowel(sb[i-1]) &&
- IsVowel(sb[i+1])
- )
- sb[i] = 'I';
- break;
- }
- case 'y':
- {
- if (IsVowel(sb[i-1]))
- sb[i] = 'Y';
- break;
- }
- }
- }
- if (last > 0 && sb[last]=='y' && IsVowel(sb[last-1]))
- sb[last]='Y';
- }
-
- private void ReStoreYandI(StringBuilder sb)
- {
- sb.Replace("I","i");
- sb.Replace("Y","y");
- }
-
- private bool IsVowel(char c)
- {
- switch (c)
- {
- case 'e':
- case 'a':
- case 'o':
- case 'i':
- case 'u':
- case 'y':
- case 'è':
- {
- return true;
- }
- }
- return false;
- }
-
- internal void SetStemDictionary(Hashtable dict)
- {
- _stemDict = dict;
- }
- }
+ /**
+ * A stemmer for Dutch words.
+ * <p>
+ * The algorithm is an implementation of
+ * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
+ * algorithm in Martin Porter's snowball project.
+ * </p>
+ */
+
+ public class DutchStemmer
+ {
+ /**
+ * Buffer for the terms while stemming them.
+ */
+ private StringBuilder sb = new StringBuilder();
+ private bool _removedE;
+ private IDictionary<string, string> _stemDict;
+
+ private int _R1;
+ private int _R2;
+
+ //TODO convert to internal
+ /*
+ * Stems the given term to a unique <tt>discriminator</tt>.
+ *
+ * @param term The term that should be stemmed.
+ * @return Discriminator for <tt>term</tt>; the term itself when it is not
+ * stemmable, or the dictionary override when one is present.
+ */
+ public String Stem(String term)
+ {
+ // Lower-case with the invariant culture so stemming is not affected by
+ // the current thread culture (e.g. Turkish dotless-i casing).
+ term = term.ToLowerInvariant();
+ if (!isStemmable(term))
+ return term;
+ // The user dictionary overrules the algorithm. A single TryGetValue
+ // replaces the previous ContainsKey + indexer double lookup; the old
+ // "is String" test was redundant for an IDictionary<string, string>.
+ if (_stemDict != null)
+ {
+ string overruled;
+ if (_stemDict.TryGetValue(term, out overruled))
+ return overruled;
+ }
+ // Reset the shared working buffer.
+ sb.Clear();
+ sb.Insert(0, term);
+ // Stemming starts here...
+ substitute(sb);
+ storeYandI(sb);
+ _R1 = getRIndex(sb, 0);
+ _R1 = Math.Max(3, _R1);
+ step1(sb);
+ step2(sb);
+ _R2 = getRIndex(sb, _R1);
+ step3a(sb);
+ step3b(sb);
+ step4(sb);
+ reStoreYandI(sb);
+ return sb.ToString();
+ }
+
+ /**
+ * Removes the endings "ene"/"en" when inside R1 and preceded by a valid
+ * en-ending, then undoubles the resulting consonant ending.
+ *
+ * @return true if an ending was removed
+ */
+ private bool enEnding(StringBuilder sb)
+ {
+ String[] enend = new String[] { "ene", "en" };
+ for (int i = 0; i < enend.Length; i++)
+ {
+ String end = enend[i];
+ String s = sb.ToString();
+ int index = s.Length - end.Length;
+ if (s.EndsWith(end) &&
+ index >= _R1 &&
+ isValidEnEnding(sb, index - 1)
+ )
+ {
+ // StringBuilder.Remove takes (startIndex, length) — not the
+ // Java-style (start, end) the port passed, which removed the
+ // wrong span or threw.
+ sb.Remove(index, end.Length);
+ unDouble(sb, index);
+ return true;
+ }
+ }
+ return false;
+ }
+
+
+ /**
+ * Step 1: remove standard suffixes ("heden" -> "heid", "en(e)", "s(e)")
+ * that fall inside the R1 region.
+ */
+ private void step1(StringBuilder sb)
+ {
+ if (_R1 >= sb.Length)
+ return;
+
+ String s = sb.ToString();
+ int lengthR1 = sb.Length - _R1;
+ int index;
+
+ if (s.EndsWith("heden"))
+ {
+ // Replace "heden" with "heid" within the R1 region. The previous
+ // Remove/Insert pair passed Java-style (start, end) arguments to
+ // methods that take (start, length) and threw
+ // ArgumentOutOfRangeException.
+ sb.Replace("heden", "heid", _R1, lengthR1);
+ return;
+ }
+
+ if (enEnding(sb))
+ return;
+
+ if (s.EndsWith("se") &&
+ (index = s.Length - 2) >= _R1 &&
+ isValidSEnding(sb, index - 1)
+ )
+ {
+ // Remove takes (startIndex, length): drop the two-char "se" suffix.
+ sb.Remove(index, 2);
+ return;
+ }
+ if (s.EndsWith("s") &&
+ (index = s.Length - 1) >= _R1 &&
+ isValidSEnding(sb, index - 1))
+ {
+ sb.Remove(index, 1);
+ }
+ }
+
+ /**
+ * Remove suffix e if in R1 and
+ * preceded by a non-vowel, and then undouble the ending
+ *
+ * @param sb String being stemmed
+ */
+ private void step2(StringBuilder sb)
+ {
+ _removedE = false;
+ if (_R1 >= sb.Length)
+ return;
+ String s = sb.ToString();
+ int index = s.Length - 1;
+ if (index >= _R1 &&
+ s.EndsWith("e") &&
+ !isVowel(sb[index - 1]))
+ {
+ // Remove takes (startIndex, length); the old (index, index + 1)
+ // call passed a Java-style end offset and threw.
+ sb.Remove(index, 1);
+ unDouble(sb);
+ _removedE = true;
+ }
+ }
+
+ /**
+ * Remove "heid" when inside R2 and not preceded by 'c'.
+ *
+ * @param sb String being stemmed
+ */
+ private void step3a(StringBuilder sb)
+ {
+ if (_R2 >= sb.Length)
+ return;
+ String s = sb.ToString();
+ int index = s.Length - 4;
+ if (s.EndsWith("heid") && index >= _R2 && sb[index - 1] != 'c')
+ {
+ // Remove takes (startIndex, length): drop the four chars of "heid".
+ sb.Remove(index, 4);
+ enEnding(sb);
+ }
+ }
+
+ /**
+ * <p>A d-suffix, or derivational suffix, enables a new word,
+ * often with a different grammatical category, or with a different
+ * sense, to be built from another word. Whether a d-suffix can be
+ * attached is discovered not from the rules of grammar, but by
+ * referring to a dictionary. So in English, ness can be added to
+ * certain adjectives to form corresponding nouns (littleness,
+ * kindness, foolishness ...) but not to all adjectives
+ * (not for example, to big, cruel, wise ...) d-suffixes can be
+ * used to change meaning, often in rather exotic ways.</p>
+ * Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+ *
+ * @param sb String being stemmed
+ */
+ private void step3b(StringBuilder sb)
+ {
+ if (_R2 >= sb.Length)
+ return;
+ String s = sb.ToString();
+ int index = 0;
+
+ // StringBuilder.Remove takes (startIndex, length) throughout this
+ // method; the previous Java-style (start, end) arguments removed the
+ // wrong span or threw ArgumentOutOfRangeException.
+ if ((s.EndsWith("end") || s.EndsWith("ing")) &&
+ (index = s.Length - 3) >= _R2)
+ {
+ sb.Remove(index, 3);
+ if (sb[index - 2] == 'i' &&
+ sb[index - 1] == 'g')
+ {
+ if (sb[index - 3] != 'e' & index - 2 >= _R2)
+ {
+ index -= 2;
+ sb.Remove(index, 2);
+ }
+ }
+ else
+ {
+ unDouble(sb, index);
+ }
+ return;
+ }
+ if (s.EndsWith("ig") &&
+ (index = s.Length - 2) >= _R2
+ )
+ {
+ if (sb[index - 1] != 'e')
+ sb.Remove(index, 2);
+ return;
+ }
+ if (s.EndsWith("lijk") &&
+ (index = s.Length - 4) >= _R2
+ )
+ {
+ sb.Remove(index, 4);
+ step2(sb);
+ return;
+ }
+ if (s.EndsWith("baar") &&
+ (index = s.Length - 4) >= _R2
+ )
+ {
+ sb.Remove(index, 4);
+ return;
+ }
+ if (s.EndsWith("bar") &&
+ (index = s.Length - 3) >= _R2
+ )
+ {
+ if (_removedE)
+ sb.Remove(index, 3);
+ return;
+ }
+ }
+
+ /**
+ * undouble vowel
+ * If the words ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+ *
+ * @param sb String being stemmed
+ */
+ private void step4(StringBuilder sb)
+ {
+ if (sb.Length < 4)
+ return;
+ // ToString takes (startIndex, length): grab the last four characters.
+ // The old (sb.Length - 4, sb.Length) call threw ArgumentOutOfRangeException.
+ String end = sb.ToString(sb.Length - 4, 4);
+ char c = end[0];
+ char v1 = end[1];
+ char v2 = end[2];
+ char d = end[3];
+ if (v1 == v2 &&
+ d != 'I' &&
+ v1 != 'i' &&
+ isVowel(v1) &&
+ !isVowel(d) &&
+ !isVowel(c))
+ {
+ // Remove one character of the doubled vowel (maan -> man).
+ sb.Remove(sb.Length - 2, 1);
+ }
+ }
+
+ /**
+ * Checks if a term could be stemmed.
+ *
+ * @return true if, and only if, the given term consists in letters.
+ */
+ private bool isStemmable(String term)
+ {
+ // A term is stemmable when it consists exclusively of letters.
+ foreach (char ch in term)
+ {
+ if (!char.IsLetter(ch))
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú
+ */
+ private void substitute(StringBuilder buffer)
+ {
+ // Fold accented vowels to their unaccented base form before stemming.
+ for (int i = 0; i < buffer.Length; i++)
+ {
+ switch (buffer[i])
+ {
+ case 'ä':
+ case 'á':
+ {
+ buffer[i] = 'a';
+ break;
+ }
+ case 'ë':
+ case 'é':
+ {
+ buffer[i] = 'e';
+ break;
+ }
+ case 'ü':
+ case 'ú':
+ {
+ buffer[i] = 'u';
+ break;
+ }
+ case 'ï':
+ case 'i':
+ {
+ // NOTE(review): the 'i' -> 'i' branch is a no-op; the second
+ // label was presumably meant to be 'í' — confirm against the
+ // snowball Dutch reference.
+ buffer[i] = 'i';
+ break;
+ }
+ case 'ö':
+ case 'ó':
+ {
+ buffer[i] = 'o';
+ break;
+ }
+ }
+ }
+ }
+
+ /*private bool isValidSEnding(StringBuilder sb) {
+ return isValidSEnding(sb, sb.Length - 1);
+ }*/
+
+ private bool isValidSEnding(StringBuilder sb, int index)
+ {
+ // A valid s-ending is any consonant except 'j'.
+ char ch = sb[index];
+ return !isVowel(ch) && ch != 'j';
+ }
+
+ /*private bool isValidEnEnding(StringBuilder sb) {
+ return isValidEnEnding(sb, sb.Length - 1);
+ }*/
+
+ private bool isValidEnEnding(StringBuilder sb, int index)
+ {
+ char c = sb[index];
+ if (isVowel(c))
+ return false;
+ // Guard the look-behind below. The old test compared the *character*
+ // against 3 ("c < 3"), which is never true for a letter, so short
+ // words could index sb[index - 2] out of range.
+ if (index < 2)
+ return false;
+ // ends with "gem"?
+ if (c == 'm' && sb[index - 2] == 'g' && sb[index - 1] == 'e')
+ return false;
+ return true;
+ }
+
+ private void unDouble(StringBuilder sb)
+ {
+ // Undouble at the very end of the buffer.
+ unDouble(sb, sb.Length);
+ }
+
+ /**
+ * If the prefix ending at endIndex ends in a doubled consonant, drop one
+ * of the pair (e.g. "kk" -> "k").
+ */
+ private void unDouble(StringBuilder sb, int endIndex)
+ {
+ String s = sb.ToString(0, endIndex);
+ if (s.EndsWith("kk") || s.EndsWith("tt") || s.EndsWith("dd") || s.EndsWith("nn") || s.EndsWith("mm") || s.EndsWith("ff"))
+ {
+ // Remove takes (startIndex, length): delete exactly one character.
+ // The old (endIndex - 1, endIndex) call passed a Java-style end
+ // offset and threw ArgumentOutOfRangeException.
+ sb.Remove(endIndex - 1, 1);
+ }
+ }
+
+ private int getRIndex(StringBuilder sb, int start)
+ {
+ // Returns the index just past the first non-vowel that follows a vowel,
+ // scanning from 'start' (the snowball R-region boundary).
+ if (start == 0)
+ start = 1;
+ int i = start;
+ for (; i < sb.Length; i++)
+ {
+ //first non-vowel preceded by a vowel
+ if (!isVowel(sb[i]) && isVowel(sb[i - 1]))
+ {
+ return i + 1;
+ }
+ }
+ // No such position found: the region is empty (index past the end).
+ return i + 1;
+ }
+
+ private void storeYandI(StringBuilder sb)
+ {
+ // Mark consonant-acting 'y' and vowel-surrounded 'i' by upper-casing
+ // them; reStoreYandI lowers them again after stemming.
+ if (sb[0] == 'y')
+ sb[0] = 'Y';
+
+ int last = sb.Length - 1;
+
+ for (int i = 1; i < last; i++)
+ {
+ switch (sb[i])
+ {
+ case 'i':
+ {
+ if (isVowel(sb[i - 1]) &&
+ isVowel(sb[i + 1])
+ )
+ sb[i] = 'I';
+ break;
+ }
+ case 'y':
+ {
+ if (isVowel(sb[i - 1]))
+ sb[i] = 'Y';
+ break;
+ }
+ }
+ }
+ // Final character: 'y' after a vowel is also marked.
+ if (last > 0 && sb[last] == 'y' && isVowel(sb[last - 1]))
+ sb[last] = 'Y';
+ }
+
+ /**
+ * Lowers the 'I'/'Y' markers set by storeYandI.
+ */
+ private void reStoreYandI(StringBuilder sb)
+ {
+ // Replace in place; StringBuilder.Replace avoids the intermediate
+ // string copy and Remove/Insert round-trip of the previous version.
+ sb.Replace("I", "i");
+ sb.Replace("Y", "y");
+ }
+
+ /**
+ * True for the vowels recognised by the algorithm: a, e, i, o, u, y, è.
+ */
+ private bool isVowel(char c)
+ {
+ switch (c)
+ {
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'y':
+ case 'è':
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /**
+ * Install the word/stem override dictionary consulted first by Stem().
+ */
+ protected internal void SetStemDictionary(IDictionary<string, string> dict)
+ {
+ _stemDict = dict;
+ }
+ }
}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/AbstractEncoder.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/AbstractEncoder.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/AbstractEncoder.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Base class for payload encoders.
+ /// </summary>
+ public abstract class AbstractEncoder : PayloadEncoder
+ {
+ // Convenience overload: encode the whole buffer.
+ public Payload Encode(char[] buffer)
+ {
+ return Encode(buffer, 0, buffer.Length);
+ }
+
+ // Subclasses define how the character range becomes payload bytes.
+ public abstract Payload Encode(char[] buffer, int offset, int length);
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Characters before the delimiter are the "token", those after are the payload.
+ /// <p/>
+ /// For example, if the delimiter is '|', then for the string "foo|bar", foo is the token
+ /// and "bar" is a payload.
+ /// <p/>
+ /// Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the
+ /// payload in an appropriate way (from characters to bytes).
+ /// <p/>
+ /// Note make sure your Tokenizer doesn't split on the delimiter, or this won't work
+ /// </summary>
+ /// <seealso cref="PayloadEncoder"/>
+ public sealed class DelimitedPayloadTokenFilter : TokenFilter
+ {
+ public static readonly char DEFAULT_DELIMITER = '|';
+ internal char delimiter = DEFAULT_DELIMITER;
+ internal TermAttribute termAtt;
+ internal PayloadAttribute payAtt;
+ internal PayloadEncoder encoder;
+
+ /// <summary>
+ /// Construct a token stream filtering the given input, using the default
+ /// '|' delimiter and an identity (char-to-byte) payload encoder.
+ /// </summary>
+ internal DelimitedPayloadTokenFilter(TokenStream input)
+ : this(input, DEFAULT_DELIMITER, new IdentityEncoder())
+ {
+
+ }
+
+
+ public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder)
+ : base(input)
+ {
+ termAtt = AddAttribute<TermAttribute>();
+ payAtt = AddAttribute<PayloadAttribute>();
+ this.delimiter = delimiter;
+ this.encoder = encoder;
+ }
+
+ // Splits the term at the first delimiter: the text before it stays the
+ // term, the text after it is encoded and attached as the payload.
+ public override bool IncrementToken()
+ {
+ bool result = false;
+ if (input.IncrementToken())
+ {
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+ //look for the delimiter
+ bool seen = false;
+ for (int i = 0; i < length; i++)
+ {
+ if (buffer[i] == delimiter)
+ {
+ termAtt.SetTermBuffer(buffer, 0, i);
+ payAtt.SetPayload(encoder.Encode(buffer, i + 1, (length - (i + 1))));
+ seen = true;
+ break;//at this point, we know the whole piece, so we can exit. If we don't see the delimiter, then the termAtt is the same
+ }
+ }
+ if (seen == false)
+ {
+ //no delimiter: clear any payload left over from a previous token
+ payAtt.SetPayload(null);
+ }
+ result = true;
+ }
+ return result;
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/FloatEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/FloatEncoder.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/FloatEncoder.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/FloatEncoder.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Encode a character array Float as a {@link org.apache.lucene.index.Payload}.
+ /// </summary>
+ /// <seealso cref="PayloadHelper.EncodeFloat(float, byte[], int)"/>
+ public class FloatEncoder : AbstractEncoder, PayloadEncoder
+ {
+ /// <summary>
+ /// Parses the character range as a float and encodes it as payload bytes.
+ /// </summary>
+ public override Payload Encode(char[] buffer, int offset, int length)
+ {
+ Payload result = new Payload();
+ // Parse with the invariant culture so payload data is not misread
+ // under locales that use ',' as the decimal separator.
+ float payload = float.Parse(new string(buffer, offset, length), CultureInfo.InvariantCulture); // TODO: improve this so that we don't have to new Strings
+ byte[] bytes = PayloadHelper.EncodeFloat(payload);
+ result.SetData(bytes);
+ return result;
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IdentityEncoder.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IdentityEncoder.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IdentityEncoder.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Does nothing other than convert the char array to a byte array using the specified encoding.
+ /// </summary>
+ public class IdentityEncoder : AbstractEncoder, PayloadEncoder
+ {
+
+ protected internal Encoding encoding = Encoding.UTF8;
+ // NOTE(review): EncodingName (below) is the human-readable display name;
+ // WebName ("utf-8") may be what was intended — confirm against callers.
+ protected internal String encodingName = "UTF-8"; //argh, stupid 1.4
+
+ public IdentityEncoder()
+ {
+ }
+
+ public IdentityEncoder(Encoding encoding)
+ {
+ this.encoding = encoding;
+ encodingName = encoding.EncodingName;
+ }
+
+
+ public override Payload Encode(char[] buffer, int offset, int length)
+ {
+ //what's the most efficient way to get a byte [] from a char[] array
+ //Do we have to go through String?
+ String tmp = new String(buffer, offset, length);
+ Payload result = null;//Can we avoid allocating by knowing where using the new API?
+ try
+ {
+ result = new Payload(encoding.GetBytes(tmp));
+ }
+ catch (EncoderFallbackException e)
+ {
+ //should never hit this, since we get the name from the Charset
+ // NOTE(review): with the default replacement fallback GetBytes does
+ // not throw; this path (returning null) is only reachable with a
+ // custom exception fallback — confirm null is an acceptable payload.
+ }
+
+ return result;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IntegerEncoder.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IntegerEncoder.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/IntegerEncoder.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Encode a character array Integer as a {@link org.apache.lucene.index.Payload}.
+ /// </summary>
+ /// <seealso cref="PayloadHelper.EncodeInt(int, byte[], int)"/>
+ public class IntegerEncoder : AbstractEncoder, PayloadEncoder
+ {
+ /// <summary>
+ /// Parses the character range as an int and encodes it as payload bytes.
+ /// </summary>
+ public override Payload Encode(char[] buffer, int offset, int length)
+ {
+ Payload result = new Payload();
+ int payload = ArrayUtil.ParseInt(buffer, offset, length);//TODO: improve this so that we don't have to new Strings
+ byte[] bytes = PayloadHelper.EncodeInt(payload);
+ result.SetData(bytes);
+ return result;
+ }
+ }
+}
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,46 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Assigns a payload to a token based on the <see cref="Token.Type()"/>
+ /// </summary>
+ public class NumericPayloadTokenFilter : TokenFilter
+ {
+ private String typeMatch;
+ private Payload thePayload;
+
+ private PayloadAttribute payloadAtt;
+ private TypeAttribute typeAtt;
+
+ public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch)
+ : base(input)
+ {
+ //Need to encode the payload
+ thePayload = new Payload(PayloadHelper.EncodeFloat(payload));
+ this.typeMatch = typeMatch;
+ payloadAtt = AddAttribute<PayloadAttribute>();
+ typeAtt = AddAttribute<TypeAttribute>();
+ }
+
+ public sealed override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ if (typeAtt.Type().Equals(typeMatch))
+ payloadAtt.SetPayload(thePayload);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadEncoder.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadEncoder.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadEncoder.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload
+ /// <p/>
+ /// NOTE: this interface is subject to change
+ /// </summary>
    public interface PayloadEncoder
    {
        /// <summary>
        /// Convert an entire char array to a <see cref="Payload"/>.
        /// </summary>
        /// <param name="buffer">The characters to encode.</param>
        /// <returns>An encoded <see cref="Payload"/></returns>
        Payload Encode(char[] buffer);

        /// <summary>
        /// Convert a slice of a char array to a <see cref="Payload"/>
        /// </summary>
        /// <param name="buffer">The characters to encode.</param>
        /// <param name="offset">Index of the first character of the slice.</param>
        /// <param name="length">Number of characters in the slice.</param>
        /// <returns>An encoded <see cref="Payload"/></returns>
        Payload Encode(char[] buffer, int offset, int length);
    }
+}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs?rev=1204353&r1=1204352&r2=1204353&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs Mon Nov 21 04:44:55 2011
@@ -15,6 +15,8 @@
* limitations under the License.
*/
+using Lucene.Net.Support;
+
namespace Lucene.Net.Analyzers.Payloads
{
/// <summary>
@@ -29,7 +31,7 @@ namespace Lucene.Net.Analyzers.Payloads
public static byte[] EncodeFloat(float payload, byte[] data, int offset)
{
- return EncodeInt(SupportClass.Single.FloatToIntBits(payload), data, offset);
+ return EncodeInt(Single.FloatToIntBits(payload), data, offset);
}
public static byte[] EncodeInt(int payload)
@@ -66,7 +68,7 @@ namespace Lucene.Net.Analyzers.Payloads
/// <returns>The float that was encoded</returns>
public static float DecodeFloat(byte[] bytes, int offset)
{
- return SupportClass.Single.IntBitsToFloat(DecodeInt(bytes, offset));
+ return Single.IntBitsToFloat(DecodeInt(bytes, offset));
}
public static int DecodeInt(byte[] bytes, int offset)
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs?rev=1204353&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs Mon Nov 21 04:44:55 2011
@@ -0,0 +1,45 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+ /// <summary>
+ /// Adds the <see cref="Token.SetStartOffset(int)"/>
+ /// and <see cref="Token.SetEndOffset(int)"/>
+ /// First 4 bytes are the start
+ /// </summary>
    /// <summary>
    /// Stores each token's start and end offsets in its payload: the first
    /// 4 bytes of the payload hold the start offset and the next 4 bytes
    /// hold the end offset, both written with
    /// <see cref="PayloadHelper.EncodeInt(int, byte[], int)"/>.
    /// </summary>
    public class TokenOffsetPayloadTokenFilter : TokenFilter
    {
        // Source of the start/end offsets copied into each token's payload.
        protected OffsetAttribute offsetAtt;
        // Target attribute that receives the encoded payload.
        protected PayloadAttribute payAtt;

        public TokenOffsetPayloadTokenFilter(TokenStream input)
            : base(input)
        {
            offsetAtt = AddAttribute<OffsetAttribute>();
            payAtt = AddAttribute<PayloadAttribute>();
        }

        public sealed override bool IncrementToken()
        {
            if (input.IncrementToken())
            {
                // 8-byte payload: [startOffset at 0..3][endOffset at 4..7]
                byte[] data = new byte[8];
                PayloadHelper.EncodeInt(offsetAtt.StartOffset(), data, 0);
                PayloadHelper.EncodeInt(offsetAtt.EndOffset(), data, 4);
                Payload payload = new Payload(data);
                payAtt.SetPayload(payload);
                return true;
            }
            else
            {
                return false;
            }
        }
    }
+}