You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/02/10 22:17:45 UTC

svn commit: r1069573 [1/3] - in /incubator/lucene.net: tags/Lucene.Net_2_9_2/contrib/Analyzers/ tags/Lucene.Net_2_9_2/contrib/Analyzers/BR/ tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/ tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net...

Author: digy
Date: Thu Feb 10 21:17:43 2011
New Revision: 1069573

URL: http://svn.apache.org/viewvc?rev=1069573&view=rev
Log:
Rearrangement of contrib/Analyzers + Arabic Analyzer for 2.9.2 tag and trunk

Added:
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStopWords.txt   (with props)
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/Properties/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/AR/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/Properties/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/Properties/AssemblyInfo.cs
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Test/Test.csproj
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStopWords.txt   (with props)
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Lucene.Net.Analyzers.csproj.user
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Properties/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers/Properties/AssemblyInfo.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicAnalyzer.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicNormalizationFilter.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/AR/TestArabicStemFilter.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Properties/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Properties/AssemblyInfo.cs
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Test/Test.csproj
Removed:
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/BR/
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.csproj
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Properties/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/BR/
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers.csproj
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Properties/
Modified:
    incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.sln
    incubator/lucene.net/trunk/C#/contrib/Analyzers/Lucene.Net.Analyzers.sln

Modified: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.sln
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.sln?rev=1069573&r1=1069572&r2=1069573&view=diff
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.sln (original)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers.sln Thu Feb 10 21:17:43 2011
@@ -1,7 +1,9 @@
 
-Microsoft Visual Studio Solution File, Format Version 9.00
-# Visual C# Express 2005
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analyzers", "Lucene.Net.Analyzers.csproj", "{A4AF790F-900A-48D2-85A7-B948E5214C16}"
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual C# Express 2008
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Analyzers", "Lucene.Net.Analyzers\Lucene.Net.Analyzers.csproj", "{4286E961-9143-4821-B46D-3D39D3736386}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Test\Test.csproj", "{67D27628-F1D5-4499-9818-B669731925C8}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -9,10 +11,14 @@ Global
 		Release|Any CPU = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{A4AF790F-900A-48D2-85A7-B948E5214C16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{A4AF790F-900A-48D2-85A7-B948E5214C16}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{A4AF790F-900A-48D2-85A7-B948E5214C16}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{A4AF790F-900A-48D2-85A7-B948E5214C16}.Release|Any CPU.Build.0 = Release|Any CPU
+		{4286E961-9143-4821-B46D-3D39D3736386}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{4286E961-9143-4821-B46D-3D39D3736386}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{4286E961-9143-4821-B46D-3D39D3736386}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{4286E961-9143-4821-B46D-3D39D3736386}.Release|Any CPU.Build.0 = Release|Any CPU
+		{67D27628-F1D5-4499-9818-B669731925C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{67D27628-F1D5-4499-9818-B669731925C8}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{67D27628-F1D5-4499-9818-B669731925C8}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{67D27628-F1D5-4499-9818-B669731925C8}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicAnalyzer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,202 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.AR
+{
+    /**
+     * {@link Analyzer} for Arabic.
+     * <p>
+     * This analyzer implements light-stemming as specified by:
+     * <i>
+     * Light Stemming for Arabic Information Retrieval
+     * </i>
+     * http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf
+     * <p>
+     * The analysis package contains three primary components:
+     * <ul>
+     *  <li>{@link ArabicNormalizationFilter}: Arabic orthographic normalization.
+     *  <li>{@link ArabicStemFilter}: Arabic light stemming
+     *  <li>Arabic stop words file: a set of default Arabic stop words.
+     * </ul>
+     *
+     */
+    public class ArabicAnalyzer : Analyzer
+    {
+
+        /**
+         * File containing default Arabic stopwords.
+         * 
+         * Default stopword list is from http://members.unine.ch/jacques.savoy/clef/index.html
+         * The stopword list is BSD-Licensed.
+         */
+        public static string DEFAULT_STOPWORD_FILE = "ArabicStopWords.txt";
+
+        /**
+         * Contains the stopwords used with the StopFilter.
+         * Each stop word is stored as both key and value.
+         */
+        private Hashtable stoptable = new Hashtable();
+
+        /**
+         * The comment character in the stopwords file.  All lines prefixed with this will be ignored
+         */
+        public static string STOPWORDS_COMMENT = "#";
+
+        /**
+         * Version passed to StopFilter.GetEnablePositionIncrementsVersionDefault when the
+         * stop filter is created.
+         */
+        private Version matchVersion;
+
+        /**
+         * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+         *
+         * @deprecated Use {@link #ArabicAnalyzer(Version)} instead
+         */
+        public ArabicAnalyzer() : this(Version.LUCENE_24)
+        {
+        }
+
+        /**
+         * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+         *
+         * @param matchVersion version used to pick the stop filter's position-increment default
+         * @throws FileNotFoundException if the embedded stop word resource cannot be located
+         */
+        public ArabicAnalyzer(Version matchVersion)
+        {
+            this.matchVersion = matchVersion;
+            stoptable = LoadDefaultStopWords();
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @deprecated Use {@link #ArabicAnalyzer(Version, String[])} instead
+         */
+        public ArabicAnalyzer(string[] stopwords) : this(Version.LUCENE_24, stopwords)
+        {
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @param matchVersion version used to pick the stop filter's position-increment default
+         * @param stopwords words handed to StopFilter.MakeStopSet
+         */
+        public ArabicAnalyzer(Version matchVersion, string[] stopwords)
+        {
+            stoptable = StopFilter.MakeStopSet(stopwords);
+            this.matchVersion = matchVersion;
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @deprecated Use {@link #ArabicAnalyzer(Version, Hashtable)} instead
+         */
+        public ArabicAnalyzer(Hashtable stopwords) : this(Version.LUCENE_24, stopwords)
+        {
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @param matchVersion version used to pick the stop filter's position-increment default
+         * @param stopwords table of stop words; it is copied, so later changes made by the
+         *                  caller do not affect this analyzer
+         */
+        public ArabicAnalyzer(Version matchVersion, Hashtable stopwords)
+        {
+            stoptable = new Hashtable(stopwords);
+            this.matchVersion = matchVersion;
+        }
+
+        //DIGY
+        // The two constructors of the Java original that take a stop word File
+        // (ArabicAnalyzer(File) and ArabicAnalyzer(Version, File), both loading through
+        // WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT)) have not been ported.
+
+        /**
+         * Reads the default stop word list from the resource embedded in the assembly
+         * that declares ArabicAnalyzer.
+         * <p>
+         * Lines starting with {@link #STOPWORDS_COMMENT} are skipped, surrounding whitespace
+         * is removed and empty lines are ignored.
+         *
+         * @return a table mapping every stop word to itself
+         * @throws FileNotFoundException if the embedded resource cannot be located
+         */
+        private static Hashtable LoadDefaultStopWords()
+        {
+            string resourceName = "Lucene.Net.Analyzers.AR." + DEFAULT_STOPWORD_FILE;
+
+            // typeof(...) rather than GetType(): a subclass living in another assembly
+            // must still find the resource shipped next to this class.
+            Stream stream = typeof(ArabicAnalyzer).Assembly.GetManifestResourceStream(resourceName);
+            if (stream == null)
+            {
+                throw new FileNotFoundException("Embedded stop word resource not found: " + resourceName, resourceName);
+            }
+
+            Hashtable words = new Hashtable();
+            using (StreamReader reader = new StreamReader(stream))
+            {
+                string line;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    if (line.StartsWith(STOPWORDS_COMMENT, System.StringComparison.Ordinal))
+                    {
+                        continue;
+                    }
+
+                    string word = line.Trim();
+                    if (word.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    // Indexer instead of Add(): a word listed twice must not throw.
+                    words[word] = word;
+                }
+            }
+            return words;
+        }
+
+        /**
+         * Wraps the given stream with the analyzer's filters, in this order:
+         * {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter},
+         * {@link ArabicStemFilter}.
+         */
+        private TokenStream BuildFilterChain(TokenStream source)
+        {
+            TokenStream result = new LowerCaseFilter(source);
+            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable);
+            result = new ArabicNormalizationFilter(result);
+            result = new ArabicStemFilter(result);
+            return result;
+        }
+
+        /**
+         * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
+         *
+         * @return  A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
+         *            {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
+         *            and {@link ArabicStemFilter}.
+         */
+        public override TokenStream TokenStream(string fieldName, TextReader reader)
+        {
+            return BuildFilterChain(new ArabicLetterTokenizer(reader));
+        }
+
+        /**
+         * Tokenizer and outermost filter kept between calls to ReusableTokenStream.
+         */
+        private class SavedStreams
+        {
+            internal Tokenizer Source;
+            internal TokenStream Result;
+        };
+
+        /**
+         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text 
+         * in the provided {@link Reader}.
+         * <p>
+         * The chain is built on the first call and stored with SetPreviousTokenStream;
+         * on later calls only the tokenizer is pointed at the new reader.
+         *
+         * @return  A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
+         *            {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
+         *            and {@link ArabicStemFilter}.
+         */
+        public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
+        {
+            SavedStreams streams = (SavedStreams)GetPreviousTokenStream();
+            if (streams == null)
+            {
+                streams = new SavedStreams();
+                streams.Source = new ArabicLetterTokenizer(reader);
+                streams.Result = BuildFilterChain(streams.Source);
+                SetPreviousTokenStream(streams);
+            }
+            else
+            {
+                streams.Source.Reset(reader);
+            }
+            return streams.Result;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicLetterTokenizer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.AR
+{
+
+    /**
+     * A {@link LetterTokenizer} whose tokens may also contain non-spacing marks.
+     * <p>
+     * The standard letter tokenizer fails on diacritics; this subclass accepts them
+     * as part of a token. Handling similar to this is necessary for Indic Scripts,
+     * Hebrew, Thaana, etc.
+     * </p>
+     *
+     */
+    public class ArabicLetterTokenizer : LetterTokenizer
+    {
+        /** Creates a tokenizer reading from the given reader. */
+        public ArabicLetterTokenizer(TextReader @in)
+            : base(@in)
+        {
+        }
+
+        /** Creates a tokenizer reading from the given reader, passing the attribute source to the base class. */
+        public ArabicLetterTokenizer(AttributeSource source, TextReader @in)
+            : base(source, @in)
+        {
+        }
+
+        /** Creates a tokenizer reading from the given reader, passing the attribute factory to the base class. */
+        public ArabicLetterTokenizer(AttributeFactory factory, TextReader @in)
+            : base(factory, @in)
+        {
+        }
+
+        /**
+         * Accepts every character the base tokenizer accepts, plus any character whose
+         * Unicode category is NonSpacingMark.
+         * @see org.apache.lucene.analysis.LetterTokenizer#isTokenChar(char)
+         */
+        protected override bool IsTokenChar(char c)
+        {
+            if (base.IsTokenChar(c))
+            {
+                return true;
+            }
+
+            System.Globalization.UnicodeCategory category = char.GetUnicodeCategory(c);
+            return category == System.Globalization.UnicodeCategory.NonSpacingMark;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizationFilter.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+
+namespace Lucene.Net.Analysis.AR
+{
+
+    /**
+     * A {@link TokenFilter} that runs every token's term buffer through
+     * {@link ArabicNormalizer} to normalize the orthography.
+     *
+     */
+    public class ArabicNormalizationFilter : TokenFilter
+    {
+        /** Normalizer applied to each term; created in the constructor. */
+        protected ArabicNormalizer normalizer = null;
+
+        /** Term attribute obtained through AddAttribute in the constructor. */
+        private TermAttribute termAtt;
+
+        /**
+         * Wraps the given stream.
+         *
+         * @param input the stream whose terms are normalized
+         */
+        public ArabicNormalizationFilter(TokenStream input)
+            : base(input)
+        {
+            normalizer = new ArabicNormalizer();
+            termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+        }
+
+        /**
+         * Advances the wrapped stream and, when a token is available, normalizes its
+         * term buffer in place and updates the term length.
+         *
+         * @return false once the wrapped stream has no more tokens, true otherwise
+         */
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            char[] buffer = termAtt.TermBuffer();
+            int length = termAtt.TermLength();
+            termAtt.SetTermLength(normalizer.Normalize(buffer, length));
+            return true;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicNormalizer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+
+namespace Lucene.Net.Analysis.AR
+{
+    /**
+     *  Normalizer for Arabic.
+     *  <p>
+     *  Works directly on a term buffer, so no new array is allocated.
+     *  <p>
+     *  The following rewrites are applied:
+     *  <ul>
+     *  <li> Alef with madda, hamza above or hamza below becomes a bare alef.
+     *  <li> Teh marbuta becomes heh.
+     *  <li> Dotless yeh (alef maksura) becomes yeh.
+     *  <li> Arabic diacritics (the harakat) are removed.
+     *  <li> Tatweel (stretching character) is removed.
+     * </ul>
+     *
+     */
+    public class ArabicNormalizer
+    {
+        // Alef and the variants that are folded into it.
+        public static char ALEF = '\u0627';
+        public static char ALEF_MADDA = '\u0622';
+        public static char ALEF_HAMZA_ABOVE = '\u0623';
+        public static char ALEF_HAMZA_BELOW = '\u0625';
+
+        // Dotless yeh is rewritten to yeh.
+        public static char YEH = '\u064A';
+        public static char DOTLESS_YEH = '\u0649';
+
+        // Teh marbuta is rewritten to heh.
+        public static char TEH_MARBUTA = '\u0629';
+        public static char HEH = '\u0647';
+
+        // Characters that are removed from the buffer.
+        public static char TATWEEL = '\u0640';
+
+        public static char FATHATAN = '\u064B';
+        public static char DAMMATAN = '\u064C';
+        public static char KASRATAN = '\u064D';
+        public static char FATHA = '\u064E';
+        public static char DAMMA = '\u064F';
+        public static char KASRA = '\u0650';
+        public static char SHADDA = '\u0651';
+        public static char SUKUN = '\u0652';
+
+        /**
+         * Normalize an input buffer of Arabic text
+         * 
+         * @param s input buffer, modified in place
+         * @param len number of valid characters in the buffer
+         * @return number of valid characters after normalization
+         */
+        public int Normalize(char[] s, int len)
+        {
+            int index = 0;
+            while (index < len)
+            {
+                char current = s[index];
+
+                if (IsAlefVariant(current))
+                {
+                    current = ALEF;
+                }
+                if (current == DOTLESS_YEH)
+                {
+                    current = YEH;
+                }
+                if (current == TEH_MARBUTA)
+                {
+                    current = HEH;
+                }
+                s[index] = current;
+
+                if (IsTatweelOrDiacritic(current))
+                {
+                    // The next character has moved into this slot, so stay on it.
+                    len = Delete(s, index, len);
+                }
+                else
+                {
+                    index++;
+                }
+            }
+
+            return len;
+        }
+
+        /** True for the alef forms that are folded to a bare alef. */
+        private static bool IsAlefVariant(char c)
+        {
+            return c == ALEF_MADDA || c == ALEF_HAMZA_ABOVE || c == ALEF_HAMZA_BELOW;
+        }
+
+        /** True for tatweel and for the diacritics that are removed. */
+        private static bool IsTatweelOrDiacritic(char c)
+        {
+            return c == TATWEEL || c == KASRATAN || c == DAMMATAN || c == FATHATAN
+                || c == FATHA || c == DAMMA || c == KASRA || c == SHADDA || c == SUKUN;
+        }
+
+        /**
+         * Delete a character in-place by shifting the characters after it one slot left.
+         * 
+         * @param s Input Buffer
+         * @param pos Position of character to delete
+         * @param len length of input buffer
+         * @return length of input buffer after deletion
+         */
+        protected int Delete(char[] s, int pos, int len)
+        {
+            int shortened = len - 1;
+            if (pos < len)
+            {
+                Array.Copy(s, pos + 1, s, pos, shortened - pos);
+            }
+
+            return shortened;
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemFilter.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+
+namespace Lucene.Net.Analysis.AR
+{
+
+
+    /**
+     * A {@link TokenFilter} that runs every token's term buffer through
+     * {@link ArabicStemmer} to stem Arabic words.
+     *
+     */
+    public class ArabicStemFilter : TokenFilter
+    {
+        /** Stemmer applied to each term; created in the constructor. */
+        protected ArabicStemmer stemmer = null;
+
+        /** Term attribute obtained through AddAttribute in the constructor. */
+        private TermAttribute termAtt;
+
+        /**
+         * Wraps the given stream.
+         *
+         * @param input the stream whose terms are stemmed
+         */
+        public ArabicStemFilter(TokenStream input)
+            : base(input)
+        {
+            stemmer = new ArabicStemmer();
+            termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+        }
+
+        /**
+         * Advances the wrapped stream and, when a token is available, passes its term
+         * buffer to the stemmer and sets the term length to the value it returns.
+         *
+         * @return false once the wrapped stream has no more tokens, true otherwise
+         */
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            char[] buffer = termAtt.TermBuffer();
+            int length = termAtt.TermLength();
+            termAtt.SetTermLength(stemmer.Stem(buffer, length));
+            return true;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStemmer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+
+namespace Lucene.Net.Analysis.AR
+{
+
+
+    /**
+     *  Stemmer for Arabic.
+     *  <p>
+     *  Stemming is done in-place for efficiency, operating on a term buffer.
+     *  <p>
+     *  Stemming is defined as:
+     *  <ul>
+     *  <li> Removal of attached definite article, conjunction, and prepositions
+     *       (the first matching entry of {@link #prefixes}).
+     *  <li> Stemming of common suffixes (every matching entry of {@link #suffixes},
+     *       tried in table order).
+     *  </ul>
+     *
+     */
+    public class ArabicStemmer
+    {
+        // Letters used to build the affix tables. They are shared by every
+        // instance, so the fields are read-only to prevent accidental reassignment.
+        public static readonly char ALEF = '\u0627';
+        public static readonly char BEH = '\u0628';
+        public static readonly char TEH_MARBUTA = '\u0629';
+        public static readonly char TEH = '\u062A';
+        public static readonly char FEH = '\u0641';
+        public static readonly char KAF = '\u0643';
+        public static readonly char LAM = '\u0644';
+        public static readonly char NOON = '\u0646';
+        public static readonly char HEH = '\u0647';
+        public static readonly char WAW = '\u0648';
+        public static readonly char YEH = '\u064A';
+
+        /** Prefixes tried in order by {@link #StemPrefix}; only the first match is removed. */
+        public static readonly char[][] prefixes = {
+            ("" + ALEF + LAM).ToCharArray(), 
+            ("" + WAW + ALEF + LAM).ToCharArray(), 
+            ("" + BEH + ALEF + LAM).ToCharArray(),
+            ("" + KAF + ALEF + LAM).ToCharArray(),
+            ("" + FEH + ALEF + LAM).ToCharArray(),
+            ("" + LAM + LAM).ToCharArray(),
+            ("" + WAW).ToCharArray(),
+        };
+
+        /** Suffixes tried in order by {@link #StemSuffix}; every match is removed. */
+        public static readonly char[][] suffixes = {
+            ("" + HEH + ALEF).ToCharArray(), 
+            ("" + ALEF + NOON).ToCharArray(), 
+            ("" + ALEF + TEH).ToCharArray(), 
+            ("" + WAW + NOON).ToCharArray(), 
+            ("" + YEH + NOON).ToCharArray(), 
+            ("" + YEH + HEH).ToCharArray(),
+            ("" + YEH + TEH_MARBUTA).ToCharArray(),
+            ("" + HEH).ToCharArray(),
+            ("" + TEH_MARBUTA).ToCharArray(),
+            ("" + YEH).ToCharArray(),
+        };
+
+
+        /**
+         * Stem an input buffer of Arabic text: prefix removal first, then suffix removal.
+         * 
+         * @param s input buffer, modified in place
+         * @param len number of valid characters in the buffer
+         * @return number of valid characters left in the buffer after stemming
+         */
+        public int Stem(char[] s, int len)
+        {
+            len = StemPrefix(s, len);
+            len = StemSuffix(s, len);
+
+            return len;
+        }
+
+        /**
+         * Stem a prefix off an Arabic word. At most one prefix is removed:
+         * the first table entry that matches.
+         * @param s input buffer, modified in place
+         * @param len length of input buffer
+         * @return new length of input buffer after stemming.
+         */
+        public int StemPrefix(char[] s, int len)
+        {
+            foreach (char[] prefix in prefixes)
+            {
+                if (StartsWith(s, len, prefix))
+                    return DeleteN(s, 0, len, prefix.Length);
+            }
+            return len;
+        }
+
+        /**
+         * Stem suffix(es) off an Arabic word. The scan does not stop at the first
+         * match, so several suffixes can be stripped in a single call.
+         * @param s input buffer, modified in place
+         * @param len length of input buffer
+         * @return new length of input buffer after stemming
+         */
+        public int StemSuffix(char[] s, int len)
+        {
+            foreach (char[] suffix in suffixes)
+            {
+                if (EndsWith(s, len, suffix))
+                    len = DeleteN(s, len - suffix.Length, len, suffix.Length);
+            }
+            return len;
+        }
+
+        /**
+         * Returns true if the prefix matches and can be stemmed
+         * @param s input buffer
+         * @param len length of input buffer
+         * @param prefix prefix to check
+         * @return true if the prefix matches and can be stemmed
+         */
+        bool StartsWith(char[] s, int len, char[] prefix)
+        {
+            if (prefix.Length == 1 && len < 4)
+            { // a one-letter prefix (wa-) must leave at least 3 characters behind
+                return false;
+            }
+            if (len < prefix.Length + 2)
+            { // every other prefix must leave at least 2 characters behind
+                return false;
+            }
+
+            for (int i = 0; i < prefix.Length; i++)
+            {
+                if (s[i] != prefix[i])
+                    return false;
+            }
+            return true;
+        }
+
+        /**
+         * Returns true if the suffix matches and can be stemmed
+         * @param s input buffer
+         * @param len length of input buffer
+         * @param suffix suffix to check
+         * @return true if the suffix matches and can be stemmed
+         */
+        bool EndsWith(char[] s, int len, char[] suffix)
+        {
+            if (len < suffix.Length + 2)
+            { // all suffixes require at least 2 characters after stemming
+                return false;
+            }
+
+            int start = len - suffix.Length;
+            for (int i = 0; i < suffix.Length; i++)
+            {
+                if (s[start + i] != suffix[i])
+                    return false;
+            }
+            return true;
+        }
+
+
+        /**
+         * Delete n characters in-place
+         * 
+         * @param s Input Buffer
+         * @param pos Position of the first character to delete
+         * @param len Length of input buffer
+         * @param nChars number of characters to delete
+         * @return length of input buffer after deletion
+         */
+        protected int DeleteN(char[] s, int pos, int len, int nChars)
+        {
+            // Each pass removes the character currently at pos, so the
+            // characters that follow slide left one step per iteration.
+            for (int i = 0; i < nChars; i++)
+                len = Delete(s, pos, len);
+            return len;
+        }
+
+        /**
+         * Delete a character in-place
+         * 
+         * @param s Input Buffer
+         * @param pos Position of character to delete
+         * @param len length of input buffer
+         * @return length of input buffer after deletion (always len - 1)
+         */
+        protected int Delete(char[] s, int pos, int len)
+        {
+            // Shift the tail left over the deleted slot; nothing to move when
+            // pos is at or beyond the end of the valid region.
+            if (pos < len)
+                Array.Copy(s, pos + 1, s, pos, len - pos - 1); 
+
+            return len - 1;
+        }
+
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStopWords.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStopWords.txt?rev=1069573&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/AR/ArabicStopWords.txt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianAnalyzer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using System.IO;
+
+/**
+ * Analyzer for Brazilian language. Supports an external list of stopwords (words that
+ * will not be indexed at all) and an external list of exclusions (words that will
+ * not be stemmed, but are still indexed).
+ *
+ */
+namespace Lucene.Net.Analysis.BR
+{
+    public sealed class BrazilianAnalyzer : Analyzer
+    {
+
+        /**
+         * List of typical Brazilian stopwords.
+         *
+         * NOTE(review): "nas" and "pelas" each appear twice, "pela" is absent, and
+         * "propios" looks like a misspelling of "proprios". The entries are left
+         * untouched here because changing them changes which terms get indexed.
+         */
+        public static readonly string[] BRAZILIAN_STOP_WORDS = {
+      "a","ainda","alem","ambas","ambos","antes",
+      "ao","aonde","aos","apos","aquele","aqueles",
+      "as","assim","com","como","contra","contudo",
+      "cuja","cujas","cujo","cujos","da","das","de",
+      "dela","dele","deles","demais","depois","desde",
+      "desta","deste","dispoe","dispoem","diversa",
+      "diversas","diversos","do","dos","durante","e",
+      "ela","elas","ele","eles","em","entao","entre",
+      "essa","essas","esse","esses","esta","estas",
+      "este","estes","ha","isso","isto","logo","mais",
+      "mas","mediante","menos","mesma","mesmas","mesmo",
+      "mesmos","na","nas","nao","nas","nem","nesse","neste",
+      "nos","o","os","ou","outra","outras","outro","outros",
+      "pelas","pelas","pelo","pelos","perante","pois","por",
+      "porque","portanto","proprio","propios","quais","qual",
+      "qualquer","quando","quanto","que","quem","quer","se",
+      "seja","sem","sendo","seu","seus","sob","sobre","sua",
+      "suas","tal","tambem","teu","teus","toda","todas","todo",
+      "todos","tua","tuas","tudo","um","uma","umas","uns"};
+
+
+        /**
+         * Contains the stopwords used with the StopFilter.
+         * Assigned by every constructor.
+         */
+        private Hashtable stoptable;
+
+        /**
+         * Contains words that should be indexed but not stemmed.
+         * Starts out empty; replaced through the SetStemExclusionTable overloads.
+         */
+        private Hashtable excltable = new Hashtable();
+
+        /**
+         * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
+         */
+        public BrazilianAnalyzer()
+        {
+            stoptable = StopFilter.MakeStopSet(BRAZILIAN_STOP_WORDS);
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @param stopwords array of stop words, converted with StopFilter.MakeStopSet
+         */
+        public BrazilianAnalyzer(string[] stopwords)
+        {
+            stoptable = StopFilter.MakeStopSet(stopwords);
+        }
+
+        /**
+         * Builds an analyzer with the given stop words.
+         *
+         * @param stopwords table of stop words; stored as-is, not copied
+         */
+        public BrazilianAnalyzer(Hashtable stopwords)
+        {
+            stoptable = stopwords;
+        }
+
+        /**
+         * Builds an analyzer with the stop words read from the given file.
+         *
+         * @param stopwords file handed to WordlistLoader.GetWordSet
+         */
+        public BrazilianAnalyzer(FileInfo stopwords)
+        {
+            stoptable = WordlistLoader.GetWordSet(stopwords);
+        }
+
+        /**
+         * Builds an exclusionlist from an array of Strings.
+         */
+        public void SetStemExclusionTable(string[] exclusionlist)
+        {
+            excltable = StopFilter.MakeStopSet(exclusionlist);
+        }
+        /**
+         * Builds an exclusionlist from a Hashtable. The table is stored as-is, not copied.
+         */
+        public void SetStemExclusionTable(Hashtable exclusionlist)
+        {
+            excltable = exclusionlist;
+        }
+        /**
+         * Builds an exclusionlist from the words contained in the given file.
+         */
+        public void SetStemExclusionTable(FileInfo exclusionlist)
+        {
+            excltable = WordlistLoader.GetWordSet(exclusionlist);
+        }
+
+        /**
+         * Creates a TokenStream which tokenizes all the text in the provided Reader.
+         *
+         * @param fieldName name of the field being analyzed (not used by this analyzer)
+         * @param reader    source of the text to tokenize
+         * @return  A TokenStream built from a StandardTokenizer filtered, in this order,
+         *          with LowerCaseFilter, StandardFilter, StopFilter and BrazilianStemFilter.
+         */
+        public override TokenStream TokenStream(string fieldName, TextReader reader)
+        {
+            TokenStream result = new StandardTokenizer(reader);
+            result = new LowerCaseFilter(result);
+            result = new StandardFilter(result);
+            result = new StopFilter(result, stoptable);
+            result = new BrazilianStemFilter(result, excltable);
+            return result;
+        }
+    }
+}

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemFilter.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+using System.Collections;
+
+
+/**
+ * Based on GermanStemFilter
+ *
+ */
+namespace Lucene.Net.Analysis.BR
+{
+
+    public sealed class BrazilianStemFilter : TokenFilter
+    {
+
+        /**
+         * Stemmer applied to every term that is not excluded.
+         */
+        private BrazilianStemmer stemmer = null;
+
+        /**
+         * Terms that must pass through unstemmed; null means nothing is excluded.
+         */
+        private Hashtable exclusions = null;
+
+        /**
+         * Builds a filter without an exclusion table.
+         */
+        public BrazilianStemFilter(TokenStream input)
+            : base(input)
+        {
+            this.stemmer = new BrazilianStemmer();
+        }
+
+        /**
+         * Builds a filter that leaves the terms contained in exclusiontable unstemmed.
+         */
+        public BrazilianStemFilter(TokenStream input, Hashtable exclusiontable)
+            : this(input)
+        {
+            this.exclusions = exclusiontable;
+        }
+
+        /**
+         * Pulls the next token from the wrapped stream and replaces its term with
+         * the stem, unless the term is excluded or the stemmer leaves it unchanged.
+         *
+         * @return Returns the next token in the stream, or null at EOS.
+         */
+        public override Token Next(Token reusableToken)
+        {
+            System.Diagnostics.Trace.Assert(reusableToken != null);
+
+            Token current = input.Next(reusableToken);
+            if (current == null)
+                return null;
+
+            string original = current.TermText();
+
+            // Excluded terms are returned exactly as they came in.
+            bool excluded = (exclusions != null) && exclusions.Contains(original);
+            if (excluded)
+                return current;
+
+            string stemmed = stemmer.Stem(original);
+
+            // Only touch the token when stemming produced a different, non-null term.
+            bool changed = (stemmed != null) && !stemmed.Equals(original);
+            if (changed)
+                current.SetTermBuffer(stemmed.ToCharArray(), 0, stemmed.Length);//was  SetTermBuffer(s)
+
+            return current;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs?rev=1069573&view=auto
==============================================================================
--- incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs (added)
+++ incubator/lucene.net/tags/Lucene.Net_2_9_2/contrib/Analyzers/Lucene.Net.Analyzers/BR/BrazilianStemmer.cs Thu Feb 10 21:17:43 2011
@@ -0,0 +1,1264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for Brazilian words.
+ */
+namespace Lucene.Net.Analysis.BR
+{
+
+    public class BrazilianStemmer
+    {
+
+        /**
+         * Working state of the term currently being stemmed; all of these are
+         * (re)assigned while Stem runs.
+         */
+        // Original term and changed term joined as term + ";" + CT (set in Stem).
+        private string TERM;
+        // Changed term: the output of createCT, which the stemming steps rewrite.
+        private string CT;
+        // Region R1 of CT, computed with getR1(CT).
+        private string R1;
+        // Region R2, computed by applying getR1 to R1.
+        private string R2;
+        // Region RV of CT, computed with getRV(CT).
+        private string RV;
+
+
+        /**
+         * Creates a stemmer; there is nothing to initialise.
+         */
+        public BrazilianStemmer()
+        {
+        }
+
+        /**
+         * Stems the given term to a unique <tt>discriminator</tt>.
+         *
+         * @param term  The term that should be stemmed.
+         * @return      Discriminator for <tt>term</tt>; null when the changed term is
+         *              not indexable, or the changed term itself when it contains a
+         *              non-letter character.
+         */
+        public string Stem(string term)
+        {
+            // Build the changed term (CT) that every later step works on.
+            createCT(term);
+
+            if (!isIndexable(CT))
+                return null;
+
+            if (!isStemmable(CT))
+                return CT;
+
+            R1 = getR1(CT);
+            R2 = getR1(R1);
+            RV = getRV(CT);
+            TERM = term + ";" + CT;
+
+            // step2 only runs when step1 removed nothing.
+            bool altered = step1() || step2();
+
+            if (altered)
+                step3();
+            else
+                step4();
+
+            step5();
+
+            return CT;
+        }
+
+        /**
+         * Checks whether a term can be processed correctly.
+         *
+         * @return  true if, and only if, every character of the term is a letter
+         *          (so an empty term also yields true).
+         */
+        private bool isStemmable(string term)
+        {
+            foreach (char ch in term)
+            {
+                // A single non-letter character rules the term out.
+                if (!char.IsLetter(ch))
+                    return false;
+            }
+            return true;
+        }
+
+        /**
+         * Checks whether a term can be indexed.
+         *
+         * @return  true if the term is longer than 2 and shorter than 30 characters
+         */
+        private bool isIndexable(string term)
+        {
+            int length = term.Length;
+            return (length > 2) && (length < 30);
+        }
+
+        /**
+         * See if the character is one of 'a','e','i','o','u' (lowercase, unaccented).
+         *
+         * @return true if it is a vowel
+         */
+        private bool isVowel(char value)
+        {
+            switch (value)
+            {
+                case 'a':
+                case 'e':
+                case 'i':
+                case 'o':
+                case 'u':
+                    return true;
+                default:
+                    return false;
+            }
+        }
+
+        /**
+         * Gets R1
+         *
+         * R1 - is the region after the first non-vowel following a vowel.
+         *
+         * Both scans stop one character short of the end of the word, so no region
+         * is reported when the vowel or the non-vowel is only found at the last position.
+         *
+         * @return null when value is null or no such region is found,
+         *         otherwise a string representing R1
+         */
+        private string getR1(string value)
+        {
+            // be-safe !!!
+            if (value == null)
+                return null;
+
+            int last = value.Length - 1;
+            int pos = 0;
+
+            // skip ahead to the 1st vowel
+            while ((pos < last) && !isVowel(value[pos]))
+                pos++;
+
+            if (pos >= last)
+                return null;
+
+            // skip ahead to the 1st non-vowel after it
+            while ((pos < last) && isVowel(value[pos]))
+                pos++;
+
+            if (pos >= last)
+                return null;
+
+            return value.Substring(pos + 1);
+        }
+
+        /**
+         * Gets RV
+         *
+         * RV - IF the second letter is a consonant, RV is the region after
+         *      the next following vowel,
+         *
+         *      OR if the first two letters are vowels, RV is the region
+         *      after the next consonant,
+         *
+         *      AND otherwise RV is the region after the third letter.
+         *
+         * Each scan starts at the third letter and stops one character short of the
+         * end; when a scan finds nothing, the following rule is tried.
+         *
+         * @return null when value is null or none of the rules applies,
+         *         otherwise a string representing RV
+         */
+        private string getRV(string value)
+        {
+            // be-safe !!!
+            if (value == null)
+                return null;
+
+            int last = value.Length - 1;
+            int pos;
+
+            // Rule 1: second letter is a consonant -> region after the next vowel.
+            if ((last > 0) && !isVowel(value[1]))
+            {
+                pos = 2;
+                while ((pos < last) && !isVowel(value[pos]))
+                    pos++;
+
+                if (pos < last)
+                    return value.Substring(pos + 1);
+            }
+
+            // Rule 2: first two letters are vowels -> region after the next consonant.
+            if ((last > 1) && isVowel(value[0]) && isVowel(value[1]))
+            {
+                pos = 2;
+                while ((pos < last) && isVowel(value[pos]))
+                    pos++;
+
+                if (pos < last)
+                    return value.Substring(pos + 1);
+            }
+
+            // Rule 3: fall back to the region after the third letter, if there is one.
+            return (last > 2) ? value.Substring(3) : null;
+        }
+
+        /**
+         * Normalises a term before stemming:
+         * 1) Turn to lowercase
+         * 2) Remove accents (á â ã -> a ; é ê -> e ; í -> i ; ó ô õ -> o ; ú ü -> u)
+         * 3) ç -> c
+         * 4) ñ -> n
+         *
+         * Lower-casing uses the invariant culture so the result does not depend on
+         * the locale of the machine (a Turkish locale, for instance, would lower-case
+         * 'I' to a dotless 'ı' and break the suffix rules).
+         *
+         * @return null when value is null, otherwise the transformed string
+         */
+        private string changeTerm(string value)
+        {
+            // be-safe !!!
+            if (value == null)
+            {
+                return null;
+            }
+
+            string lowered = value.ToLowerInvariant();
+
+            // One output character per input character, so the capacity is exact.
+            System.Text.StringBuilder folded = new System.Text.StringBuilder(lowered.Length);
+
+            foreach (char ch in lowered)
+            {
+                switch (ch)
+                {
+                    case 'á':
+                    case 'â':
+                    case 'ã':
+                        folded.Append('a');
+                        break;
+                    case 'é':
+                    case 'ê':
+                        folded.Append('e');
+                        break;
+                    case 'í':
+                        folded.Append('i');
+                        break;
+                    case 'ó':
+                    case 'ô':
+                    case 'õ':
+                        folded.Append('o');
+                        break;
+                    case 'ú':
+                    case 'ü':
+                        folded.Append('u');
+                        break;
+                    case 'ç':
+                        folded.Append('c');
+                        break;
+                    case 'ñ':
+                        folded.Append('n');
+                        break;
+                    default:
+                        folded.Append(ch);
+                        break;
+                }
+            }
+
+            return folded.ToString();
+        }
+
+        /**
+         * Check if a string ends with a suffix (exact, case-sensitive comparison).
+         *
+         * @return true if the string ends with the specified suffix;
+         *         false when either argument is null
+         */
+        private bool suffix(string value, string suffix)
+        {
+            // be-safe !!!
+            if ((value == null) || (suffix == null))
+                return false;
+
+            // Ordinal comparison; a suffix longer than the value never matches.
+            return value.EndsWith(suffix, System.StringComparison.Ordinal);
+        }
+
+        /**
+         * Replace a string suffix by another
+         *
+         * @return value with toReplace swapped for changeTo at its end; value itself
+         *         when any argument is null or removing toReplace leaves value unchanged
+         */
+        private string replaceSuffix(string value, string toReplace, string changeTo)
+        {
+            // be-safe !!!
+            bool anyMissing = (value == null) || (toReplace == null) || (changeTo == null);
+            if (anyMissing)
+                return value;
+
+            string stem = removeSuffix(value, toReplace);
+
+            // Nothing was cut off, so there is nothing to replace.
+            return value.Equals(stem) ? value : stem + changeTo;
+        }
+
+        /**
+         * Remove a string suffix
+         *
+         * @return the string without the suffix; value itself when either argument
+         *         is null or value does not end with toRemove
+         */
+        private string removeSuffix(string value, string toRemove)
+        {
+            // be-safe !!!
+            bool removable = (value != null) &&
+                             (toRemove != null) &&
+                             suffix(value, toRemove);
+            if (!removable)
+                return value;
+
+            int keep = value.Length - toRemove.Length;
+            return value.Substring(0, keep);
+        }
+
+        /**
+         * See if a suffix is preceded by a string
+         *
+         * @return true if value ends with _suffix and the part before _suffix ends
+         *         with preceded; false when any argument is null
+         */
+        private bool suffixPreceded(string value, string _suffix, string preceded)
+        {
+            // be-safe !!!
+            bool applicable = (value != null) &&
+                              (_suffix != null) &&
+                              (preceded != null) &&
+                              suffix(value, _suffix);
+            if (!applicable)
+                return false;
+
+            string stem = removeSuffix(value, _suffix);
+            return suffix(stem, preceded);
+        }
+
+
+
+
+        /**
+         * Creates CT (changed term): the term as transformed by changeTerm, with at
+         * most one punctuation character stripped from the front and one from the end.
+         * Stripping is skipped whenever CT is shorter than two characters.
+         */
+        private void createCT(string term)
+        {
+            // Characters removed from either edge of the term: " ' - , ; . ? !
+            const string edgePunctuation = "\"'-,;.?!";
+
+            CT = changeTerm(term);
+
+            if (CT.Length < 2) return;
+
+            // if the first character is punctuation, remove it
+            if (edgePunctuation.IndexOf(CT[0]) >= 0)
+                CT = CT.Substring(1);
+
+            if (CT.Length < 2) return;
+
+            // if the last character is punctuation, remove it
+            int lastIndex = CT.Length - 1;
+            if (edgePunctuation.IndexOf(CT[lastIndex]) >= 0)
+                CT = CT.Substring(0, lastIndex);
+        }
+
+
+        /**
+         * Standart suffix removal.
+       * Search for the longest among the following suffixes, and perform
+       * the following actions:
+       *
+       * @return false if no ending was removed
+         */
+        private bool step1()
+        {
+            if (CT == null) return false;
+
+            // suffix lenght = 7
+            if (suffix(CT, "uciones") && suffix(R2, "uciones"))
+            {
+                CT = replaceSuffix(CT, "uciones", "u"); return true;
+            }
+
+            // suffix lenght = 6
+            if (CT.Length >= 6)
+            {
+                if (suffix(CT, "imentos") && suffix(R2, "imentos"))
+                {
+                    CT = removeSuffix(CT, "imentos"); return true;
+                }
+                if (suffix(CT, "amentos") && suffix(R2, "amentos"))
+                {
+                    CT = removeSuffix(CT, "amentos"); return true;
+                }
+                if (suffix(CT, "adores") && suffix(R2, "adores"))
+                {
+                    CT = removeSuffix(CT, "adores"); return true;
+                }
+                if (suffix(CT, "adoras") && suffix(R2, "adoras"))
+                {
+                    CT = removeSuffix(CT, "adoras"); return true;
+                }
+                if (suffix(CT, "logias") && suffix(R2, "logias"))
+                {
+                    replaceSuffix(CT, "logias", "log"); return true;
+                }
+                if (suffix(CT, "encias") && suffix(R2, "encias"))
+                {
+                    CT = replaceSuffix(CT, "encias", "ente"); return true;
+                }
+                if (suffix(CT, "amente") && suffix(R1, "amente"))
+                {
+                    CT = removeSuffix(CT, "amente"); return true;
+                }
+                if (suffix(CT, "idades") && suffix(R2, "idades"))
+                {
+                    CT = removeSuffix(CT, "idades"); return true;
+                }
+            }
+
+            // suffix lenght = 5
+            if (CT.Length >= 5)
+            {
+                if (suffix(CT, "acoes") && suffix(R2, "acoes"))
+                {
+                    CT = removeSuffix(CT, "acoes"); return true;
+                }
+                if (suffix(CT, "imento") && suffix(R2, "imento"))
+                {
+                    CT = removeSuffix(CT, "imento"); return true;
+                }
+                if (suffix(CT, "amento") && suffix(R2, "amento"))
+                {
+                    CT = removeSuffix(CT, "amento"); return true;
+                }
+                if (suffix(CT, "adora") && suffix(R2, "adora"))
+                {
+                    CT = removeSuffix(CT, "adora"); return true;
+                }
+                if (suffix(CT, "ismos") && suffix(R2, "ismos"))
+                {
+                    CT = removeSuffix(CT, "ismos"); return true;
+                }
+                if (suffix(CT, "istas") && suffix(R2, "istas"))
+                {
+                    CT = removeSuffix(CT, "istas"); return true;
+                }
+                if (suffix(CT, "logia") && suffix(R2, "logia"))
+                {
+                    CT = replaceSuffix(CT, "logia", "log"); return true;
+                }
+                if (suffix(CT, "ucion") && suffix(R2, "ucion"))
+                {
+                    CT = replaceSuffix(CT, "ucion", "u"); return true;
+                }
+                if (suffix(CT, "encia") && suffix(R2, "encia"))
+                {
+                    CT = replaceSuffix(CT, "encia", "ente"); return true;
+                }
+                if (suffix(CT, "mente") && suffix(R2, "mente"))
+                {
+                    CT = removeSuffix(CT, "mente"); return true;
+                }
+                if (suffix(CT, "idade") && suffix(R2, "idade"))
+                {
+                    CT = removeSuffix(CT, "idade"); return true;
+                }
+            }
+
+            // suffix length = 4
+            if (CT.Length >= 4)
+            {
+                if (suffix(CT, "acao") && suffix(R2, "acao"))
+                {
+                    CT = removeSuffix(CT, "acao"); return true;
+                }
+                if (suffix(CT, "ezas") && suffix(R2, "ezas"))
+                {
+                    CT = removeSuffix(CT, "ezas"); return true;
+                }
+                if (suffix(CT, "icos") && suffix(R2, "icos"))
+                {
+                    CT = removeSuffix(CT, "icos"); return true;
+                }
+                if (suffix(CT, "icas") && suffix(R2, "icas"))
+                {
+                    CT = removeSuffix(CT, "icas"); return true;
+                }
+                if (suffix(CT, "ismo") && suffix(R2, "ismo"))
+                {
+                    CT = removeSuffix(CT, "ismo"); return true;
+                }
+                if (suffix(CT, "avel") && suffix(R2, "avel"))
+                {
+                    CT = removeSuffix(CT, "avel"); return true;
+                }
+                if (suffix(CT, "ivel") && suffix(R2, "ivel"))
+                {
+                    CT = removeSuffix(CT, "ivel"); return true;
+                }
+                if (suffix(CT, "ista") && suffix(R2, "ista"))
+                {
+                    CT = removeSuffix(CT, "ista"); return true;
+                }
+                if (suffix(CT, "osos") && suffix(R2, "osos"))
+                {
+                    CT = removeSuffix(CT, "osos"); return true;
+                }
+                if (suffix(CT, "osas") && suffix(R2, "osas"))
+                {
+                    CT = removeSuffix(CT, "osas"); return true;
+                }
+                if (suffix(CT, "ador") && suffix(R2, "ador"))
+                {
+                    CT = removeSuffix(CT, "ador"); return true;
+                }
+                if (suffix(CT, "ivas") && suffix(R2, "ivas"))
+                {
+                    CT = removeSuffix(CT, "ivas"); return true;
+                }
+                if (suffix(CT, "ivos") && suffix(R2, "ivos"))
+                {
+                    CT = removeSuffix(CT, "ivos"); return true;
+                }
+                if (suffix(CT, "iras") &&
+                    suffix(RV, "iras") &&
+                    suffixPreceded(CT, "iras", "e"))
+                {
+                    CT = replaceSuffix(CT, "iras", "ir"); return true;
+                }
+            }
+
+            // suffix length = 3
+            if (CT.Length >= 3)
+            {
+                if (suffix(CT, "eza") && suffix(R2, "eza"))
+                {
+                    CT = removeSuffix(CT, "eza"); return true;
+                }
+                if (suffix(CT, "ico") && suffix(R2, "ico"))
+                {
+                    CT = removeSuffix(CT, "ico"); return true;
+                }
+                if (suffix(CT, "ica") && suffix(R2, "ica"))
+                {
+                    CT = removeSuffix(CT, "ica"); return true;
+                }
+                if (suffix(CT, "oso") && suffix(R2, "oso"))
+                {
+                    CT = removeSuffix(CT, "oso"); return true;
+                }
+                if (suffix(CT, "osa") && suffix(R2, "osa"))
+                {
+                    CT = removeSuffix(CT, "osa"); return true;
+                }
+                if (suffix(CT, "iva") && suffix(R2, "iva"))
+                {
+                    CT = removeSuffix(CT, "iva"); return true;
+                }
+                if (suffix(CT, "ivo") && suffix(R2, "ivo"))
+                {
+                    CT = removeSuffix(CT, "ivo"); return true;
+                }
+                if (suffix(CT, "ira") &&
+                    suffix(RV, "ira") &&
+                    suffixPreceded(CT, "ira", "e"))
+                {
+                    CT = replaceSuffix(CT, "ira", "ir"); return true;
+                }
+            }
+
+            // no ending was removed by step1
+            return false;
+        }
+
+
+        /**
+         * Verb suffixes tested by step2, grouped by length with the longest
+         * group first so that the longest matching ending always wins.
+         * Two different suffixes of the same length can never both match the
+         * same word, so the order inside a group does not affect the result.
+         */
+        private static readonly string[][] STEP2_VERB_SUFFIXES = new string[][]
+        {
+            // suffix length = 7
+            new string[]
+            {
+                "issemos", "essemos", "assemos", "ariamos", "eriamos", "iriamos"
+            },
+            // suffix length = 6
+            new string[]
+            {
+                "iremos", "eremos", "aremos", "avamos", "iramos", "eramos",
+                "aramos", "asseis", "esseis", "isseis", "arieis", "erieis",
+                "irieis"
+            },
+            // suffix length = 5
+            new string[]
+            {
+                "irmos", "iamos", "armos", "ermos", "areis", "ereis", "ireis",
+                "asses", "esses", "isses", "astes", "assem", "essem", "issem",
+                "ardes", "erdes", "irdes", "ariam", "eriam", "iriam", "arias",
+                "erias", "irias", "estes", "istes", "aveis"
+            },
+            // suffix length = 4
+            new string[]
+            {
+                "aria", "eria", "iria", "asse", "esse", "isse", "aste", "este",
+                "iste", "arei", "erei", "irei", "aram", "eram", "iram", "avam",
+                "arem", "erem", "irem", "ando", "endo", "indo", "arao", "erao",
+                "irao", "adas", "idas", "aras", "eras", "iras", "avas", "ares",
+                "eres", "ires", "ados", "idos", "amos", "emos", "imos", "ieis"
+            },
+            // suffix length = 3
+            // "ava" replaces an earlier check that tested for "ira" but then
+            // tried to strip "ava"; that left "ava" unhandled and shadowed
+            // the real "ira" rule further down the list.
+            new string[]
+            {
+                "ada", "ida", "ara", "era", "ava", "iam", "ado", "ido", "ias",
+                "ais", "eis", "ira", "ear"
+            },
+            // suffix length = 2
+            new string[]
+            {
+                "ia", "ei", "am", "em", "ar", "er", "ir", "as", "es", "is",
+                "eu", "iu", "ou"
+            }
+        };
+
+        /**
+         * Verb suffixes.
+         *
+         * Search for the longest among the verb suffixes in RV and, if one is
+         * found, delete that same suffix from CT. Only the first (longest)
+         * match is processed.
+         *
+         * @return true as soon as a suffix is matched in RV,
+         *         false if RV is null or no suffix matched
+         */
+        private bool step2()
+        {
+            if (RV == null) return false;
+
+            foreach (string[] group in STEP2_VERB_SUFFIXES)
+            {
+                // every suffix in a group has the same length; skip the
+                // whole group when RV is too short to contain any of them
+                if (RV.Length < group[0].Length) continue;
+
+                foreach (string ending in group)
+                {
+                    if (suffix(RV, ending))
+                    {
+                        CT = removeSuffix(CT, ending);
+                        return true;
+                    }
+                }
+            }
+
+            // no ending was removed by step2
+            return false;
+        }
+
+        /**
+         * Drops a trailing 'i' from CT when RV ends in 'i' and that 'i' is
+         * preceded by 'c' within RV. Does nothing when RV is null.
+         */
+        private void step3()
+        {
+            bool endsInCi = RV != null
+                            && suffix(RV, "i")
+                            && suffixPreceded(RV, "i", "c");
+
+            if (endsInCi)
+            {
+                CT = removeSuffix(CT, "i");
+            }
+        }
+
+        /**
+         * Residual suffix.
+         *
+         * Tests RV, in order, for the endings "os", "a", "i" and "o"; the
+         * first one found is deleted from CT and the step ends. Only these
+         * unaccented forms are checked here. Does nothing when RV is null.
+         */
+        private void step4()
+        {
+            if (RV == null) return;
+
+            string[] residualEndings = new string[] { "os", "a", "i", "o" };
+
+            foreach (string ending in residualEndings)
+            {
+                if (suffix(RV, ending))
+                {
+                    CT = removeSuffix(CT, ending);
+                    return;
+                }
+            }
+        }
+
+        /**
+         * If RV ends with 'e', delete that 'e' from CT. When the 'e' is
+         * preceded in RV by 'gu' (or, failing that, by 'ci'), the 'u' (or 'i')
+         * is deleted from CT as well.
+         *
+         * Only the unaccented 'e' is tested, and no cedilla handling is done
+         * in this method. Does nothing when RV is null.
+         */
+        private void step5()
+        {
+            if (RV == null || !suffix(RV, "e"))
+            {
+                return;
+            }
+
+            // Decide which extra letter (if any) goes away together with the
+            // 'e'; "gu" takes precedence over "ci".
+            string extraLetter = null;
+            if (suffixPreceded(RV, "e", "gu"))
+            {
+                extraLetter = "u";
+            }
+            else if (suffixPreceded(RV, "e", "ci"))
+            {
+                extraLetter = "i";
+            }
+
+            CT = removeSuffix(CT, "e");
+            if (extraLetter != null)
+            {
+                CT = removeSuffix(CT, extraLetter);
+            }
+        }
+
+        /**
+         * Builds a one-line dump of the stemmer state for logging and
+         * debugging.
+         *
+         * @return  a string showing TERM, CT, RV, R1 and R2, each wrapped as
+         *          " (NAME = value)"
+         */
+        public string Log()
+        {
+            return string.Concat(
+                " (TERM = ", TERM, ")",
+                " (CT = ", CT, ")",
+                " (RV = ", RV, ")",
+                " (R1 = ", R1, ")",
+                " (R2 = ", R2, ")");
+        }
+
+    }
+
+}
\ No newline at end of file