You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by si...@apache.org on 2012/06/06 21:46:00 UTC
svn commit: r1347076 [1/9] - in /incubator/lucene.net/trunk:
src/contrib/Analyzers/ src/contrib/Analyzers/Hunspell/
test/contrib/Analyzers/ test/contrib/Analyzers/Hunspell/
test/contrib/Analyzers/Hunspell/Dictionaries/
Author: sisve
Date: Wed Jun 6 19:45:59 2012
New Revision: 1347076
URL: http://svn.apache.org/viewvc?rev=1347076&view=rev
Log:
Support for stemming using hunspell dictionaries.
Added:
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs (with props)
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs (with props)
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs (with props)
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs (with props)
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs (with props)
incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.dic (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/fr-moderne.aff (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/fr-moderne.dic (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/nl_NL.aff (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/nl_NL.dic (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs (with props)
incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs (with props)
Modified:
incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1347076&r1=1347075&r2=1347076&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj Wed Jun 6 19:45:59 2012
@@ -110,6 +110,12 @@
<Compile Include="Fr\FrenchAnalyzer.cs" />
<Compile Include="Fr\FrenchStemFilter.cs" />
<Compile Include="Fr\FrenchStemmer.cs" />
+ <Compile Include="Hunspell\HunspellAffix.cs" />
+ <Compile Include="Hunspell\HunspellDictionary.cs" />
+ <Compile Include="Hunspell\HunspellStem.cs" />
+ <Compile Include="Hunspell\HunspellStemFilter.cs" />
+ <Compile Include="Hunspell\HunspellStemmer.cs" />
+ <Compile Include="Hunspell\HunspellWord.cs" />
<Compile Include="Miscellaneous\EmptyTokenStream.cs" />
<Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
<Compile Include="Miscellaneous\PatternAnalyzer.cs" />
@@ -185,4 +191,4 @@
<Target Name="AfterBuild">
</Target>
-->
-</Project>
+</Project>
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Diagnostics;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    /// Wrapper class representing a single hunspell affix rule: the text it strips
+    /// and appends, its flags, and the condition a word must meet for it to apply.
+    /// </summary>
+    [DebuggerDisplay("{Condition}")]
+    public class HunspellAffix {
+        private String _condition;
+        private Regex _conditionPattern;
+
+        /// <summary>
+        /// The append defined for the affix.
+        /// </summary>
+        public String Append { get; set; }
+
+        /// <summary>
+        /// The flags defined for the affix append.
+        /// </summary>
+        public Char[] AppendFlags { get; set; }
+
+        /// <summary>
+        /// The condition that must be met before the affix can be applied, exactly as
+        /// it was passed to <see cref="SetCondition"/>; <c>null</c> until a condition is set.
+        /// </summary>
+        public String Condition {
+            get { return _condition; }
+        }
+
+        /// <summary>
+        /// The affix flag.
+        /// </summary>
+        public Char Flag { get; set; }
+
+        /// <summary>
+        /// Whether the affix is defined as cross product.
+        /// </summary>
+        public Boolean IsCrossProduct { get; set; }
+
+        /// <summary>
+        /// The stripping characters defined for the affix.
+        /// </summary>
+        public String Strip { get; set; }
+
+        /// <summary>
+        /// Checks whether the given text meets the condition of this affix.
+        /// </summary>
+        /// <param name="text">Text to match against the condition pattern.</param>
+        /// <returns>
+        /// <c>true</c> if the text matches the condition pattern, <c>false</c> otherwise.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
+        /// <exception cref="InvalidOperationException">
+        /// No condition has been set through <see cref="SetCondition"/> yet.
+        /// </exception>
+        public Boolean CheckCondition(String text) {
+            if (text == null)
+                throw new ArgumentNullException("text");
+
+            // Fail with a descriptive error instead of a bare NullReferenceException.
+            if (_conditionPattern == null)
+                throw new InvalidOperationException("No condition has been set for this affix; call SetCondition first.");
+
+            return _conditionPattern.IsMatch(text);
+        }
+
+        /// <summary>
+        /// Sets the condition that must be met before the affix can be applied.
+        /// </summary>
+        /// <param name="condition">Condition to be met before affix application.</param>
+        /// <param name="pattern">Condition as a regular expression pattern.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="pattern"/> is not a valid regular expression.</exception>
+        public void SetCondition(String condition, String pattern) {
+            if (condition == null) throw new ArgumentNullException("condition");
+            if (pattern == null) throw new ArgumentNullException("pattern");
+
+            // Build the regex first: if the pattern is malformed the constructor throws
+            // and the previously stored condition/pattern pair stays consistent.
+            var compiled = new Regex(pattern);
+
+            _condition = condition;
+            _conditionPattern = compiled;
+        }
+    }
+}
\ No newline at end of file
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Hunspell {
+ public class HunspellDictionary {
+ // Shared word instance stored for dictionary entries that carry no flags.
+ private static readonly HunspellWord NoFlags = new HunspellWord();
+
+ // Line prefixes recognised while reading the affix file.
+ private static readonly String PREFIX_KEY = "PFX";
+ private static readonly String SUFFIX_KEY = "SFX";
+ private static readonly String FLAG_KEY = "FLAG";
+ private static readonly String AF_KEY = "AF";
+
+ // Flag type names compared against the value of the FLAG line.
+ private static readonly String NUM_FLAG_TYPE = "num";
+ private static readonly String UTF8_FLAG_TYPE = "UTF-8";
+ private static readonly String LONG_FLAG_TYPE = "long";
+
+ // String.Format templates that anchor an affix condition at the start (prefix)
+ // or at the end (suffix) of the text it is matched against.
+ private static readonly String PREFIX_CONDITION_REGEX_PATTERN = @"^{0}";
+ private static readonly String SUFFIX_CONDITION_REGEX_PATTERN = @"{0}$";
+
+ // Affix rules, keyed by the append text of the rule.
+ private readonly Dictionary<String, List<HunspellAffix>> _prefixes = new Dictionary<String, List<HunspellAffix>>();
+ private readonly Dictionary<String, List<HunspellAffix>> _suffixes = new Dictionary<String, List<HunspellAffix>>();
+ // Dictionary entries, keyed by the word text without its flags.
+ private readonly Dictionary<String, List<HunspellWord>> _words = new Dictionary<String, List<HunspellWord>>();
+ // Alias flag sets, keyed by their 1-based position in the AF table (as a string).
+ private readonly Dictionary<String, Char[]> _aliases = new Dictionary<String, Char[]>();
+ private FlagParsingStrategy _flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
+
+        /// <summary>
+        /// Builds a HunspellDictionary from one affix stream and a single dictionary stream.
+        /// </summary>
+        /// <param name="affix">Stream for reading the hunspell affix file.</param>
+        /// <param name="dictionary">Stream for reading the hunspell dictionary file.</param>
+        /// <exception cref="IOException">Can be thrown while reading from the streams.</exception>
+        /// <exception cref="InvalidDataException">Can be thrown if the content of the files does not meet expected formats.</exception>
+        public HunspellDictionary(Stream affix, Stream dictionary)
+            : this(affix, new Stream[] { dictionary }) {
+            // All work happens in the multi-dictionary constructor.
+        }
+
+        /// <summary>
+        /// Builds a HunspellDictionary from one affix stream and any number of dictionary streams.
+        /// </summary>
+        /// <param name="affix">Stream for reading the hunspell affix file.</param>
+        /// <param name="dictionaries">Streams for reading the hunspell dictionary files.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="IOException">Can be thrown while reading from the streams.</exception>
+        /// <exception cref="InvalidDataException">Can be thrown if the content of the files does not meet expected formats.</exception>
+        public HunspellDictionary(Stream affix, IEnumerable<Stream> dictionaries) {
+            if (affix == null) {
+                throw new ArgumentNullException("affix");
+            }
+            if (dictionaries == null) {
+                throw new ArgumentNullException("dictionaries");
+            }
+
+            // The charset named in the affix file is used to decode the affix file
+            // and every dictionary file.
+            var encoding = Encoding.GetEncoding(ReadDictionaryEncoding(affix));
+
+            ReadAffixFile(affix, encoding);
+            foreach (var stream in dictionaries) {
+                ReadDictionaryFile(stream, encoding);
+            }
+        }
+
+        /// <summary>
+        /// Looks up the HunspellWords stored for the given word.
+        /// </summary>
+        /// <param name="word">Word to look up; matched exactly against the dictionary entries.</param>
+        /// <returns>The HunspellWords registered for the word, or <c>null</c> if there are none.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
+        public IEnumerable<HunspellWord> LookupWord(String word) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellWord> matches;
+            return _words.TryGetValue(word, out matches) ? matches : null;
+        }
+
+        /// <summary>
+        /// Looks up the prefix rules whose append equals the String formed from the given
+        /// char array, offset and length.
+        /// </summary>
+        /// <param name="word">Char array to generate the String from.</param>
+        /// <param name="offset">Offset in the char array that the String starts at.</param>
+        /// <param name="length">Length from the offset that the String is.</param>
+        /// <returns>The matching prefix rules, or <c>null</c> if none are found.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
+        public IEnumerable<HunspellAffix> LookupPrefix(char[] word, int offset, int length) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellAffix> matches;
+            var append = new String(word, offset, length);
+            return _prefixes.TryGetValue(append, out matches) ? matches : null;
+        }
+
+        /// <summary>
+        /// Looks up the suffix rules whose append equals the String formed from the given
+        /// char array, offset and length.
+        /// </summary>
+        /// <param name="word">Char array to generate the String from.</param>
+        /// <param name="offset">Offset in the char array that the String starts at.</param>
+        /// <param name="length">Length from the offset that the String is.</param>
+        /// <returns>The matching suffix rules, or <c>null</c> if none are found.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
+        public IEnumerable<HunspellAffix> LookupSuffix(char[] word, int offset, int length) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellAffix> matches;
+            var append = new String(word, offset, length);
+            return _suffixes.TryGetValue(append, out matches) ? matches : null;
+        }
+
+        /// <summary>
+        /// Reads the affix file through the provided Stream, building up the prefix and
+        /// suffix maps, the alias table and the flag parsing strategy.
+        /// </summary>
+        /// <remarks>
+        /// The StreamReader wrapping <paramref name="affixStream"/> is disposed when reading
+        /// finishes, which also closes the underlying stream.
+        /// </remarks>
+        /// <param name="affixStream">Stream to read the content of the affix file from.</param>
+        /// <param name="encoding">Encoding to decode the content of the file.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="IOException">Can be thrown while reading from the Stream.</exception>
+        private void ReadAffixFile(Stream affixStream, Encoding encoding) {
+            if (affixStream == null) throw new ArgumentNullException("affixStream");
+            if (encoding == null) throw new ArgumentNullException("encoding");
+
+            using (var reader = new StreamReader(affixStream, encoding)) {
+                String line;
+                while ((line = reader.ReadLine()) != null) {
+                    // Directive keywords are fixed ASCII tokens, so they are compared
+                    // ordinally rather than with the current culture's linguistic rules.
+                    if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal)) {
+                        ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
+                    } else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal)) {
+                        ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
+                    } else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal)) {
+                        // Assume that the FLAG line comes before any prefix or suffixes
+                        // Store the strategy so it can be used when parsing the dic file
+                        _flagParsingStrategy = GetFlagParsingStrategy(line);
+                    } else if (line.StartsWith(AF_KEY, StringComparison.Ordinal)) {
+                        // Parse Alias Flag
+                        ParseAliasFlag(line, reader);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Parses an alias flag (<c>AF</c>) table and stores every alias in the alias map.
+        /// </summary>
+        /// <remarks>
+        /// The header line carries the number of alias rows that follow. Each row is stored
+        /// under its 1-based position in the table, formatted with the invariant culture.
+        /// </remarks>
+        /// <param name="line">Header line of the table (<c>AF count</c>).</param>
+        /// <param name="reader">Reader positioned directly after the header line.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        /// The header or a row is malformed, or the file ends before all rows were read.
+        /// </exception>
+        private void ParseAliasFlag(String line, TextReader reader) {
+            if (line == null) throw new ArgumentNullException("line");
+            if (reader == null) throw new ArgumentNullException("reader");
+
+            var args = Regex.Split(line, "\\s+");
+            Int32 numLines;
+            // The count is machine-readable data, so it is parsed culture-independently.
+            if (args.Length < 2 || !Int32.TryParse(args[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out numLines))
+                throw new InvalidDataException("File corrupted, AF header must specify the number of aliases : " + line);
+
+            for (var i = 0; i < numLines; i++) {
+                line = reader.ReadLine();
+                if (line == null)
+                    throw new InvalidDataException("File corrupted, expected " + numLines + " AF directives but found only " + i);
+
+                var ruleArgs = Regex.Split(line, "\\s+");
+                if (ruleArgs.Length < 2 || ruleArgs[0] != "AF")
+                    throw new InvalidDataException("File corrupted, should be AF directive : " + line);
+
+                var appendFlags = _flagParsingStrategy.ParseFlags(ruleArgs[1]);
+                _aliases.Add((i + 1).ToString(CultureInfo.InvariantCulture), appendFlags);
+            }
+        }
+
+        /// <summary>
+        /// Parses a specific affix rule putting the result into the provided affix map.
+        /// </summary>
+        /// <remarks>
+        /// The header has the form <c>PFX|SFX flag crossProduct count</c> and is followed by
+        /// <c>count</c> rule lines of the form <c>PFX|SFX flag strip append[/flags] [condition]</c>.
+        /// A rule without a condition field is treated as having the condition <c>.</c>.
+        /// </remarks>
+        /// <param name="affixes">Map where the result of the parsing will be put, keyed by append text.</param>
+        /// <param name="header">Header line of the affix rule.</param>
+        /// <param name="reader">TextReader to read the content of the rule from.</param>
+        /// <param name="conditionPattern">Format string used to generate the condition regex pattern.</param>
+        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        /// The header or a rule line is malformed, a flag alias is unknown, or the file ends
+        /// before all announced rule lines were read.
+        /// </exception>
+        private void ParseAffix(Dictionary<String, List<HunspellAffix>> affixes, String header, TextReader reader, String conditionPattern) {
+            if (affixes == null) throw new ArgumentNullException("affixes");
+            if (header == null) throw new ArgumentNullException("header");
+            if (reader == null) throw new ArgumentNullException("reader");
+            if (conditionPattern == null) throw new ArgumentNullException("conditionPattern");
+
+            var args = Regex.Split(header, "\\s+");
+            Int32 numLines;
+            // The count is machine-readable data, so it is parsed culture-independently.
+            if (args.Length < 4 || !Int32.TryParse(args[3], NumberStyles.Integer, CultureInfo.InvariantCulture, out numLines))
+                throw new InvalidDataException("File corrupted, malformed affix header : " + header);
+
+            var crossProduct = args[2].Equals("Y");
+
+            var hasAliases = _aliases.Count > 0;
+            for (var i = 0; i < numLines; i++) {
+                var line = reader.ReadLine();
+                if (line == null)
+                    throw new InvalidDataException("File corrupted, expected " + numLines + " rules but found only " + i + " for : " + header);
+
+                var ruleArgs = Regex.Split(line, "\\s+");
+                if (ruleArgs.Length < 4)
+                    throw new InvalidDataException("File corrupted, malformed affix rule : " + line);
+
+                var affix = new HunspellAffix();
+
+                affix.Flag = _flagParsingStrategy.ParseFlag(ruleArgs[1]);
+                // "0" is the affix file's notation for "strip nothing".
+                affix.Strip = (ruleArgs[2] == "0") ? "" : ruleArgs[2];
+
+                var affixArg = ruleArgs[3];
+
+                var flagSep = affixArg.LastIndexOf('/');
+                if (flagSep != -1) {
+                    var cflag = affixArg.Substring(flagSep + 1);
+
+                    Char[] appendFlags;
+                    if (hasAliases) {
+                        if (!_aliases.TryGetValue(cflag, out appendFlags))
+                            throw new InvalidDataException("File corrupted, unknown flag alias '" + cflag + "' : " + line);
+                    } else {
+                        appendFlags = _flagParsingStrategy.ParseFlags(cflag);
+                    }
+
+                    // Sorted in place; for aliases this sorts the array stored in the alias map.
+                    Array.Sort(appendFlags);
+                    affix.AppendFlags = appendFlags;
+                    affix.Append = affixArg.Substring(0, flagSep);
+                } else {
+                    affix.Append = affixArg;
+                }
+
+                // A missing condition field means the rule applies unconditionally.
+                var condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
+                affix.SetCondition(condition, String.Format(conditionPattern, condition));
+                affix.IsCrossProduct = crossProduct;
+
+                List<HunspellAffix> list;
+                if (!affixes.TryGetValue(affix.Append, out list))
+                    affixes.Add(affix.Append, list = new List<HunspellAffix>());
+
+                list.Add(affix);
+            }
+        }
+
+        /// <summary>
+        /// Parses the encoding specified in the affix file readable through the provided Stream.
+        /// </summary>
+        /// <remarks>
+        /// The stream is read byte by byte up to and including the end of the <c>SET</c> line,
+        /// so the remaining content can afterwards be decoded with the detected encoding.
+        /// A UTF-8 byte order mark at the very start of the stream is skipped.
+        /// </remarks>
+        /// <param name="affix">Stream for reading the affix file.</param>
+        /// <returns>Encoding specified in the affix file.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="affix"/> is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        /// Thrown if the first non-empty non-comment line read from the file does not
+        /// adhere to the format <c>SET encoding</c>, or if no such line exists.
+        /// </exception>
+        private static String ReadDictionaryEncoding(Stream affix) {
+            if (affix == null) throw new ArgumentNullException("affix");
+
+            const String setDirective = "SET ";
+
+            var builder = new StringBuilder();
+            var firstLine = true;
+            for (; ; ) {
+                builder.Length = 0;
+                int ch;
+                while ((ch = affix.ReadByte()) >= 0) {
+                    if (ch == '\n') {
+                        break;
+                    }
+                    if (ch != '\r') {
+                        builder.Append((char)ch);
+                    }
+                }
+
+                if (firstLine) {
+                    firstLine = false;
+                    // Bytes are widened to chars one-to-one, so a UTF-8 BOM shows up as
+                    // the three chars EF BB BF in front of the first line.
+                    if (builder.Length >= 3 && builder[0] == '\u00EF' && builder[1] == '\u00BB' && builder[2] == '\u00BF')
+                        builder.Remove(0, 3);
+                }
+
+                var text = builder.ToString();
+                if (text.Length == 0 || text[0] == '#' || text.Trim().Length == 0) {
+                    // Empty, comment or whitespace-only line: keep looking unless the stream is exhausted.
+                    if (ch < 0)
+                        throw new InvalidDataException("Unexpected end of affix file.");
+
+                    continue;
+                }
+
+                // StartsWith also copes with lines shorter than the directive, which
+                // must be reported as invalid data rather than as an out-of-range error.
+                if (text.StartsWith(setDirective, StringComparison.Ordinal)) {
+                    // cleanup the encoding string, too (whitespace)
+                    return text.Substring(setDirective.Length).Trim();
+                }
+
+                throw new InvalidDataException("The first non-comment line in the affix file must " +
+                                               "be a 'SET charset', was: '" + text + "'");
+            }
+        }
+
+        /// <summary>
+        /// Determines the appropriate <see cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file.
+        /// </summary>
+        /// <param name="flagLine">Line containing the flag information, starting with <c>FLAG</c>.</param>
+        /// <returns>FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="flagLine"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The flag type is missing or not one of <c>num</c>, <c>UTF-8</c> or <c>long</c>.</exception>
+        private static FlagParsingStrategy GetFlagParsingStrategy(String flagLine) {
+            if (flagLine == null) throw new ArgumentNullException("flagLine");
+
+            // Length of the leading "FLAG" keyword. Everything after it is the flag type;
+            // trimming accepts any separator whitespace as well as trailing whitespace.
+            const Int32 keywordLength = 4;
+            var flagType = flagLine.Length > keywordLength
+                ? flagLine.Substring(keywordLength).Trim()
+                : String.Empty;
+
+            if (NUM_FLAG_TYPE.Equals(flagType))
+                return new NumFlagParsingStrategy();
+
+            if (UTF8_FLAG_TYPE.Equals(flagType))
+                return new SimpleFlagParsingStrategy();
+
+            if (LONG_FLAG_TYPE.Equals(flagType))
+                return new DoubleASCIIFlagParsingStrategy();
+
+            throw new ArgumentException("Unknown flag type: " + flagType);
+        }
+
+        /// <summary>
+        /// Reads the dictionary file through the provided Stream, building up the words map.
+        /// </summary>
+        /// <remarks>
+        /// The first line must hold the number of entries. Every following line is
+        /// <c>word[/flags]</c>, optionally followed by whitespace and a morphological
+        /// description, which is ignored when flags are present.
+        /// </remarks>
+        /// <param name="dictionary">Stream to read the dictionary file through.</param>
+        /// <param name="encoding">Encoding used to decode the contents of the file.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="IOException">Can be thrown while reading from the file.</exception>
+        /// <exception cref="InvalidDataException">The file is empty.</exception>
+        /// <exception cref="FormatException">The first line is not a number.</exception>
+        private void ReadDictionaryFile(Stream dictionary, Encoding encoding) {
+            if (dictionary == null) throw new ArgumentNullException("dictionary");
+            if (encoding == null) throw new ArgumentNullException("encoding");
+
+            // The reader is not disposed here, so the caller's stream is left open.
+            var reader = new StreamReader(dictionary, encoding);
+
+            // TODO: don't create millions of strings.
+            var line = reader.ReadLine(); // first line is number of entries
+            if (line == null)
+                throw new InvalidDataException("Unexpected end of dictionary file, expected the number of entries.");
+
+            // The count itself is not needed; parsing only verifies the header line.
+            Int32.Parse(line, CultureInfo.InvariantCulture);
+
+            var hasAliases = _aliases.Count > 0;
+            var flagTerminators = new[] { ' ', '\t' };
+
+            // TODO: the flags themselves can be double-chars (long) or also numeric
+            // either way the trick is to encode them as char... but they must be parsed differently
+            while ((line = reader.ReadLine()) != null) {
+                String entry;
+                HunspellWord wordForm;
+
+                var flagSep = line.LastIndexOf('/');
+                if (flagSep == -1) {
+                    wordForm = NoFlags;
+                    entry = line;
+                } else {
+                    // There can be comments (morph description) after the flags, separated
+                    // by a tab or a space; the flags end at the first such character.
+                    var end = line.IndexOfAny(flagTerminators, flagSep);
+                    var cflag = end == -1 ? line.Substring(flagSep + 1) : line.Substring(flagSep + 1, end - flagSep - 1);
+
+                    wordForm = new HunspellWord(hasAliases ? _aliases[cflag] : _flagParsingStrategy.ParseFlags(cflag));
+
+                    entry = line.Substring(0, flagSep);
+                }
+
+                List<HunspellWord> entries;
+                if (!_words.TryGetValue(entry, out entries))
+                    _words.Add(entry, entries = new List<HunspellWord>());
+
+                entries.Add(wordForm);
+            }
+        }
+
+ #region Nested type: DoubleASCIIFlagParsingStrategy
+
+        /// <summary>
+        /// Implementation of <see cref="FlagParsingStrategy"/> that assumes each flag is encoded as
+        /// two ASCII characters whose codes must be combined into a single character.
+        /// </summary>
+        /// <remarks>
+        /// The first character is placed in the high byte and the second in the low byte, so
+        /// different pairs of ASCII characters always produce different flags.
+        /// </remarks>
+        private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            /// Parses a run of two-character flags into one Char per flag.
+            /// </summary>
+            /// <param name="rawFlags">Concatenated two-character flags.</param>
+            /// <returns>One combined Char per pair; an empty array for an empty string.</returns>
+            /// <exception cref="ArgumentException"><paramref name="rawFlags"/> has an odd length.</exception>
+            public override Char[] ParseFlags(String rawFlags) {
+                if (rawFlags.Length == 0)
+                    return new Char[0];
+
+                if (rawFlags.Length % 2 != 0)
+                    throw new ArgumentException("Long flags must be made of two-character pairs: " + rawFlags, "rawFlags");
+
+                var flags = new Char[rawFlags.Length / 2];
+                for (var i = 0; i < flags.Length; i++) {
+                    var high = rawFlags[2 * i];
+                    var low = rawFlags[2 * i + 1];
+
+                    // Adding the two codes would map e.g. "ab" and "ba" to the same flag;
+                    // shifting the first code keeps every ASCII pair distinct.
+                    flags[i] = unchecked((Char)((high << 8) | low));
+                }
+
+                return flags;
+            }
+        }
+
+ #endregion
+
+ #region Nested type: FlagParsingStrategy
+        /// <summary>
+        /// Base class for the different ways flags can be written in the affix and dic files.
+        /// </summary>
+        private abstract class FlagParsingStrategy {
+            /// <summary>
+            /// Parses the given String and returns the first flag it contains.
+            /// </summary>
+            /// <param name="rawFlag">String to parse into a flag.</param>
+            /// <returns>First flag produced by <see cref="ParseFlags"/>.</returns>
+            /// <exception cref="ArgumentNullException"><paramref name="rawFlag"/> is <c>null</c>.</exception>
+            public Char ParseFlag(String rawFlag) {
+                if (rawFlag == null) {
+                    throw new ArgumentNullException("rawFlag");
+                }
+
+                var parsed = ParseFlags(rawFlag);
+                return parsed[0];
+            }
+
+            /// <summary>
+            /// Parses the given String into all the flags it contains.
+            /// </summary>
+            /// <param name="rawFlags">String to parse into flags.</param>
+            /// <returns>Parsed flags.</returns>
+            public abstract Char[] ParseFlags(String rawFlags);
+        }
+
+ #endregion
+
+ #region Nested type: NumFlagParsingStrategy
+
+        /// <summary>
+        /// Implementation of <see cref="FlagParsingStrategy"/> for flags written as numbers.
+        /// Multiple flags are separated by commas; each number becomes the Char with that code.
+        /// </summary>
+        private class NumFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            /// Parses a comma separated list of numeric flags.
+            /// </summary>
+            /// <param name="rawFlags">Comma separated flag numbers.</param>
+            /// <returns>One Char per list element.</returns>
+            public override Char[] ParseFlags(String rawFlags) {
+                var parts = rawFlags.Trim().Split(',');
+
+                return Array.ConvertAll(parts, part => {
+                    // note, removing the trailing X/leading I for nepali... what is the rule here?!
+                    var digitsOnly = Regex.Replace(part, "[^0-9]", "");
+                    return (Char)Int32.Parse(digitsOnly);
+                });
+            }
+        }
+
+ #endregion
+
+ #region Nested type: SimpleFlagParsingStrategy
+
+        /// <summary>
+        /// Simplest <see cref="FlagParsingStrategy"/>: every char of the String is one flag.
+        /// Can be used with both the ASCII and UTF-8 flag types.
+        /// </summary>
+        private class SimpleFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            /// Splits the String into its individual chars.
+            /// </summary>
+            /// <param name="rawFlags">String whose chars are the flags.</param>
+            /// <returns>A new array holding the chars of <paramref name="rawFlags"/>.</returns>
+            public override Char[] ParseFlags(String rawFlags) {
+                var flags = rawFlags.ToCharArray();
+                return flags;
+            }
+        }
+
+ #endregion
+ }
+}
\ No newline at end of file
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    /// A word stem together with the prefixes and suffixes that were used to generate it.
+    /// </summary>
+    public class HunspellStem {
+        private readonly String _stemText;
+        private readonly List<HunspellAffix> _prefixList = new List<HunspellAffix>();
+        private readonly List<HunspellAffix> _suffixList = new List<HunspellAffix>();
+
+        /// <summary>
+        /// Creates a new Stem wrapping the given word stem.
+        /// </summary>
+        /// <param name="stem">Text of the stem.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="stem"/> is <c>null</c>.</exception>
+        public HunspellStem(String stem) {
+            if (stem == null) {
+                throw new ArgumentNullException("stem");
+            }
+
+            _stemText = stem;
+        }
+
+        /// <summary>
+        /// The actual word stem itself.
+        /// </summary>
+        public String Stem {
+            get { return _stemText; }
+        }
+
+        /// <summary>
+        /// The number of chars in the stem.
+        /// </summary>
+        public Int32 StemLength {
+            get { return _stemText.Length; }
+        }
+
+        /// <summary>
+        /// The prefixes used to generate the stem.
+        /// </summary>
+        public IEnumerable<HunspellAffix> Prefixes {
+            get { return _prefixList; }
+        }
+
+        /// <summary>
+        /// The suffixes used to generate the stem.
+        /// </summary>
+        public IEnumerable<HunspellAffix> Suffixes {
+            get { return _suffixList; }
+        }
+
+        /// <summary>
+        /// Records a prefix used to generate this stem. Prefixes are assumed to be added
+        /// depth first, so each new prefix goes to the front of the list.
+        /// </summary>
+        /// <param name="prefix">Prefix to add to the list of prefixes for this stem.</param>
+        public void AddPrefix(HunspellAffix prefix) {
+            _prefixList.Insert(0, prefix);
+        }
+
+        /// <summary>
+        /// Records a suffix used to generate this stem. Suffixes are assumed to be added
+        /// depth first, so each new suffix goes to the end of the list.
+        /// </summary>
+        /// <param name="suffix">Suffix to add to the list of suffixes for this stem.</param>
+        public void AddSuffix(HunspellAffix suffix) {
+            _suffixList.Add(suffix);
+        }
+    }
+}
\ No newline at end of file
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    /// TokenFilter that stems tokens with hunspell affix rules and word lists. A word can
+    /// have several stems in hunspell, so one consumed token may produce several tokens.
+    /// </summary>
+    public class HunspellStemFilter : TokenFilter {
+        private readonly ITermAttribute _termAtt;
+        private readonly IPositionIncrementAttribute _posIncAtt;
+        private readonly HunspellStemmer _stemmer;
+
+        // Stems of the current input token that have not been emitted yet.
+        private readonly Queue<HunspellStem> _buffer = new Queue<HunspellStem>();
+        // State captured when the first stem of a multi-stem token was emitted.
+        private State _savedState;
+
+        private readonly Boolean _dedup;
+
+        /// <summary>
+        /// Creates a new HunspellStemFilter that stems the tokens of the given TokenStream
+        /// with the affix rules of the provided HunspellDictionary.
+        /// </summary>
+        /// <param name="input">TokenStream whose tokens will be stemmed.</param>
+        /// <param name="dictionary">HunspellDictionary containing the affix rules and words that will be used to stem the tokens.</param>
+        /// <param name="dedup">true if only unique terms should be output.</param>
+        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
+            : base(input) {
+            _posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+            _termAtt = AddAttribute<ITermAttribute>();
+
+            _stemmer = new HunspellStemmer(dictionary);
+            _dedup = dedup;
+        }
+
+        /// <summary>
+        /// Advances to the next token: either a buffered stem of the previous input token
+        /// (emitted with a position increment of 0) or the first stem of the next input token.
+        /// </summary>
+        /// <returns><c>false</c> once the input is exhausted and no stems are buffered.</returns>
+        public override Boolean IncrementToken() {
+            if (_buffer.Count > 0) {
+                // Emit a remaining stem on top of the state saved for its input token.
+                var pending = _buffer.Dequeue();
+
+                RestoreState(_savedState);
+                _posIncAtt.PositionIncrement = 0;
+                _termAtt.SetTermBuffer(pending.Stem, 0, pending.StemLength);
+                return true;
+            }
+
+            if (!input.IncrementToken()) {
+                return false;
+            }
+
+            var term = _termAtt.Term();
+            var stems = _dedup
+                ? _stemmer.UniqueStems(term)
+                : _stemmer.Stem(term);
+            foreach (var candidate in stems) {
+                _buffer.Enqueue(candidate);
+            }
+
+            if (_buffer.Count == 0) {
+                // we do not know this word, return it unchanged
+                return true;
+            }
+
+            var first = _buffer.Dequeue();
+            _termAtt.SetTermBuffer(first.Stem, 0, first.StemLength);
+
+            // Only worth capturing the state when further stems will be emitted from it.
+            if (_buffer.Count > 0) {
+                _savedState = CaptureState();
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Resets the filter and discards any stems still waiting to be emitted.
+        /// </summary>
+        public override void Reset() {
+            base.Reset();
+
+            _buffer.Clear();
+        }
+    }
+}
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Hunspell {
+ /// <summary>
+ /// HunspellStemmer uses the affix rules declared in the HunspellDictionary to generate one or
+ /// more stems for a word. It follows the original hunspell stemming algorithm,
+ /// including recursive suffix stripping.
+ /// </summary>
+ /// <author>Chris Male</author>
+ public class HunspellStemmer {
+     // Upper bound on nested affix stripping. It is never reassigned, so it is a constant.
+     private const Int32 RECURSION_CAP = 2;
+     private readonly HunspellDictionary _dictionary;
+
+     /// <summary>
+     /// Constructs a new HunspellStemmer which will use the provided HunspellDictionary
+     /// to create its stems.
+     /// </summary>
+     /// <param name="dictionary">HunspellDictionary that will be used to create the stems.</param>
+     /// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary"/> is <c>null</c>.</exception>
+     public HunspellStemmer(HunspellDictionary dictionary) {
+         if (dictionary == null) throw new ArgumentNullException("dictionary");
+         _dictionary = dictionary;
+     }
+
+     /// <summary>
+     /// Find the stem(s) of the provided word. The word itself is listed first when the
+     /// dictionary contains it, followed by every stem obtained by stripping affixes.
+     /// </summary>
+     /// <param name="word">Word to find the stems for.</param>
+     /// <returns>List of stems for the word; may contain duplicates.</returns>
+     /// <exception cref="ArgumentNullException">Thrown when <paramref name="word"/> is <c>null</c>.</exception>
+     public IEnumerable<HunspellStem> Stem(String word) {
+         if (word == null) throw new ArgumentNullException("word");
+
+         var stems = new List<HunspellStem>();
+         if (_dictionary.LookupWord(word) != null)
+             stems.Add(new HunspellStem(word));
+
+         stems.AddRange(Stem(word, null, 0));
+         return stems;
+     }
+
+     /// <summary>
+     /// Find the unique stem(s) of the provided word. Works like <see cref="Stem(String)"/>
+     /// but keeps only the first stem for each distinct stem text.
+     /// </summary>
+     /// <param name="word">Word to find the stems for.</param>
+     /// <returns>List of stems for the word, without repeated stem texts.</returns>
+     /// <exception cref="ArgumentNullException">Thrown when <paramref name="word"/> is <c>null</c>.</exception>
+     public IEnumerable<HunspellStem> UniqueStems(String word) {
+         if (word == null) throw new ArgumentNullException("word");
+
+         var stems = new List<HunspellStem>();
+         // Stem texts already emitted; used to filter repeats.
+         var terms = new CharArraySet(8, false);
+         if (_dictionary.LookupWord(word) != null) {
+             stems.Add(new HunspellStem(word));
+             terms.Add(word);
+         }
+
+         foreach (var s in Stem(word, null, 0)) {
+             if (!terms.Contains(s.Stem)) {
+                 stems.Add(s);
+                 terms.Add(s.Stem);
+             }
+         }
+
+         return stems;
+     }
+
+     /// <summary>
+     /// Generates a list of stems for the provided word by stripping suffixes first and
+     /// prefixes second.
+     /// </summary>
+     /// <param name="word">Word to generate the stems for.</param>
+     /// <param name="flags">Flags from a previous stemming step that need to be cross-checked with any affixes in this recursive step. <c>null</c> disables the check.</param>
+     /// <param name="recursionDepth">Level of recursion this stemming step is at.</param>
+     /// <returns>List of stems, or an empty list if no stems are found.</returns>
+     private IEnumerable<HunspellStem> Stem(String word, Char[] flags, Int32 recursionDepth) {
+         if (word == null) throw new ArgumentNullException("word");
+
+         var stems = new List<HunspellStem>();
+         var chars = word.ToCharArray();
+         var length = word.Length;
+
+         // Look up every trailing part of the word, from the whole word down to its last character.
+         for (var i = 0; i < length; i++) {
+             var suffixes = _dictionary.LookupSuffix(chars, i, length - i);
+             if (suffixes == null) continue;
+
+             foreach (var suffix in suffixes) {
+                 if (!HasCrossCheckedFlag(suffix.Flag, flags)) continue;
+
+                 var deAffixedLength = length - suffix.Append.Length;
+
+                 // TODO: can we do this in-place?
+                 var strippedWord = new StringBuilder()
+                     .Append(word, 0, deAffixedLength)
+                     .Append(suffix.Strip)
+                     .ToString();
+
+                 // Single pass: tag each stem with the affix that produced it and collect it.
+                 foreach (var stem in ApplyAffix(strippedWord, suffix, recursionDepth)) {
+                     stem.AddSuffix(suffix);
+                     stems.Add(stem);
+                 }
+             }
+         }
+
+         // Look up leading parts of the word with i running from length - 1 down to 0
+         // (presumably the third argument is a length, as in LookupSuffix; confirm against HunspellDictionary).
+         for (var i = length - 1; i >= 0; i--) {
+             var prefixes = _dictionary.LookupPrefix(chars, 0, i);
+             if (prefixes == null) continue;
+
+             foreach (var prefix in prefixes) {
+                 if (!HasCrossCheckedFlag(prefix.Flag, flags)) continue;
+
+                 var deAffixedStart = prefix.Append.Length;
+                 var deAffixedLength = length - deAffixedStart;
+
+                 var strippedWord = new StringBuilder()
+                     .Append(prefix.Strip)
+                     .Append(word, deAffixedStart, deAffixedLength)
+                     .ToString();
+
+                 foreach (var stem in ApplyAffix(strippedWord, prefix, recursionDepth)) {
+                     stem.AddPrefix(prefix);
+                     stems.Add(stem);
+                 }
+             }
+         }
+
+         return stems;
+     }
+
+     /// <summary>
+     /// Applies the affix rule to the given word, producing a list of stems if any are found.
+     /// </summary>
+     /// <param name="strippedWord">Word from which the affix has been removed and to which the strip has been added.</param>
+     /// <param name="affix">HunspellAffix representing the affix rule itself.</param>
+     /// <param name="recursionDepth">Level of recursion this stemming step is at.</param>
+     /// <returns>List of stems for the word, or an empty list if none are found.</returns>
+     /// <exception cref="ArgumentNullException">Thrown when <paramref name="strippedWord"/> or <paramref name="affix"/> is <c>null</c>.</exception>
+     public IEnumerable<HunspellStem> ApplyAffix(String strippedWord, HunspellAffix affix, Int32 recursionDepth) {
+         if (strippedWord == null) throw new ArgumentNullException("strippedWord");
+         if (affix == null) throw new ArgumentNullException("affix");
+
+         if (!affix.CheckCondition(strippedWord)) {
+             return new List<HunspellStem>();
+         }
+
+         var words = _dictionary.LookupWord(strippedWord);
+         if (words == null) {
+             return new List<HunspellStem>();
+         }
+
+         var stems = new List<HunspellStem>();
+
+         // Computed once and passed by value: incrementing recursionDepth inside the loop
+         // would make later dictionary entries of the same word see a different depth
+         // (and possibly skip recursion) than the first entry.
+         var nextDepth = recursionDepth + 1;
+
+         foreach (var hunspellWord in words) {
+             if (!hunspellWord.HasFlag(affix.Flag)) continue;
+
+             if (affix.IsCrossProduct && recursionDepth < RECURSION_CAP) {
+                 var countBefore = stems.Count;
+                 stems.AddRange(Stem(strippedWord, affix.AppendFlags, nextDepth));
+
+                 // No deeper stem was found, so the stripped word itself is the stem.
+                 if (stems.Count == countBefore) {
+                     stems.Add(new HunspellStem(strippedWord));
+                 }
+             } else {
+                 stems.Add(new HunspellStem(strippedWord));
+             }
+         }
+
+         return stems;
+     }
+
+     /// <summary>
+     /// Checks if the given flag cross checks with the given array of flags.
+     /// </summary>
+     /// <param name="flag">Flag to cross check with the array of flags.</param>
+     /// <param name="flags">Array of flags to cross check against. Can be <c>null</c>.</param>
+     /// <returns><c>true</c> if the flag is found in the array or the array is <c>null</c>, <c>false</c> otherwise.</returns>
+     private static Boolean HasCrossCheckedFlag(Char flag, Char[] flags) {
+         // NOTE(review): Array.BinarySearch is only reliable on a sorted array; this assumes
+         // the dictionary stores its flag arrays sorted - confirm against HunspellDictionary.
+         return flags == null || Array.BinarySearch(flags, flag) >= 0;
+     }
+ }
+}
\ No newline at end of file
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs Wed Jun 6 19:45:59 2012
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.Hunspell {
+ /// <summary>
+ /// A dictionary entry, represented by the set of flags attached to it.
+ /// </summary>
+ public class HunspellWord {
+     private readonly Char[] _flags;
+
+     /// <summary>
+     /// Creates a HunspellWord that carries no flags.
+     /// </summary>
+     public HunspellWord() : this(new Char[0]) {
+     }
+
+     /// <summary>
+     /// Creates a HunspellWord carrying the supplied flags.
+     /// </summary>
+     /// <param name="flags">Flags to associate with the word; must not be <c>null</c>.</param>
+     /// <exception cref="ArgumentNullException">Thrown when <paramref name="flags"/> is <c>null</c>.</exception>
+     public HunspellWord(Char[] flags) {
+         if (flags == null) {
+             throw new ArgumentNullException("flags");
+         }
+
+         _flags = flags;
+     }
+
+     /// <summary>
+     /// Tells whether the given flag is one of the flags associated with this word.
+     /// </summary>
+     /// <param name="flag">Flag to look for.</param>
+     /// <returns><c>true</c> if the flag is associated, <c>false</c> otherwise</returns>
+     public Boolean HasFlag(Char flag) {
+         // Linear scan, so the result does not depend on the order of the flags.
+         return Array.IndexOf(_flags, flag) >= 0;
+     }
+ }
+}
Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj?rev=1347076&r1=1347075&r2=1347076&view=diff
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj (original)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj Wed Jun 6 19:45:59 2012
@@ -98,6 +98,10 @@
<Compile Include="Fa\TestPersianAnalyzer.cs" />
<Compile Include="Fr\TestElision.cs" />
<Compile Include="Fr\TestFrenchAnalyzer.cs" />
+ <Compile Include="Hunspell\HunspellDictionaryLoader.cs" />
+ <Compile Include="Hunspell\TestHunspellDictionary.cs" />
+ <Compile Include="Hunspell\TestHunspellStemFilter.cs" />
+ <Compile Include="Hunspell\TestHunspellStemmer.cs" />
<Compile Include="NGram\TestEdgeNGramTokenFilter.cs" />
<Compile Include="NGram\TestEdgeNGramTokenizer.cs" />
<Compile Include="Miscellaneous\PatternAnalyzerTest.cs" />
@@ -191,6 +195,12 @@
</Content>
</ItemGroup>
<ItemGroup>
+ <EmbeddedResource Include="Hunspell\Dictionaries\en_US.aff" />
+ <EmbeddedResource Include="Hunspell\Dictionaries\en_US.dic" />
+ <EmbeddedResource Include="Hunspell\Dictionaries\fr-moderne.aff" />
+ <EmbeddedResource Include="Hunspell\Dictionaries\fr-moderne.dic" />
+ <EmbeddedResource Include="Hunspell\Dictionaries\nl_NL.aff" />
+ <EmbeddedResource Include="Hunspell\Dictionaries\nl_NL.dic" />
<None Include="Lucene.Net.snk" />
</ItemGroup>
<ItemGroup />
@@ -202,4 +212,4 @@
<Target Name="AfterBuild">
</Target>
-->
-</Project>
+</Project>
\ No newline at end of file
Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff Wed Jun 6 19:45:59 2012
@@ -0,0 +1,207 @@
+# testcomment
+
+
+# Alles so schön!
+
+SET ISO8859-1
+
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+NOSUGGEST !
+
+# ordinal numbers
+COMPOUNDMIN 1
+# only in compounds: 1th, 2th, 3th
+ONLYINCOMPOUND c
+# compound rules:
+# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
+# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
+COMPOUNDRULE 2
+COMPOUNDRULE n*1t
+COMPOUNDRULE n*mp
+WORDCHARS 0123456789
+
+PFX A Y 1
+PFX A 0 re .
+
+PFX I Y 1
+PFX I 0 in .
+
+PFX U Y 1
+PFX U 0 un .
+
+PFX C Y 1
+PFX C 0 de .
+
+PFX E Y 1
+PFX E 0 dis .
+
+PFX F Y 1
+PFX F 0 con .
+
+PFX K Y 1
+PFX K 0 pro .
+
+SFX V N 2
+SFX V e ive e
+SFX V 0 ive [^e]
+
+SFX N Y 3
+SFX N e ion e
+SFX N y ication y
+SFX N 0 en [^ey]
+
+SFX X Y 3
+SFX X e ions e
+SFX X y ications y
+SFX X 0 ens [^ey]
+
+SFX H N 2
+SFX H y ieth y
+SFX H 0 th [^y]
+
+SFX Y Y 1
+SFX Y 0 ly .
+
+SFX G Y 2
+SFX G e ing e
+SFX G 0 ing [^e]
+
+SFX J Y 2
+SFX J e ings e
+SFX J 0 ings [^e]
+
+SFX D Y 4
+SFX D 0 d e
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+SFX T N 4
+SFX T 0 st e
+SFX T y iest [^aeiou]y
+SFX T 0 est [aeiou]y
+SFX T 0 est [^ey]
+
+SFX R Y 4
+SFX R 0 r e
+SFX R y ier [^aeiou]y
+SFX R 0 er [aeiou]y
+SFX R 0 er [^ey]
+
+SFX Z Y 4
+SFX Z 0 rs e
+SFX Z y iers [^aeiou]y
+SFX Z 0 ers [aeiou]y
+SFX Z 0 ers [^ey]
+
+SFX S Y 4
+SFX S y ies [^aeiou]y
+SFX S 0 s [aeiou]y
+SFX S 0 es [sxzh]
+SFX S 0 s [^sxzhy]
+
+SFX P Y 3
+SFX P y iness [^aeiou]y
+SFX P 0 ness [aeiou]y
+SFX P 0 ness [^y]
+
+SFX M Y 1
+SFX M 0 's .
+
+SFX B Y 3
+SFX B 0 able [^aeiou]
+SFX B 0 able ee
+SFX B e able [^aeiou]e
+
+SFX L Y 1
+SFX L 0 ment .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion
Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff
------------------------------------------------------------------------------
svn:eol-style = native