You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2021/02/25 04:06:43 UTC
[lucenenet] 04/08: Lucene.Net.Analysis.Common: Upgraded
Lucene.Net.Analysis.Hunspell namespace to Lucene 4.10.4 because 4.8.1 was
buggy (fixes #418, fixes #419)
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit e7eb28653e9a1b6853fcf3b14027d9198128d5b0
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Wed Feb 24 15:05:30 2021 +0700
Lucene.Net.Analysis.Common: Upgraded Lucene.Net.Analysis.Hunspell namespace to Lucene 4.10.4 because 4.8.1 was buggy (fixes #418, fixes #419)
---
.../Analysis/Hunspell/Dictionary.cs | 463 ++++++++++++++++-----
.../Analysis/Hunspell/HunspellStemFilter.cs | 2 +-
.../Analysis/Hunspell/HunspellStemFilterFactory.cs | 2 +-
.../Analysis/Hunspell/ISO8859_14Decoder.cs | 2 +-
.../Analysis/Hunspell/Stemmer.cs | 315 ++++++++++++--
.../Analysis/Hunspell/Foo.cs | 12 +
.../Analysis/Hunspell/StemmerTestBase.cs | 2 +-
.../Analysis/Hunspell/Test64kAffixes.cs | 69 +++
.../Analysis/Hunspell/TestAllDictionaries.cs | 4 +-
.../Analysis/Hunspell/TestAllDictionaries2.cs | 15 +-
.../Analysis/Hunspell/TestAlternateCasing.cs | 67 +++
.../Analysis/Hunspell/TestCaseInsensitive.cs | 2 +-
.../Analysis/Hunspell/TestCaseSensitive.cs | 71 ++++
.../Analysis/Hunspell/TestCircumfix.cs | 2 +-
.../Analysis/Hunspell/TestComplexPrefix.cs | 2 +-
.../Analysis/Hunspell/TestCondition.cs | 2 +-
.../{TestHomonyms.cs => TestCondition2.cs} | 15 +-
.../Analysis/Hunspell/TestConv.cs | 2 +-
.../Analysis/Hunspell/TestDependencies.cs | 2 +-
.../Analysis/Hunspell/TestDictionary.cs | 3 +-
.../{TestHomonyms.cs => TestDoubleEscape.cs} | 15 +-
.../Analysis/Hunspell/TestEscaped.cs | 2 +-
.../Analysis/Hunspell/TestFlagLong.cs | 3 +-
.../Analysis/Hunspell/TestFlagNum.cs | 2 +-
.../Hunspell/{TestHomonyms.cs => TestFullStrip.cs} | 15 +-
.../Analysis/Hunspell/TestHomonyms.cs | 2 +-
.../Analysis/Hunspell/TestHunspellStemFilter.cs | 2 +-
.../Hunspell/TestHunspellStemFilterFactory.cs | 2 +-
.../Analysis/Hunspell/TestIgnore.cs | 2 +-
.../{TestTwoSuffixes.cs => TestKeepCase.cs} | 28 +-
.../Analysis/Hunspell/TestMorph.cs | 2 +-
.../{TestHomonyms.cs => TestMorphAlias.cs} | 22 +-
.../Hunspell/{TestHomonyms.cs => TestMorphData.cs} | 22 +-
.../{TestTwoSuffixes.cs => TestNeedAffix.cs} | 24 +-
.../{TestTwoSuffixes.cs => TestOnlyInCompound.cs} | 20 +-
.../Analysis/Hunspell/TestOptionalCondition.cs | 2 +-
.../Hunspell/{TestCondition.cs => TestSpaces.cs} | 35 +-
.../Analysis/Hunspell/TestStemmer.cs | 2 +-
...estHomonyms.cs => TestStrangeOvergeneration.cs} | 18 +-
.../Analysis/Hunspell/TestTwoFold.cs | 2 +-
.../Analysis/Hunspell/TestTwoSuffixes.cs | 2 +-
.../Hunspell/{TestHomonyms.cs => TestZeroAffix.cs} | 15 +-
.../{TestHomonyms.cs => TestZeroAffix2.cs} | 15 +-
.../Analysis/Hunspell/alternate-casing.aff | 15 +
.../Analysis/Hunspell/alternate-casing.dic | 4 +
.../Analysis/Hunspell/casesensitive.aff | 16 +
.../Analysis/Hunspell/casesensitive.dic | 4 +
.../Analysis/Hunspell/circumfix.dic | 2 +-
.../Analysis/Hunspell/condition2.aff | 5 +
.../Analysis/Hunspell/condition2.dic | 2 +
.../Analysis/Hunspell/conv.dic | 2 +-
.../Analysis/Hunspell/dependencies.dic | 4 +-
.../Analysis/Hunspell/double-escaped.aff | 5 +
.../Analysis/Hunspell/double-escaped.dic | 2 +
.../Analysis/Hunspell/flaglong.aff | 3 +
.../Analysis/Hunspell/fullstrip.aff | 6 +
.../Analysis/Hunspell/fullstrip.dic | 2 +
.../Analysis/Hunspell/homonyms.dic | 4 +-
.../Analysis/Hunspell/ignore.dic | 4 +-
.../Analysis/Hunspell/keepcase.aff | 6 +
.../Analysis/Hunspell/keepcase.dic | 4 +
.../Analysis/Hunspell/morph.dic | 2 +-
.../Analysis/Hunspell/morphalias.aff | 16 +
.../Analysis/Hunspell/morphalias.dic | 6 +
.../Analysis/Hunspell/morphdata.aff | 10 +
.../Analysis/Hunspell/morphdata.dic | 6 +
.../Analysis/Hunspell/needaffix.aff | 9 +
.../Analysis/Hunspell/needaffix.dic | 4 +
.../Analysis/Hunspell/onlyincompound.aff | 12 +
.../Analysis/Hunspell/onlyincompound.dic | 4 +
.../Analysis/Hunspell/spaces.aff | 5 +
.../Analysis/Hunspell/spaces.dic | 9 +
.../Analysis/Hunspell/strange-overgeneration.aff | 10 +
.../Analysis/Hunspell/strange-overgeneration.dic | 5 +
.../Analysis/Hunspell/twosuffixes.dic | 2 +-
.../Analysis/Hunspell/zeroaffix.aff | 4 +
.../Analysis/Hunspell/zeroaffix.dic | 2 +
.../Analysis/Hunspell/zeroaffix2.aff | 6 +
.../Analysis/Hunspell/zeroaffix2.dic | 2 +
.../Lucene.Net.Tests.Analysis.Common.csproj | 39 +-
src/Lucene.Net.Tests.Analysis.Common/Startup.cs | 8 +-
81 files changed, 1278 insertions(+), 281 deletions(-)
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index 38d19f5..b175b58 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using J2N;
using J2N.Collections.Generic.Extensions;
using J2N.Numerics;
@@ -43,9 +43,10 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
public class Dictionary
{
- private static readonly char[] NOFLAGS = new char[0];
+ private static readonly char[] NOFLAGS = Arrays.Empty<char>();
private const string ALIAS_KEY = "AF";
+ private const string MORPH_ALIAS_KEY = "AM";
private const string PREFIX_KEY = "PFX";
private const string SUFFIX_KEY = "SFX";
private const string FLAG_KEY = "FLAG";
@@ -54,6 +55,12 @@ namespace Lucene.Net.Analysis.Hunspell
private const string IGNORE_KEY = "IGNORE";
private const string ICONV_KEY = "ICONV";
private const string OCONV_KEY = "OCONV";
+ private const string FULLSTRIP_KEY = "FULLSTRIP";
+ private const string LANG_KEY = "LANG";
+ private const string KEEPCASE_KEY = "KEEPCASE";
+ private const string NEEDAFFIX_KEY = "NEEDAFFIX";
+ private const string PSEUDOROOT_KEY = "PSEUDOROOT";
+ private const string ONLYINCOMPOUND_KEY = "ONLYINCOMPOUND";
private const string NUM_FLAG_TYPE = "num";
private const string UTF8_FLAG_TYPE = "UTF-8";
@@ -87,9 +94,21 @@ namespace Lucene.Net.Analysis.Hunspell
private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
+ // AF entries
private string[] aliases;
private int aliasCount = 0;
+ // AM entries
+ private string[] morphAliases;
+ private int morphAliasCount = 0;
+
+ // st: morphological entries (either directly, or aliased from AM)
+ private string[] stemExceptions = new string[8];
+ private int stemExceptionCount = 0;
+ // we set this during sorting, so we know to add an extra FST output.
+ // when set, some words have exceptional stems, and the last entry is a pointer to stemExceptions
+ internal bool hasStemExceptions;
+
private readonly DirectoryInfo tempDir = OfflineSorter.DefaultTempDir(); // TODO: make this configurable?
internal bool ignoreCase;
@@ -97,6 +116,9 @@ namespace Lucene.Net.Analysis.Hunspell
internal bool twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
internal int circumfix = -1; // circumfix flag, or -1 if one is not defined
+ internal int keepcase = -1; // keepcase flag, or -1 if one is not defined
+ internal int needaffix = -1; // needaffix flag, or -1 if one is not defined
+ internal int onlyincompound = -1; // onlyincompound flag, or -1 if one is not defined
// ignored characters (dictionary, affix, inputs)
private char[] ignore;
@@ -108,6 +130,14 @@ namespace Lucene.Net.Analysis.Hunspell
internal bool needsInputCleaning;
internal bool needsOutputCleaning;
+ // true if we can strip suffixes "down to nothing"
+ internal bool fullStrip;
+
+ // language declaration of the dictionary
+ internal string language;
+ // true if case algorithms should use alternate (Turkish/Azeri) mapping
+ internal bool alternateCasing;
+
// LUCENENET: Added so we can get better performance than creating the regex in every tight loop.
private static readonly Regex whitespacePattern = new Regex("\\s+", RegexOptions.Compiled);
private static readonly Regex leadingDigitPattern = new Regex("[^0-9]", RegexOptions.Compiled);
@@ -144,58 +174,56 @@ namespace Lucene.Net.Analysis.Hunspell
flagLookup.Add(new BytesRef()); // no flags -> ord 0
FileInfo aff = FileSupport.CreateTempFile("affix", "aff", tempDir);
- using (Stream @out = aff.Open(FileMode.Open, FileAccess.ReadWrite))
- {
- // copy contents of affix stream to temp file
- affix.CopyTo(@out);
- }
-
- // pass 1: get encoding
- string encoding;
- using (Stream aff1 = aff.Open(FileMode.Open, FileAccess.Read))
+ try
{
- encoding = GetDictionaryEncoding(aff1);
- }
+ using (Stream @out = aff.Open(FileMode.Open, FileAccess.ReadWrite))
+ {
+ // copy contents of affix stream to temp file
+ affix.CopyTo(@out);
+ }
- // pass 2: parse affixes
- Encoding decoder = GetSystemEncoding(encoding);
- using (Stream aff2 = aff.Open(FileMode.Open, FileAccess.Read))
- {
- ReadAffixFile(aff2, decoder);
- }
+ // pass 1: get encoding
+ string encoding;
+ using (Stream aff1 = aff.Open(FileMode.Open, FileAccess.Read))
+ {
+ encoding = GetDictionaryEncoding(aff1);
+ }
- // read dictionary entries
- Int32SequenceOutputs o = Int32SequenceOutputs.Singleton;
- Builder<Int32sRef> b = new Builder<Int32sRef>(FST.INPUT_TYPE.BYTE4, o);
- ReadDictionaryFiles(dictionaries, decoder, b);
- words = b.Finish();
- aliases = null; // no longer needed
+ // pass 2: parse affixes
+ Encoding decoder = GetSystemEncoding(encoding);
+ using (Stream aff2 = aff.Open(FileMode.Open, FileAccess.Read))
+ {
+ ReadAffixFile(aff2, decoder);
+ }
- try
- {
- aff.Delete();
+ // read dictionary entries
+ Int32SequenceOutputs o = Int32SequenceOutputs.Singleton;
+ Builder<Int32sRef> b = new Builder<Int32sRef>(FST.INPUT_TYPE.BYTE4, o);
+ ReadDictionaryFiles(dictionaries, decoder, b);
+ words = b.Finish();
+ aliases = null; // no longer needed
+ morphAliases = null; // no longer needed
}
- catch
+ finally
{
- // ignore
+ try
+ {
+ aff.Delete();
+ }
+ catch
+ {
+ // ignore
+ }
}
}
- /// <summary>
- /// Looks up Hunspell word forms from the dictionary
- /// </summary>
+ // only for testing
internal virtual Int32sRef LookupWord(char[] word, int offset, int length)
{
return Lookup(words, word, offset, length);
}
- /// <summary>
- /// Looks up HunspellAffix prefixes that have an append that matches the <see cref="string"/> created from the given <see cref="char"/> array, offset and length
- /// </summary>
- /// <param name="word"> <see cref="char"/> array to generate the <see cref="string"/> from </param>
- /// <param name="offset"> Offset in the <see cref="char"/> array that the <see cref="string"/> starts at </param>
- /// <param name="length"> Length from the offset that the <see cref="string"/> is </param>
- /// <returns> List of HunspellAffix prefixes with an append that matches the <see cref="string"/>, or <c>null</c> if none are found </returns>
+ // only for testing
internal virtual Int32sRef LookupPrefix(char[] word, int offset, int length)
{
return Lookup(prefixes, word, offset, length);
@@ -213,8 +241,6 @@ namespace Lucene.Net.Analysis.Hunspell
return Lookup(suffixes, word, offset, length);
}
- // TODO: this is pretty stupid, considering how the stemming algorithm works
- // we can speed it up to be significantly faster!
internal virtual Int32sRef Lookup(FST<Int32sRef> fst, char[] word, int offset, int length)
{
if (fst == null)
@@ -269,8 +295,8 @@ namespace Lucene.Net.Analysis.Hunspell
/// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
private void ReadAffixFile(Stream affixStream, Encoding decoder)
{
- JCG.SortedDictionary<string, IList<char?>> prefixes = new JCG.SortedDictionary<string, IList<char?>>(StringComparer.Ordinal);
- JCG.SortedDictionary<string, IList<char?>> suffixes = new JCG.SortedDictionary<string, IList<char?>>(StringComparer.Ordinal);
+ var prefixes = new JCG.SortedDictionary<string, IList<int>>(StringComparer.Ordinal);
+ var suffixes = new JCG.SortedDictionary<string, IList<int>>(StringComparer.Ordinal);
IDictionary<string, int?> seenPatterns = new JCG.Dictionary<string, int?>
{
// zero condition -> 0 ord
@@ -285,7 +311,7 @@ namespace Lucene.Net.Analysis.Hunspell
};
var reader = new StreamReader(affixStream, decoder);
- string line = null;
+ string line; // LUCENENET: Removed unnecessary null assignment
int lineNumber = 0;
while ((line = reader.ReadLine()) != null)
{
@@ -299,6 +325,10 @@ namespace Lucene.Net.Analysis.Hunspell
{
ParseAlias(line);
}
+ else if (line.StartsWith(MORPH_ALIAS_KEY, StringComparison.Ordinal))
+ {
+ ParseMorphAlias(line);
+ }
else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
{
ParseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
@@ -322,16 +352,43 @@ namespace Lucene.Net.Analysis.Hunspell
string[] parts = whitespacePattern.Split(line).TrimEnd();
if (parts.Length != 2)
{
- throw new Exception(string.Format("Illegal CIRCUMFIX declaration, line {0}", lineNumber));
+ throw new FormatException(string.Format("Illegal CIRCUMFIX declaration, line {0}", lineNumber));
}
circumfix = flagParsingStrategy.ParseFlag(parts[1]);
}
+ else if (line.StartsWith(KEEPCASE_KEY, StringComparison.Ordinal))
+ {
+ string[] parts = whitespacePattern.Split(line).TrimEnd();
+ if (parts.Length != 2)
+ {
+ throw new FormatException(string.Format("Illegal KEEPCASE declaration, line {0}", lineNumber));
+ }
+ keepcase = flagParsingStrategy.ParseFlag(parts[1]);
+ }
+ else if (line.StartsWith(NEEDAFFIX_KEY, StringComparison.Ordinal) || line.StartsWith(PSEUDOROOT_KEY, StringComparison.Ordinal))
+ {
+ string[] parts = whitespacePattern.Split(line).TrimEnd();
+ if (parts.Length != 2)
+ {
+ throw new FormatException(string.Format("Illegal NEEDAFFIX declaration, line {0}", lineNumber));
+ }
+ needaffix = flagParsingStrategy.ParseFlag(parts[1]);
+ }
+ else if (line.StartsWith(ONLYINCOMPOUND_KEY, StringComparison.Ordinal))
+ {
+ string[] parts = whitespacePattern.Split(line).TrimEnd();
+ if (parts.Length != 2)
+ {
+ throw new FormatException(string.Format("Illegal ONLYINCOMPOUND declaration, line {0}", lineNumber));
+ }
+ onlyincompound = flagParsingStrategy.ParseFlag(parts[1]);
+ }
else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
{
string[] parts = whitespacePattern.Split(line).TrimEnd();
if (parts.Length != 2)
{
- throw new Exception(string.Format("Illegal IGNORE declaration, line {0}", lineNumber));
+ throw new FormatException(string.Format("Illegal IGNORE declaration, line {0}", lineNumber));
}
ignore = parts[1].ToCharArray();
Array.Sort(ignore);
@@ -343,7 +400,7 @@ namespace Lucene.Net.Analysis.Hunspell
string type = parts[0];
if (parts.Length != 2)
{
- throw new Exception(string.Format("Illegal {0} declaration, line {1}", type, lineNumber));
+ throw new FormatException(string.Format("Illegal {0} declaration, line {1}", type, lineNumber));
}
int num = int.Parse(parts[1], CultureInfo.InvariantCulture);
FST<CharsRef> res = ParseConversions(reader, num);
@@ -358,6 +415,15 @@ namespace Lucene.Net.Analysis.Hunspell
needsOutputCleaning |= oconv != null;
}
}
+ else if (line.StartsWith(FULLSTRIP_KEY, StringComparison.Ordinal))
+ {
+ fullStrip = true;
+ }
+ else if (line.StartsWith(LANG_KEY, StringComparison.Ordinal))
+ {
+ language = line.Substring(LANG_KEY.Length).Trim();
+ alternateCasing = "tr_TR".Equals(language, StringComparison.Ordinal) || "az_AZ".Equals(language, StringComparison.Ordinal);
+ }
}
this.prefixes = AffixFST(prefixes);
@@ -382,26 +448,51 @@ namespace Lucene.Net.Analysis.Hunspell
stripOffsets[currentIndex] = currentOffset;
}
- private FST<Int32sRef> AffixFST(JCG.SortedDictionary<string, IList<char?>> affixes)
+ private FST<Int32sRef> AffixFST(JCG.SortedDictionary<string, IList<int>> affixes)
{
Int32SequenceOutputs outputs = Int32SequenceOutputs.Singleton;
Builder<Int32sRef> builder = new Builder<Int32sRef>(FST.INPUT_TYPE.BYTE4, outputs);
Int32sRef scratch = new Int32sRef();
- foreach (KeyValuePair<string, IList<char?>> entry in affixes)
+ foreach (KeyValuePair<string, IList<int>> entry in affixes)
{
Lucene.Net.Util.Fst.Util.ToUTF32(entry.Key, scratch);
- IList<char?> entries = entry.Value;
+ IList<int> entries = entry.Value;
Int32sRef output = new Int32sRef(entries.Count);
- foreach (char? c in entries)
+ foreach (int c in entries)
{
- output.Int32s[output.Length++] = c.HasValue ? c.Value : 0;
+ output.Int32s[output.Length++] = c;
}
builder.Add(scratch, output);
}
return builder.Finish();
}
+ internal static string EscapeDash(string re)
+ {
+ // we have to be careful, even though dash doesn't have a special meaning,
+ // some dictionaries already escape it (e.g. pt_PT), so we don't want to nullify it
+ StringBuilder escaped = new StringBuilder();
+ for (int i = 0; i < re.Length; i++)
+ {
+ char c = re[i];
+ if (c == '-')
+ {
+ escaped.Append("\\-");
+ }
+ else
+ {
+ escaped.Append(c);
+ if (c == '\\' && i + 1 < re.Length)
+ {
+ escaped.Append(re[i + 1]);
+ i++;
+ }
+ }
+ }
+ return escaped.ToString();
+ }
+
/// <summary>
/// Parses a specific affix rule putting the result into the provided affix map
/// </summary>
@@ -413,13 +504,19 @@ namespace Lucene.Net.Analysis.Hunspell
/// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
/// <param name="seenStrips"></param>
/// <exception cref="IOException"> Can be thrown while reading the rule </exception>
- private void ParseAffix(JCG.SortedDictionary<string, IList<char?>> affixes, string header, TextReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
+ private void ParseAffix(JCG.SortedDictionary<string, IList<int>> affixes,
+ string header,
+ TextReader reader,
+ string conditionPattern,
+ IDictionary<string, int?> seenPatterns,
+ IDictionary<string, int?> seenStrips)
{
BytesRef scratch = new BytesRef();
StringBuilder sb = new StringBuilder();
string[] args = whitespacePattern.Split(header).TrimEnd();
bool crossProduct = args[2].Equals("Y", StringComparison.Ordinal);
+ bool isSuffix = conditionPattern == SUFFIX_CONDITION_REGEX_PATTERN;
int numLines = int.Parse(args[3], CultureInfo.InvariantCulture);
affixData = ArrayUtil.Grow(affixData, (currentAffix << 3) + (numLines << 3));
@@ -435,7 +532,7 @@ namespace Lucene.Net.Analysis.Hunspell
// condition is optional
if (ruleArgs.Length < 4)
{
- throw new Exception("The affix file contains a rule with less than four elements: " + line /*, reader.LineNumber */);// LUCENENET TODO: LineNumberReader
+ throw new FormatException("The affix file contains a rule with less than four elements: " + line /*, reader.LineNumber */);// LUCENENET TODO: LineNumberReader
}
char flag = flagParsingStrategy.ParseFlag(ruleArgs[1]);
@@ -443,6 +540,7 @@ namespace Lucene.Net.Analysis.Hunspell
string affixArg = ruleArgs[3];
char[] appendFlags = null;
+ // first: parse continuation classes out of affix
int flagSep = affixArg.LastIndexOf('/');
if (flagSep != -1)
{
@@ -458,19 +556,22 @@ namespace Lucene.Net.Analysis.Hunspell
Array.Sort(appendFlags);
twoStageAffix = true;
}
-
- // TODO: add test and fix zero-affix handling!
+ // zero affix -> empty string
+ if ("0".Equals(affixArg, StringComparison.Ordinal))
+ {
+ affixArg = "";
+ }
string condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
// at least the gascon affix file has this issue
- if (condition.StartsWith("[", StringComparison.Ordinal) && !condition.EndsWith("]", StringComparison.Ordinal))
+ if (condition.StartsWith("[", StringComparison.Ordinal) && condition.IndexOf(']') == -1)
{
condition = condition + "]";
}
// "dash hasn't got special meaning" (we must escape it)
if (condition.IndexOf('-') >= 0)
{
- condition = condition.Replace("-", "\\-");
+ condition = EscapeDash(condition);
}
string regex;
@@ -543,12 +644,17 @@ namespace Lucene.Net.Analysis.Hunspell
affixArg = cleaned.ToString();
}
- if (!affixes.TryGetValue(affixArg, out IList<char?> list) || list == null)
+ if (isSuffix)
{
- affixes[affixArg] = list = new List<char?>();
+ affixArg = new StringBuilder(affixArg).Reverse().ToString();
}
- list.Add((char)currentAffix);
+ if (!affixes.TryGetValue(affixArg, out IList<int> list) || list == null)
+ {
+ affixes[affixArg] = list = new List<int>();
+ }
+
+ list.Add(currentAffix);
currentAffix++;
}
}
@@ -563,7 +669,7 @@ namespace Lucene.Net.Analysis.Hunspell
string[] parts = whitespacePattern.Split(line).TrimEnd();
if (parts.Length != 3)
{
- throw new Exception("invalid syntax: " + line /*, reader.LineNumber */); // LUCENENET TODO: LineNumberReader
+ throw new FormatException("invalid syntax: " + line /*, reader.LineNumber */); // LUCENENET TODO: LineNumberReader
}
if (mappings.Put(parts[1], parts[2]) != null)
{
@@ -617,7 +723,7 @@ namespace Lucene.Net.Analysis.Hunspell
// this test only at the end as ineffective but would allow lines only containing spaces:
if (ch < 0)
{
- throw new Exception("Unexpected end of affix file." /*, 0*/);
+ throw new FormatException("Unexpected end of affix file." /*, 0*/);
}
continue;
}
@@ -646,7 +752,7 @@ namespace Lucene.Net.Analysis.Hunspell
/// </summary>
/// <param name="encoding"> Encoding to retrieve the <see cref="Encoding"/> instance for </param>
/// <returns> <see cref="Encoding"/> for the given encoding <see cref="string"/> </returns>
- // LUCENENET NOTE: This was getJavaEncoding in the original
+ // LUCENENET NOTE: This was getJavaEncoding in Lucene
private Encoding GetSystemEncoding(string encoding)
{
if (string.IsNullOrEmpty(encoding))
@@ -713,12 +819,14 @@ namespace Lucene.Net.Analysis.Hunspell
throw new ArgumentException("Unknown flag type: " + flagType);
}
- internal readonly char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
+ internal const char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
+ internal const char MORPH_SEPARATOR = (char)0x1e; // separator for boundary of entry (may be followed by morph data)
internal virtual string UnescapeEntry(string entry)
{
StringBuilder sb = new StringBuilder();
- for (int i = 0; i < entry.Length; i++)
+ int end = MorphBoundary(entry);
+ for (int i = 0; i < end; i++)
{
char ch = entry[i];
if (ch == '\\' && i + 1 < entry.Length)
@@ -730,14 +838,74 @@ namespace Lucene.Net.Analysis.Hunspell
{
sb.Append(FLAG_SEPARATOR);
}
+ else if (ch == MORPH_SEPARATOR || ch == FLAG_SEPARATOR)
+ {
+ // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!!
+ }
else
{
sb.Append(ch);
}
}
+ sb.Append(MORPH_SEPARATOR);
+ if (end < entry.Length)
+ {
+ for (int i = end; i < entry.Length; i++)
+ {
+ char c = entry[i];
+ if (c == FLAG_SEPARATOR || c == MORPH_SEPARATOR)
+ {
+ // BINARY EXECUTABLES EMBEDDED IN ZULU DICTIONARIES!!!!!!!
+ }
+ else
+ {
+ sb.Append(c);
+ }
+ }
+ }
return sb.ToString();
}
+ internal static int MorphBoundary(string line)
+ {
+ int end = IndexOfSpaceOrTab(line, 0);
+ if (end == -1)
+ {
+ return line.Length;
+ }
+ while (end >= 0 && end < line.Length)
+ {
+ if (line[end] == '\t' ||
+ end + 3 < line.Length &&
+ Character.IsLetter(line[end + 1]) &&
+ Character.IsLetter(line[end + 2]) &&
+ line[end + 3] == ':')
+ {
+ break;
+ }
+ end = IndexOfSpaceOrTab(line, end + 1);
+ }
+ if (end == -1)
+ {
+ return line.Length;
+ }
+ return end;
+ }
+
+ internal static int IndexOfSpaceOrTab(string text, int start)
+ {
+ int pos1 = text.IndexOf('\t', start);
+ int pos2 = text.IndexOf(' ', start);
+ if (pos1 >= 0 && pos2 >= 0)
+ {
+ return Math.Min(pos1, pos2);
+ }
+ else
+ {
+ return Math.Max(pos1, pos2);
+ }
+ }
+
/// <summary>
/// Reads the dictionary file through the provided <see cref="Stream"/>s, building up the words map
/// </summary>
@@ -762,12 +930,30 @@ namespace Lucene.Net.Analysis.Hunspell
while ((line = lines.ReadLine()) != null)
{
+ // wild and unpredictable code comment rules
+ if (line == string.Empty || line[0] == '/' || line[0] == '#' || line[0] == '\t')
+ {
+ continue;
+ }
line = UnescapeEntry(line);
+ // if we haven't seen any stem exceptions, try to parse one
+ if (hasStemExceptions == false)
+ {
+ int morphStart = line.IndexOf(MORPH_SEPARATOR);
+ if (morphStart >= 0 && morphStart < line.Length)
+ {
+ hasStemExceptions = ParseStemException(line.Substring(morphStart + 1)) != null;
+ }
+ }
if (needsInputCleaning)
{
int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1)
{
+ flagSep = line.IndexOf(MORPH_SEPARATOR);
+ }
+ if (flagSep == -1)
+ {
string cleansed = CleanInput(line, sb);
writer.Write(cleansed.ToString().GetBytes(Encoding.UTF8));
}
@@ -805,7 +991,7 @@ namespace Lucene.Net.Analysis.Hunspell
for (int i = scratch1.Length - 1; i >= 0; i--)
{
- if (scratch1.Bytes[scratch1.Offset + i] == this.FLAG_SEPARATOR)
+ if (scratch1.Bytes[scratch1.Offset + i] == FLAG_SEPARATOR || scratch1.Bytes[scratch1.Offset + i] == MORPH_SEPARATOR)
{
scratch1.Length = i;
break;
@@ -818,7 +1004,7 @@ namespace Lucene.Net.Analysis.Hunspell
for (int i = scratch2.Length - 1; i >= 0; i--)
{
- if (scratch2.Bytes[scratch2.Offset + i] == this.FLAG_SEPARATOR)
+ if (scratch2.Bytes[scratch2.Offset + i] == FLAG_SEPARATOR || scratch2.Bytes[scratch2.Offset + i] == MORPH_SEPARATOR)
{
scratch2.Length = i;
break;
@@ -862,29 +1048,18 @@ namespace Lucene.Net.Analysis.Hunspell
line2 = scratchLine.Utf8ToString();
string entry;
char[] wordForm;
+ int end;
- int flagSep = line2.LastIndexOf(FLAG_SEPARATOR);
+ int flagSep = line2.IndexOf(FLAG_SEPARATOR);
if (flagSep == -1)
{
wordForm = NOFLAGS;
- entry = line2;
+ end = line2.IndexOf(MORPH_SEPARATOR);
+ entry = line2.Substring(0, end);
}
else
{
- // note, there can be comments (morph description) after a flag.
- // we should really look for any whitespace: currently just tab and space
- int end = line2.IndexOf('\t', flagSep);
- if (end == -1)
- {
- end = line2.Length;
- }
- int end2 = line2.IndexOf(' ', flagSep);
- if (end2 == -1)
- {
- end2 = line2.Length;
- }
- end = Math.Min(end, end2);
-
+ end = line2.IndexOf(MORPH_SEPARATOR);
string flagPart = line2.Substring(flagSep + 1, end - (flagSep + 1));
if (aliasCount > 0)
{
@@ -895,6 +1070,23 @@ namespace Lucene.Net.Analysis.Hunspell
Array.Sort(wordForm);
entry = line2.Substring(0, flagSep - 0);
}
+ // we possibly have morphological data
+ int stemExceptionID = 0;
+ if (hasStemExceptions && end + 1 < line2.Length)
+ {
+ string stemException = ParseStemException(line2.Substring(end + 1));
+ if (stemException != null)
+ {
+ if (stemExceptionCount == stemExceptions.Length)
+ {
+ int newSize = ArrayUtil.Oversize(stemExceptionCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ stemExceptions = Arrays.CopyOf(stemExceptions, newSize);
+ }
+ stemExceptionID = stemExceptionCount + 1; // we use '0' to indicate no exception for the form
+ stemExceptions[stemExceptionCount++] = stemException;
+ }
+ }
+
// LUCENENET NOTE: CompareToOrdinal is an extension method that works similarly to
// Java's String.compareTo method.
int cmp = currentEntry == null ? 1 : entry.CompareToOrdinal(currentEntry);
@@ -923,8 +1115,17 @@ namespace Lucene.Net.Analysis.Hunspell
currentEntry = entry;
currentOrds = new Int32sRef(); // must be this way
}
- currentOrds.Grow(currentOrds.Length + 1);
- currentOrds.Int32s[currentOrds.Length++] = ord;
+ if (hasStemExceptions)
+ {
+ currentOrds.Grow(currentOrds.Length + 2);
+ currentOrds.Int32s[currentOrds.Length++] = ord;
+ currentOrds.Int32s[currentOrds.Length++] = stemExceptionID;
+ }
+ else
+ {
+ currentOrds.Grow(currentOrds.Length + 1);
+ currentOrds.Int32s[currentOrds.Length++] = ord;
+ }
}
}
@@ -1002,6 +1203,54 @@ namespace Lucene.Net.Analysis.Hunspell
}
}
+ internal string GetStemException(int id)
+ {
+ return stemExceptions[id - 1];
+ }
+
+ private void ParseMorphAlias(string line)
+ {
+ if (morphAliases == null)
+ {
+ //first line should be the aliases count
+ int count = int.Parse(line.Substring(3), CultureInfo.InvariantCulture);
+ morphAliases = new string[count];
+ }
+ else
+ {
+ string arg = line.Substring(2); // leave the space
+ morphAliases[morphAliasCount++] = arg;
+ }
+ }
+
+ private string ParseStemException(string morphData)
+ {
+ // first see if it's an alias
+ if (morphAliasCount > 0)
+ {
+ if (int.TryParse(morphData.Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out int alias))
+ {
+ morphData = morphAliases[alias - 1];
+ } // else fine
+ }
+ // try to parse morph entry
+ int index = morphData.IndexOf(" st:", StringComparison.Ordinal);
+ if (index < 0)
+ {
+ index = morphData.IndexOf("\tst:", StringComparison.Ordinal);
+ }
+ if (index >= 0)
+ {
+ int endIndex = IndexOfSpaceOrTab(morphData, index + 1);
+ if (endIndex < 0)
+ {
+ endIndex = morphData.Length;
+ }
+ return morphData.Substring(index + 4, endIndex - (index + 4));
+ }
+ return null;
+ }
+
/// <summary>
/// Abstraction of the process of parsing flags taken from the affix and dic files
/// </summary>
@@ -1077,8 +1326,6 @@ namespace Lucene.Net.Analysis.Hunspell
/// <summary>
/// Implementation of <see cref="FlagParsingStrategy"/> that assumes each flag is encoded as two ASCII characters whose codes
/// must be combined into a single character.
- ///
- /// TODO (rmuir) test
/// </summary>
private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy
{
@@ -1096,8 +1343,14 @@ namespace Lucene.Net.Analysis.Hunspell
}
for (int i = 0; i < rawFlags.Length; i += 2)
{
- char cookedFlag = (char)((int)rawFlags[i] + (int)rawFlags[i + 1]);
- builder.Append(cookedFlag);
+ char f1 = rawFlags[i];
+ char f2 = rawFlags[i + 1];
+ if (f1 >= 256 || f2 >= 256)
+ {
+ throw new ArgumentException("Invalid flags (LONG flags must be double ASCII): " + rawFlags);
+ }
+ char combined = (char)(f1 << 8 | f2);
+ builder.Append(combined);
}
char[] flags = new char[builder.Length];
@@ -1127,7 +1380,7 @@ namespace Lucene.Net.Analysis.Hunspell
if (ignoreCase && iconv == null)
{
// if we have no input conversion mappings, do this on-the-fly
- ch = char.ToLowerInvariant(ch);
+ ch = CaseFold(ch);
}
reuse.Append(ch);
@@ -1147,7 +1400,7 @@ namespace Lucene.Net.Analysis.Hunspell
{
for (int i = 0; i < reuse.Length; i++)
{
- reuse[i] = char.ToLowerInvariant(reuse[i]);
+ reuse[i] = CaseFold(reuse[i]);
}
}
}
@@ -1155,6 +1408,30 @@ namespace Lucene.Net.Analysis.Hunspell
return reuse.ToString();
}
+ // folds single character (according to LANG if present)
+ internal char CaseFold(char c)
+ {
+ if (alternateCasing)
+ {
+ if (c == 'I')
+ {
+ return 'ı';
+ }
+ else if (c == 'İ')
+ {
+ return 'i';
+ }
+ else
+ {
+ return char.ToLowerInvariant(c);
+ }
+ }
+ else
+ {
+ return char.ToLowerInvariant(c);
+ }
+ }
+
// TODO: this could be more efficient!
internal static void ApplyMappings(FST<CharsRef> fst, StringBuilder sb)
{
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
index d4526d6..d6c15e1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;
using System.Collections.Generic;
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
index f5c044b..1752eab 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using J2N.Text;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
index a406b68..6078954 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Support;
using System;
using System.Text;
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
index ee0cb6b..41d31fc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -1,10 +1,11 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using J2N.Numerics;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Diagnostics;
using Lucene.Net.Store;
using Lucene.Net.Util;
using Lucene.Net.Util.Automaton;
+using Lucene.Net.Util.Fst;
using System;
using System.Collections.Generic;
using System.IO;
@@ -44,6 +45,10 @@ namespace Lucene.Net.Analysis.Hunspell
private readonly StringBuilder scratchSegment = new StringBuilder();
private char[] scratchBuffer = new char[32];
+ // it's '1' if we have no stem exceptions, otherwise every other form
+ // is really an ID pointing to the exception table
+ private readonly int formStep;
+
/// <summary>
/// Constructs a new Stemmer which will use the provided <see cref="Dictionary"/> to create its stems.
/// </summary>
@@ -52,6 +57,20 @@ namespace Lucene.Net.Analysis.Hunspell
{
this.dictionary = dictionary;
this.affixReader = new ByteArrayDataInput(dictionary.affixData);
+ for (int level = 0; level < 3; level++)
+ {
+ if (dictionary.prefixes != null)
+ {
+ prefixArcs[level] = new FST.Arc<Int32sRef>();
+ prefixReaders[level] = dictionary.prefixes.GetBytesReader();
+ }
+ if (dictionary.suffixes != null)
+ {
+ suffixArcs[level] = new FST.Arc<Int32sRef>();
+ suffixReaders[level] = dictionary.suffixes.GetBytesReader();
+ }
+ }
+ formStep = dictionary.hasStemExceptions ? 2 : 1;
}
/// <summary>
@@ -84,18 +103,133 @@ namespace Lucene.Net.Analysis.Hunspell
word = scratchBuffer;
}
+ int caseType = CaseOf(word, length);
+ if (caseType == UPPER_CASE)
+ {
+ // upper: union exact, title, lower
+ CaseFoldTitle(word, length);
+ CaseFoldLower(titleBuffer, length);
+ IList<CharsRef> list = DoStem(word, length, false);
+ list.AddRange(DoStem(titleBuffer, length, true));
+ list.AddRange(DoStem(lowerBuffer, length, true));
+ return list;
+ }
+ else if (caseType == TITLE_CASE)
+ {
+ // title: union exact, lower
+ CaseFoldLower(word, length);
+ IList<CharsRef> list = DoStem(word, length, false);
+ list.AddRange(DoStem(lowerBuffer, length, true));
+ return list;
+ }
+ else
+ {
+ // exact match only
+ return DoStem(word, length, false);
+ }
+ }
+
+ // temporary buffers for case variants
+ private char[] lowerBuffer = new char[8];
+ private char[] titleBuffer = new char[8];
+
+ private const int EXACT_CASE = 0;
+ private const int TITLE_CASE = 1;
+ private const int UPPER_CASE = 2;
+
+ // returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word
+ private int CaseOf(char[] word, int length)
+ {
+ if (dictionary.ignoreCase || length == 0 || !char.IsUpper(word[0]))
+ {
+ return EXACT_CASE;
+ }
+
+ // determine if we are title or uppercase (or something funky, in which case it's exact)
+ bool seenUpper = false;
+ bool seenLower = false;
+ for (int i = 1; i < length; i++)
+ {
+ bool v = char.IsUpper(word[i]);
+ seenUpper |= v;
+ seenLower |= !v;
+ }
+
+ if (!seenLower)
+ {
+ return UPPER_CASE;
+ }
+ else if (!seenUpper)
+ {
+ return TITLE_CASE;
+ }
+ else
+ {
+ return EXACT_CASE;
+ }
+ }
+
+ // folds titlecase variant of word to titleBuffer
+ private void CaseFoldTitle(char[] word, int length)
+ {
+ titleBuffer = ArrayUtil.Grow(titleBuffer, length);
+ System.Array.Copy(word, 0, titleBuffer, 0, length);
+ for (int i = 1; i < length; i++)
+ {
+ titleBuffer[i] = dictionary.CaseFold(titleBuffer[i]);
+ }
+ }
+
+ // folds lowercase variant of word (title cased) to lowerBuffer
+ private void CaseFoldLower(char[] word, int length)
+ {
+ lowerBuffer = ArrayUtil.Grow(lowerBuffer, length);
+ System.Array.Copy(word, 0, lowerBuffer, 0, length);
+ lowerBuffer[0] = dictionary.CaseFold(lowerBuffer[0]);
+ }
+
+ private IList<CharsRef> DoStem(char[] word, int length, bool caseVariant)
+ {
List<CharsRef> stems = new List<CharsRef>();
Int32sRef forms = dictionary.LookupWord(word, 0, length);
if (forms != null)
{
- // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
- // just because it exists, does not make it valid...
- for (int i = 0; i < forms.Length; i++)
+ for (int i = 0; i < forms.Length; i += formStep)
{
- stems.Add(NewStem(word, length));
+ bool checkKeepCase = caseVariant && dictionary.keepcase != -1;
+ bool checkNeedAffix = dictionary.needaffix != -1;
+ bool checkOnlyInCompound = dictionary.onlyincompound != -1;
+ if (checkKeepCase || checkNeedAffix || checkOnlyInCompound)
+ {
+ dictionary.flagLookup.Get(forms.Int32s[forms.Offset + i], scratch);
+ char[] wordFlags = Dictionary.DecodeFlags(scratch);
+ // we are looking for a case variant, but this word does not allow it
+ if (checkKeepCase && Dictionary.HasFlag(wordFlags, (char)dictionary.keepcase))
+ {
+ continue;
+ }
+ // we can't add this form, it's a pseudostem requiring an affix
+ if (checkNeedAffix && Dictionary.HasFlag(wordFlags, (char)dictionary.needaffix))
+ {
+ continue;
+ }
+ // we can't add this form, it only belongs inside a compound word
+ if (checkOnlyInCompound && Dictionary.HasFlag(wordFlags, (char)dictionary.onlyincompound))
+ {
+ continue;
+ }
+ }
+ stems.Add(NewStem(word, length, forms, i));
}
}
- stems.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false));
+ try
+ {
+ stems.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
+ }
+ catch (IOException bogus)
+ {
+ throw new Exception(bogus.ToString(), bogus);
+ }
return stems;
}
@@ -128,12 +262,37 @@ namespace Lucene.Net.Analysis.Hunspell
return deduped;
}
- private CharsRef NewStem(char[] buffer, int length)
+ private CharsRef NewStem(char[] buffer, int length, Int32sRef forms, int formID)
{
+ string exception;
+ if (dictionary.hasStemExceptions)
+ {
+ int exceptionID = forms.Int32s[forms.Offset + formID + 1];
+ if (exceptionID > 0)
+ {
+ exception = dictionary.GetStemException(exceptionID);
+ }
+ else
+ {
+ exception = null;
+ }
+ }
+ else
+ {
+ exception = null;
+ }
+
if (dictionary.needsOutputCleaning)
{
scratchSegment.Length = 0;
- scratchSegment.Append(buffer, 0, length);
+ if (exception != null)
+ {
+ scratchSegment.Append(exception);
+ }
+ else
+ {
+ scratchSegment.Append(buffer, 0, length);
+ }
try
{
Dictionary.ApplyMappings(dictionary.oconv, scratchSegment);
@@ -148,12 +307,26 @@ namespace Lucene.Net.Analysis.Hunspell
}
else
{
- return new CharsRef(buffer, 0, length);
+ if (exception != null)
+ {
+ return new CharsRef(exception);
+ }
+ else
+ {
+ return new CharsRef(buffer, 0, length);
+ }
}
}
// ================================================= Helper Methods ================================================
+ // some state for traversing FSTs
+ private readonly FST.BytesReader[] prefixReaders = new FST.BytesReader[3];
+ private readonly FST.Arc<Int32sRef>[] prefixArcs = new FST.Arc<Int32sRef>[3];
+
+ private readonly FST.BytesReader[] suffixReaders = new FST.BytesReader[3];
+ private readonly FST.Arc<Int32sRef>[] suffixArcs = new FST.Arc<Int32sRef>[3];
+
/// <summary>
/// Generates a list of stems for the provided word
/// </summary>
@@ -170,22 +343,46 @@ namespace Lucene.Net.Analysis.Hunspell
/// but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
/// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
/// this means inner most suffix must also contain circumfix flag. </param>
+ /// <param name="caseVariant"> true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. </param>
/// <returns> <see cref="IList{CharsRef}"/> of stems, or empty list if no stems are found </returns>
- private IList<CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
+ private IList<CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix, bool caseVariant)
{
-
// TODO: allow this stuff to be reused by tokenfilter
List<CharsRef> stems = new List<CharsRef>();
if (doPrefix && dictionary.prefixes != null)
{
- for (int i = length - 1; i >= 0; i--)
+ FST<Int32sRef> fst = dictionary.prefixes;
+ Outputs<Int32sRef> outputs = fst.Outputs;
+ FST.BytesReader bytesReader = prefixReaders[recursionDepth];
+ FST.Arc<Int32sRef> arc = prefixArcs[recursionDepth];
+ fst.GetFirstArc(arc);
+ Int32sRef NO_OUTPUT = outputs.NoOutput;
+ Int32sRef output = NO_OUTPUT;
+ int limit = dictionary.fullStrip ? length : length - 1;
+ for (int i = 0; i < limit; i++)
{
- Int32sRef prefixes = dictionary.LookupPrefix(word, 0, i);
- if (prefixes == null)
+ if (i > 0)
+ {
+ int ch = word[i - 1];
+ if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
+ {
+ break;
+ }
+ else if (arc.Output != NO_OUTPUT)
+ {
+ output = fst.Outputs.Add(output, arc.Output);
+ }
+ }
+ Int32sRef prefixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
+ if (!arc.IsFinal)
{
continue;
}
+ else
+ {
+ prefixes = fst.Outputs.Add(output, arc.NextFinalOutput);
+ }
for (int j = 0; j < prefixes.Length; j++)
{
@@ -205,7 +402,17 @@ namespace Lucene.Net.Analysis.Hunspell
bool compatible;
if (recursionDepth == 0)
{
- compatible = true;
+ if (dictionary.onlyincompound == -1)
+ {
+ compatible = true;
+ }
+ else
+ {
+ // check if affix is allowed in a non-compound word
+ dictionary.flagLookup.Get(append, scratch);
+ char[] appendFlags = Dictionary.DecodeFlags(scratch);
+ compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
+ }
}
else if (crossProduct)
{
@@ -213,7 +420,9 @@ namespace Lucene.Net.Analysis.Hunspell
dictionary.flagLookup.Get(append, scratch);
char[] appendFlags = Dictionary.DecodeFlags(scratch);
if (Debugging.AssertsEnabled) Debugging.Assert(prevFlag >= 0);
- compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+ bool allowed = dictionary.onlyincompound == -1 ||
+ !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
+ compatible = allowed && HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
}
else
{
@@ -238,7 +447,7 @@ namespace Lucene.Net.Analysis.Hunspell
Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
- IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
+ IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix, caseVariant);
stems.AddRange(stemList);
}
@@ -248,13 +457,37 @@ namespace Lucene.Net.Analysis.Hunspell
if (doSuffix && dictionary.suffixes != null)
{
- for (int i = 0; i < length; i++)
+ FST<Int32sRef> fst = dictionary.suffixes;
+ Outputs<Int32sRef> outputs = fst.Outputs;
+ FST.BytesReader bytesReader = suffixReaders[recursionDepth];
+ FST.Arc<Int32sRef> arc = suffixArcs[recursionDepth];
+ fst.GetFirstArc(arc);
+ Int32sRef NO_OUTPUT = outputs.NoOutput;
+ Int32sRef output = NO_OUTPUT;
+ int limit = dictionary.fullStrip ? 0 : 1;
+ for (int i = length; i >= limit; i--)
{
- Int32sRef suffixes = dictionary.LookupSuffix(word, i, length - i);
- if (suffixes == null)
+ if (i < length)
+ {
+ int ch = word[i];
+ if (fst.FindTargetArc(ch, arc, arc, bytesReader) == null)
+ {
+ break;
+ }
+ else if (arc.Output != NO_OUTPUT)
+ {
+ output = fst.Outputs.Add(output, arc.Output);
+ }
+ }
+ Int32sRef suffixes; // LUCENENET: IDE0059 - Removed unnecessary value assignment
+ if (!arc.IsFinal)
{
continue;
}
+ else
+ {
+ suffixes = fst.Outputs.Add(output, arc.NextFinalOutput);
+ }
for (int j = 0; j < suffixes.Length; j++)
{
@@ -274,7 +507,17 @@ namespace Lucene.Net.Analysis.Hunspell
bool compatible;
if (recursionDepth == 0)
{
- compatible = true;
+ if (dictionary.onlyincompound == -1)
+ {
+ compatible = true;
+ }
+ else
+ {
+ // check if affix is allowed in a non-compound word
+ dictionary.flagLookup.Get(append, scratch);
+ char[] appendFlags = Dictionary.DecodeFlags(scratch);
+ compatible = !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
+ }
}
else if (crossProduct)
{
@@ -282,6 +525,8 @@ namespace Lucene.Net.Analysis.Hunspell
dictionary.flagLookup.Get(append, scratch);
char[] appendFlags = Dictionary.DecodeFlags(scratch);
if (Debugging.AssertsEnabled) Debugging.Assert(prevFlag >= 0);
+ bool allowed = dictionary.onlyincompound == -1 ||
+ !Dictionary.HasFlag(appendFlags, (char)dictionary.onlyincompound);
- compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+ compatible = allowed && HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
}
else
@@ -307,7 +552,7 @@ namespace Lucene.Net.Analysis.Hunspell
Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
- IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
+ IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);
stems.AddRange(stemList);
}
@@ -361,8 +606,9 @@ namespace Lucene.Net.Analysis.Hunspell
/// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
/// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
/// this means inner most suffix must also contain circumfix flag. </param>
+ /// <param name="caseVariant"> true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed. </param>
/// <returns> <see cref="IList{CharsRef}"/> of stems for the word, or an empty list if none are found </returns>
- internal IList<CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
+ internal IList<CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix, bool caseVariant)
{
// TODO: just pass this in from before, no need to decode it twice
affixReader.Position = 8 * affix;
@@ -378,7 +624,7 @@ namespace Lucene.Net.Analysis.Hunspell
Int32sRef forms = dictionary.LookupWord(strippedWord, 0, length);
if (forms != null)
{
- for (int i = 0; i < forms.Length; i++)
+ for (int i = 0; i < forms.Length; i += formStep)
{
dictionary.flagLookup.Get(forms.Int32s[forms.Offset + i], scratch);
char[] wordFlags = Dictionary.DecodeFlags(scratch);
@@ -410,7 +656,18 @@ namespace Lucene.Net.Analysis.Hunspell
continue;
}
}
- stems.Add(NewStem(strippedWord, length));
+
+ // we are looking for a case variant, but this word does not allow it
+ if (caseVariant && dictionary.keepcase != -1 && Dictionary.HasFlag(wordFlags, (char)dictionary.keepcase))
+ {
+ continue;
+ }
+ // we aren't decompounding (yet)
+ if (dictionary.onlyincompound != -1 && Dictionary.HasFlag(wordFlags, (char)dictionary.onlyincompound))
+ {
+ continue;
+ }
+ stems.Add(NewStem(strippedWord, length, forms, i));
}
}
}
@@ -432,14 +689,14 @@ namespace Lucene.Net.Analysis.Hunspell
// we took away the first prefix.
// COMPLEXPREFIXES = true: combine with a second prefix and another suffix
// COMPLEXPREFIXES = false: combine with a suffix
- stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+ stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
}
else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
{
// we took away a suffix.
// COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
// COMPLEXPREFIXES = false: combine with another suffix
- stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+ stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
}
}
else if (recursionDepth == 1)
@@ -447,12 +704,12 @@ namespace Lucene.Net.Analysis.Hunspell
if (prefix && dictionary.complexPrefixes)
{
// we took away the second prefix: go look for another suffix
- stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+ stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
}
else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
{
// we took away a prefix, then a suffix: go look for another suffix
- stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+ stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
}
}
}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Foo.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Foo.cs
new file mode 100644
index 0000000..467d181
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Foo.cs
@@ -0,0 +1,12 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Analysis.Hunspell
+{
+ class Foo
+ {
+ }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
index 0b3db96..ce9854f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
new file mode 100644
index 0000000..700f0d2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
@@ -0,0 +1,69 @@
+// Lucene version compatibility level 4.10.4
+using J2N;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Hunspell
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ // Tests that having > 64k affixes actually works and doesn't overflow some internal int
+ public class Test64kAffixes : LuceneTestCase
+ {
+ [Test]
+ public void Test()
+ {
+ DirectoryInfo tempDir = CreateTempDir("64kaffixes");
+ FileInfo affix = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.aff"));
+ FileInfo dict = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.dic"));
+
+ using var affixWriter = new StreamWriter(
+ new FileStream(affix.FullName, FileMode.OpenOrCreate), Encoding.UTF8);
+
+ // 65k affixes with flag 1, then an affix with flag 2
+ affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n");
+ for (int i = 0; i < 65536; i++)
+ {
+ affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n");
+ }
+ affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n");
+ affixWriter.Dispose();
+
+ using var dictWriter = new StreamWriter(
+ new FileStream(dict.FullName, FileMode.OpenOrCreate), Encoding.UTF8);
+
+
+ // drink signed with affix 2 (takes -s)
+ dictWriter.Write("1\ndrink/2\n");
+ dictWriter.Dispose();
+
+ using Stream affStream = new FileStream(affix.FullName, FileMode.OpenOrCreate);
+ using Stream dictStream = new FileStream(dict.FullName, FileMode.OpenOrCreate);
+
+ Dictionary dictionary = new Dictionary(affStream, dictStream);
+ Stemmer stemmer = new Stemmer(dictionary);
+ // drinks should still stem to drink
+ IList<CharsRef> stems = stemmer.Stem("drinks");
+ assertEquals(1, stems.size());
+ assertEquals("drink", stems[0].ToString());
+ }
+ }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
index 5b1c387..f19734f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
using NUnit.Framework;
@@ -195,7 +195,7 @@ namespace Lucene.Net.Analysis.Hunspell
[Test]
public virtual void TestOneDictionary()
{
- string toTest = "hu_HU.zip";
+ string toTest = "zu_ZA.zip";
for (int i = 0; i < tests.Length; i++)
{
if (tests[i].Equals(toTest, StringComparison.Ordinal))
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
index 2d187b2..a05e8d9 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
using NUnit.Framework;
@@ -54,7 +54,7 @@ namespace Lucene.Net.Analysis.Hunspell
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
-//LUCENENET BUG: duplicate mapping of character "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
+ "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
"armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
@@ -111,7 +111,7 @@ namespace Lucene.Net.Analysis.Hunspell
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
-//LUCENENET BUG: Invalid ICONV flag "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
+ "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
"kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
@@ -146,7 +146,7 @@ namespace Lucene.Net.Analysis.Hunspell
"slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff",
"songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff",
"southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff",
-//LUCENENET BUG: Invalid ICONV flag "sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
+ "sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff",
"sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff",
"srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/Srpski_latinica.dic", "dictionaries/Srpski_latinica.aff",
"svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff",
@@ -171,12 +171,11 @@ namespace Lucene.Net.Analysis.Hunspell
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
- "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
+// LUCENENET BUG: System.ArgumentException : expected only one flag, got: Kc "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
"zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff",
-
};
[Test]
@@ -211,9 +210,7 @@ namespace Lucene.Net.Analysis.Hunspell
[Test]
public virtual void TestOneDictionary()
{
- //string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
- // LUCENENET: We can't test Hungarian because of an invalid flag. Switching to Lithuanian.
- string toTest = "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi";
+ string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
for (int i = 0; i < tests.Length; i++)
{
if (tests[i].Equals(toTest, StringComparison.Ordinal))
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAlternateCasing.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAlternateCasing.cs
new file mode 100644
index 0000000..33294b7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAlternateCasing.cs
@@ -0,0 +1,67 @@
+// Lucene version compatibility level 4.10.4
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Hunspell
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestAlternateCasing : StemmerTestBase
+ {
+ public override void BeforeClass()
+ {
+ base.BeforeClass();
+ Init("alternate-casing.aff", "alternate-casing.dic");
+ }
+
+ [Test]
+ public void TestPossibilities()
+ {
+ AssertStemsTo("drink", "drink");
+ AssertStemsTo("DRİNK", "drink");
+ AssertStemsTo("DRINK");
+ AssertStemsTo("drinki", "drink");
+ AssertStemsTo("DRİNKİ", "drink");
+ AssertStemsTo("DRİNKI");
+ AssertStemsTo("DRINKI");
+ AssertStemsTo("DRINKİ");
+ AssertStemsTo("idrink", "drink");
+ AssertStemsTo("İDRİNK", "drink");
+ AssertStemsTo("IDRİNK");
+ AssertStemsTo("IDRINK");
+ AssertStemsTo("İDRINK");
+ AssertStemsTo("idrinki", "drink");
+ AssertStemsTo("İDRİNKİ", "drink");
+ AssertStemsTo("rıver", "rıver");
+ AssertStemsTo("RIVER", "rıver");
+ AssertStemsTo("RİVER");
+ AssertStemsTo("rıverı", "rıver");
+ AssertStemsTo("RIVERI", "rıver");
+ AssertStemsTo("RİVERI");
+ AssertStemsTo("RİVERİ");
+ AssertStemsTo("RIVERİ");
+ AssertStemsTo("ırıver", "rıver");
+ AssertStemsTo("IRIVER", "rıver");
+ AssertStemsTo("IRİVER");
+ AssertStemsTo("İRİVER");
+ AssertStemsTo("İRIVER");
+ AssertStemsTo("ırıverı", "rıver");
+ AssertStemsTo("IRIVERI", "rıver");
+ AssertStemsTo("Irıverı", "rıver");
+ }
+ }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
index 141cc9b..dddd520 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseSensitive.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseSensitive.cs
new file mode 100644
index 0000000..fcdd361
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseSensitive.cs
@@ -0,0 +1,71 @@
+// Lucene version compatibility level 4.10.4
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.Hunspell
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestCaseSensitive : StemmerTestBase
+ {
+ public override void BeforeClass()
+ {
+ base.BeforeClass();
+ Init("casesensitive.aff", "casesensitive.dic");
+ }
+
+ [Test]
+ public void TestAllPossibilities()
+ {
+ AssertStemsTo("drink", "drink");
+ AssertStemsTo("drinks", "drink");
+ AssertStemsTo("drinkS", "drink");
+ AssertStemsTo("gooddrinks", "drink");
+ AssertStemsTo("Gooddrinks", "drink", "drink");
+ AssertStemsTo("GOODdrinks", "drink");
+ AssertStemsTo("gooddrinkS", "drink");
+ AssertStemsTo("GooddrinkS", "drink");
+ AssertStemsTo("gooddrink", "drink");
+ AssertStemsTo("Gooddrink", "drink", "drink");
+ AssertStemsTo("GOODdrink", "drink");
+ AssertStemsTo("Drink", "drink", "Drink");
+ AssertStemsTo("Drinks", "drink", "Drink");
+ AssertStemsTo("DrinkS", "Drink");
+ AssertStemsTo("goodDrinks", "Drink");
+ AssertStemsTo("GoodDrinks", "Drink");
+ AssertStemsTo("GOODDrinks", "Drink");
+ AssertStemsTo("goodDrinkS", "Drink");
+ AssertStemsTo("GoodDrinkS", "Drink");
+ AssertStemsTo("GOODDrinkS", "Drink");
+ AssertStemsTo("goodDrink", "Drink");
+ AssertStemsTo("GoodDrink", "Drink");
+ AssertStemsTo("GOODDrink", "Drink");
+ AssertStemsTo("DRINK", "DRINK", "drink", "Drink");
+ AssertStemsTo("DRINKs", "DRINK");
+ AssertStemsTo("DRINKS", "DRINK", "drink", "Drink");
+ AssertStemsTo("goodDRINKs", "DRINK");
+ AssertStemsTo("GoodDRINKs", "DRINK");
+ AssertStemsTo("GOODDRINKs", "DRINK");
+ AssertStemsTo("goodDRINKS", "DRINK");
+ AssertStemsTo("GoodDRINKS", "DRINK");
+ AssertStemsTo("GOODDRINKS", "DRINK", "drink", "drink");
+ AssertStemsTo("goodDRINK", "DRINK");
+ AssertStemsTo("GoodDRINK", "DRINK");
+ AssertStemsTo("GOODDRINK", "DRINK", "drink", "drink");
+ }
+ }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
index be6b464..c54b741 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
index de47da9..63db496 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
index 4bc54d4..a4e5c94 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition2.cs
similarity index 80%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition2.cs
index 4d120e8..186f3ca 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition2.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,18 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestCondition2 : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("condition2.aff", "condition2.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("monopolies", "monopoly");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
index fe3e44f..912cb9c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
index 2f34243..cdfd87e 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
index 71e42fe..4b85b2d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using J2N.Text;
using Lucene.Net.Util;
using Lucene.Net.Util.Fst;
@@ -9,7 +9,6 @@ using System.Text;
namespace Lucene.Net.Analysis.Hunspell
{
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDoubleEscape.cs
similarity index 80%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDoubleEscape.cs
index 4d120e8..c376766 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDoubleEscape.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,18 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestDoubleEscape : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("double-escaped.aff", "double-escaped.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("adubo", "adubar");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
index df41107..4b77694 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
index 4da446f..31baf60 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -34,6 +34,7 @@ namespace Lucene.Net.Analysis.Hunspell
AssertStemsTo("foo", "foo");
AssertStemsTo("foos", "foo");
AssertStemsTo("fooss");
+ AssertStemsTo("foobogus");
}
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
index 180e5fd..b82036d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFullStrip.cs
similarity index 80%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFullStrip.cs
index 4d120e8..4d66ec3 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFullStrip.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,18 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestFullStrip : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("fullstrip.aff", "fullstrip.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("tasty", "beer");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
index 4d120e8..272a47e 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
index 56cc591..f1228a0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Util;
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
index f6359e4..2171e02 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using Lucene.Net.Analysis.Util;
using NUnit.Framework;
using System;
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
index 22de6b7..35fafdb 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestKeepCase.cs
similarity index 63%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestKeepCase.cs
index fcd840d..f0a81cc 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestKeepCase.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,23 +20,31 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestTwoSuffixes : StemmerTestBase
+ public class TestKeepCase : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("twosuffixes.aff", "twosuffixes.dic");
+ Init("keepcase.aff", "keepcase.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestPossibilities()
{
AssertStemsTo("drink", "drink");
- AssertStemsTo("drinkable", "drink");
+ AssertStemsTo("Drink", "drink");
+ AssertStemsTo("DRINK", "drink");
AssertStemsTo("drinks", "drink");
- AssertStemsTo("drinkableable");
- AssertStemsTo("drinkss");
+ AssertStemsTo("Drinks", "drink");
+ AssertStemsTo("DRINKS", "drink");
+ AssertStemsTo("walk", "walk");
+ AssertStemsTo("walks", "walk");
+ AssertStemsTo("Walk");
+ AssertStemsTo("Walks");
+ AssertStemsTo("WALKS");
+ AssertStemsTo("test", "test");
+ AssertStemsTo("Test");
+ AssertStemsTo("TEST");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
index b8eef84..9fccba1 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphAlias.cs
similarity index 63%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphAlias.cs
index 4d120e8..20a7258 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphAlias.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,25 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestMorphAlias : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("morphalias.aff", "morphalias.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("feet", "foot");
+ AssertStemsTo("feetscratcher", "foot");
+ AssertStemsTo("work", "workverb", "worknoun");
+ AssertStemsTo("works", "workverb", "worknoun");
+ AssertStemsTo("notspecial", "notspecial");
+ AssertStemsTo("simplenoun", "simplenoun");
+ AssertStemsTo("simplenouns", "simplenoun");
+ AssertStemsTo("simplenounscratcher");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphData.cs
similarity index 63%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphData.cs
index 4d120e8..0978fc3 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorphData.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,25 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestMorphData : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("morphdata.aff", "morphdata.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("feet", "foot");
+ AssertStemsTo("feetscratcher", "foot");
+ AssertStemsTo("work", "workverb", "worknoun");
+ AssertStemsTo("works", "workverb", "worknoun");
+ AssertStemsTo("notspecial", "notspecial");
+ AssertStemsTo("simplenoun", "simplenoun");
+ AssertStemsTo("simplenouns", "simplenoun");
+ AssertStemsTo("simplenounscratcher");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestNeedAffix.cs
similarity index 69%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestNeedAffix.cs
index fcd840d..8ac9aa7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestNeedAffix.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,23 +20,27 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestTwoSuffixes : StemmerTestBase
+ public class TestNeedAffix : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("twosuffixes.aff", "twosuffixes.dic");
+ Init("needaffix.aff", "needaffix.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestPossibilities()
{
AssertStemsTo("drink", "drink");
- AssertStemsTo("drinkable", "drink");
AssertStemsTo("drinks", "drink");
- AssertStemsTo("drinkableable");
- AssertStemsTo("drinkss");
+ AssertStemsTo("walk");
+ AssertStemsTo("walks", "walk");
+ AssertStemsTo("prewalk", "walk");
+ AssertStemsTo("prewalks", "walk");
+ AssertStemsTo("test");
+ AssertStemsTo("pretest");
+ AssertStemsTo("tests");
+ AssertStemsTo("pretests");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOnlyInCompound.cs
similarity index 76%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOnlyInCompound.cs
index fcd840d..d9700aa 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOnlyInCompound.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,23 +20,23 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestTwoSuffixes : StemmerTestBase
+ public class TestOnlyInCompound : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("twosuffixes.aff", "twosuffixes.dic");
+ Init("onlyincompound.aff", "onlyincompound.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestPossibilities()
{
AssertStemsTo("drink", "drink");
- AssertStemsTo("drinkable", "drink");
AssertStemsTo("drinks", "drink");
- AssertStemsTo("drinkableable");
- AssertStemsTo("drinkss");
+ AssertStemsTo("drinked");
+ AssertStemsTo("predrink");
+ AssertStemsTo("predrinked");
+ AssertStemsTo("walk");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
index 50deba0..94b9a14 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestSpaces.cs
similarity index 59%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestSpaces.cs
index 4bc54d4..2aebb8e 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestSpaces.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,30 +20,29 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestCondition : StemmerTestBase
+ public class TestSpaces : StemmerTestBase
{
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("condition.aff", "condition.dic");
+ Init("spaces.aff", "spaces.dic");
}
[Test]
- public virtual void TestStemming()
+ public void TestStemming()
{
- AssertStemsTo("hello", "hello");
- AssertStemsTo("try", "try");
- AssertStemsTo("tried", "try");
- AssertStemsTo("work", "work");
- AssertStemsTo("worked", "work");
- AssertStemsTo("rework", "work");
- AssertStemsTo("reworked", "work");
- AssertStemsTo("retried");
- AssertStemsTo("workied");
- AssertStemsTo("tryed");
- AssertStemsTo("tryied");
- AssertStemsTo("helloed");
+ AssertStemsTo("four", "four");
+ AssertStemsTo("fours", "four");
+ AssertStemsTo("five", "five");
+ AssertStemsTo("forty four", "forty four");
+ AssertStemsTo("forty fours", "forty four");
+ AssertStemsTo("forty five", "forty five");
+ AssertStemsTo("fifty", "50");
+ AssertStemsTo("fiftys", "50");
+ AssertStemsTo("sixty", "60");
+ AssertStemsTo("sixty four", "64");
+ AssertStemsTo("fifty four", "54");
+ AssertStemsTo("fifty fours", "54");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
index 84cd54b..4a56814 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStrangeOvergeneration.cs
similarity index 74%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStrangeOvergeneration.cs
index 4d120e8..730d242 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStrangeOvergeneration.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,21 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestStrangeOvergeneration : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("strange-overgeneration.aff", "strange-overgeneration.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("btasty", "beer");
+ AssertStemsTo("tasty");
+ AssertStemsTo("yuck");
+ AssertStemsTo("foo");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
index 9ce1fc9..fdbad8a 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
index fcd840d..8749522 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix.cs
similarity index 80%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix.cs
index 4d120e8..e032155 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,18 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestZeroAffix : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("zeroaffix.aff", "zeroaffix.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("drink", "drinksierranevada");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix2.cs
similarity index 80%
copy from src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
copy to src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix2.cs
index 4d120e8..b22f186 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestZeroAffix2.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.10.4
using NUnit.Framework;
namespace Lucene.Net.Analysis.Hunspell
@@ -20,19 +20,18 @@ namespace Lucene.Net.Analysis.Hunspell
* limitations under the License.
*/
- public class TestHomonyms : StemmerTestBase
+ public class TestZeroAffix2 : StemmerTestBase
{
-
- [OneTimeSetUp]
public override void BeforeClass()
{
base.BeforeClass();
- Init("homonyms.aff", "homonyms.dic");
+ Init("zeroaffix2.aff", "zeroaffix2.dic");
}
+
[Test]
- public virtual void TestExamples()
+ public void TestStemming()
{
- AssertStemsTo("works", "work", "work");
+ AssertStemsTo("b", "beer");
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.aff
new file mode 100644
index 0000000..49618b8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.aff
@@ -0,0 +1,15 @@
+SET UTF-8
+
+LANG tr_TR
+
+PFX A Y 1
+PFX A 0 ı . +dotlessprefix
+
+PFX B Y 1
+PFX B 0 i . +dottedprefix
+
+SFX X Y 1
+SFX X 0 ı . +dotlesssuffix
+
+SFX Y Y 1
+SFX Y 0 i . +dottedsuffix
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.dic
new file mode 100644
index 0000000..5b7c8f4
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/alternate-casing.dic
@@ -0,0 +1,4 @@
+3
+drink/BY
+rıver/AX
+
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.aff
new file mode 100644
index 0000000..9943e62
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.aff
@@ -0,0 +1,16 @@
+SET UTF-8
+
+PFX A Y 1
+PFX A 0 good . +good
+
+PFX B Y 1
+PFX B 0 Good . +Good
+
+PFX C Y 1
+PFX C 0 GOOD . +GOOD
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Y Y 1
+SFX Y 0 S . +S
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.dic
new file mode 100644
index 0000000..edbc34c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/casesensitive.dic
@@ -0,0 +1,4 @@
+3
+drink/XYABC
+Drink/XYABC
+DRINK/XYABC
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
index 571e2e2..0295762 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
@@ -1,2 +1,2 @@
1
-nagy/C [MN]
+nagy/C [MN]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.aff
new file mode 100644
index 0000000..8e06a21
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.aff
@@ -0,0 +1,5 @@
+SET ISO8859-1
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+
+SFX S Y 1
+SFX S y ies [^aeiou]y
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.dic
new file mode 100644
index 0000000..72a8c3e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition2.dic
@@ -0,0 +1,2 @@
+1
+monopoly/S
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
index 6b68dc8..169e17f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
@@ -1,2 +1,2 @@
1
-drink/X [VERB]
+drink/X [VERB]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
index bdba45e..ade5437 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
@@ -1,3 +1,3 @@
2
-drink/RQ [verb]
-drink/S [noun]
+drink/RQ [verb]
+drink/S [noun]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.aff
new file mode 100644
index 0000000..ab74afa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.aff
@@ -0,0 +1,5 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X ar o [^\-]ar
+
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.dic
new file mode 100644
index 0000000..42ddb5e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/double-escaped.dic
@@ -0,0 +1,2 @@
+1
+adubar/X
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
index d05a5da..fb0f423 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
@@ -2,3 +2,6 @@ SET UTF-8
FLAG long
SFX Y1 Y 1
SFX Y1 0 s .
+
+SFX 1Y Y 1
+SFX 1Y 0 bogus .
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.aff
new file mode 100644
index 0000000..9c2de7f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+
+FULLSTRIP
+
+SFX A Y 1
+SFX A beer tasty .
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.dic
new file mode 100644
index 0000000..c948f18
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/fullstrip.dic
@@ -0,0 +1,2 @@
+1
+beer/A
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
index 96d51f1..6357472 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
@@ -1,3 +1,3 @@
2
-work/A [VERB]
-work/B [NOUN]
\ No newline at end of file
+work/A [VERB]
+work/B [NOUN]
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
index 9ae9205..854c509 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
@@ -1,3 +1,3 @@
1
-drink/X [VERB]
-dr-ank/X [VERB]
\ No newline at end of file
+drink/X [VERB]
+dr-ank/X [VERB]
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.aff
new file mode 100644
index 0000000..4b56950
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+
+KEEPCASE Z
+
+SFX X Y 1
+SFX X 0 s . +s
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.dic
new file mode 100644
index 0000000..96b7a48
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/keepcase.dic
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XZ
+test/Z
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
index 6b68dc8..169e17f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
@@ -1,2 +1,2 @@
1
-drink/X [VERB]
+drink/X [VERB]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.aff
new file mode 100644
index 0000000..f408f3f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.aff
@@ -0,0 +1,16 @@
+AM 4
+AM st:foot
+AM st:workverb
+AM st:worknoun
+AM po:garbage
+
+SET UTF-8
+
+SFX X Y 1
+SFX X 0 scratcher .
+
+SFX A Y 1
+SFX A 0 s . +SG3
+
+SFX B Y 1
+SFX B 0 s . +PLUR
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.dic
new file mode 100644
index 0000000..638a2bd
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphalias.dic
@@ -0,0 +1,6 @@
+5
+feet/X 1
+work/A 2
+work/B 3
+notspecial 4
+simplenoun/A
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.aff
new file mode 100644
index 0000000..0448cd7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.aff
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X 0 scratcher .
+
+SFX A Y 1
+SFX A 0 s . +SG3
+
+SFX B Y 1
+SFX B 0 s . +PLUR
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.dic
new file mode 100644
index 0000000..9b7cc9d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morphdata.dic
@@ -0,0 +1,6 @@
+5
+feet/X st:foot
+work/A st:workverb
+work/B st:worknoun
+notspecial
+simplenoun/A
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.aff
new file mode 100644
index 0000000..ea6c41f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.aff
@@ -0,0 +1,9 @@
+SET UTF-8
+
+NEEDAFFIX Z
+
+PFX Y Y 1
+PFX Y 0 pre . pre+
+
+SFX X Y 1
+SFX X 0 s . +s
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.dic
new file mode 100644
index 0000000..3ac76bd
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/needaffix.dic
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XYZ
+test/Z
\ No newline at end of file
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.aff
new file mode 100644
index 0000000..91fc80f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.aff
@@ -0,0 +1,12 @@
+SET UTF-8
+
+ONLYINCOMPOUND A
+
+PFX Y Y 1
+PFX Y 0 pre/A . pre+
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Z Y 1
+SFX Z 0 ed/A . +ed
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.dic
new file mode 100644
index 0000000..8e7b025
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/onlyincompound.dic
@@ -0,0 +1,4 @@
+2
+drink/XYZ
+walk/A
+
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.aff
new file mode 100644
index 0000000..3f2365e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.aff
@@ -0,0 +1,5 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+SFX X Y 1
+SFX X 0 s . +PLUR
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.dic
new file mode 100644
index 0000000..11294ae
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/spaces.dic
@@ -0,0 +1,9 @@
+4
+four/X po:number
+five po:number
+forty four/X po:number
+forty five po:number
+fifty/X st:50
+sixty st:60
+sixty four st:64
+fifty four/X st:54
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.aff
new file mode 100644
index 0000000..470b570
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.aff
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A baz yuck baz
+
+SFX B Y 1
+SFX B bar foo .
+
+SFX C Y 1
+SFX C eer tasty .
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.dic
new file mode 100644
index 0000000..e4b61b3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/strange-overgeneration.dic
@@ -0,0 +1,5 @@
+3
+baz/A
+bar/B
+beer/C
+eer/C
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
index 6b68dc8..169e17f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
@@ -1,2 +1,2 @@
1
-drink/X [VERB]
+drink/X [VERB]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.aff
new file mode 100644
index 0000000..52c36f7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X sierranevada 0 .
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.dic
new file mode 100644
index 0000000..92c08d0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix.dic
@@ -0,0 +1,2 @@
+1
+drinksierranevada/X [VERB]
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.aff
new file mode 100644
index 0000000..72e273f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+FLAG num
+
+SFX 322 Y 1
+SFX 322 eer 0/100 .
+
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.dic
new file mode 100644
index 0000000..4171564
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/zeroaffix2.dic
@@ -0,0 +1,2 @@
+1
+beer/322
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 58188ec..dac5f852 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -25,6 +25,7 @@
<PropertyGroup>
<AssemblyTitle>Lucene.Net.Tests.Analysis.Common</AssemblyTitle>
+ <RootNamespace>Lucene.Net</RootNamespace>
</PropertyGroup>
<ItemGroup>
@@ -36,6 +37,37 @@
</ItemGroup>
<ItemGroup>
+ <None Remove="Analysis\Hunspell\alternate-casing.aff" />
+ <None Remove="Analysis\Hunspell\alternate-casing.dic" />
+ <None Remove="Analysis\Hunspell\casesensitive.aff" />
+ <None Remove="Analysis\Hunspell\casesensitive.dic" />
+ <None Remove="Analysis\Hunspell\condition2.aff" />
+ <None Remove="Analysis\Hunspell\condition2.dic" />
+ <None Remove="Analysis\Hunspell\double-escaped.aff" />
+ <None Remove="Analysis\Hunspell\double-escaped.dic" />
+ <None Remove="Analysis\Hunspell\fullstrip.aff" />
+ <None Remove="Analysis\Hunspell\fullstrip.dic" />
+ <None Remove="Analysis\Hunspell\keepcase.aff" />
+ <None Remove="Analysis\Hunspell\keepcase.dic" />
+ <None Remove="Analysis\Hunspell\morphalias.aff" />
+ <None Remove="Analysis\Hunspell\morphalias.dic" />
+ <None Remove="Analysis\Hunspell\morphdata.aff" />
+ <None Remove="Analysis\Hunspell\morphdata.dic" />
+ <None Remove="Analysis\Hunspell\needaffix.aff" />
+ <None Remove="Analysis\Hunspell\needaffix.dic" />
+ <None Remove="Analysis\Hunspell\onlyincompound.aff" />
+ <None Remove="Analysis\Hunspell\onlyincompound.dic" />
+ <None Remove="Analysis\Hunspell\spaces.aff" />
+ <None Remove="Analysis\Hunspell\spaces.dic" />
+ <None Remove="Analysis\Hunspell\strange-overgeneration.aff" />
+ <None Remove="Analysis\Hunspell\strange-overgeneration.dic" />
+ <None Remove="Analysis\Hunspell\zeroaffix.aff" />
+ <None Remove="Analysis\Hunspell\zeroaffix.dic" />
+ <None Remove="Analysis\Hunspell\zeroaffix2.aff" />
+ <None Remove="Analysis\Hunspell\zeroaffix2.dic" />
+ </ItemGroup>
+
+ <ItemGroup>
<ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj" />
<ProjectReference Include="..\Lucene.Net.TestFramework\Lucene.Net.TestFramework.csproj" />
</ItemGroup>
@@ -43,7 +75,12 @@
<Import Project="$(SolutionDir)build/TestReferences.Common.targets" />
<ItemGroup Condition=" '$(TargetFramework)' == 'netcoreapp3.1' ">
- <PackageReference Include="System.Net.Primitives" Version="$(SystemNetPrimitivesPackageVersion)"/>
+ <PackageReference Include="System.Text.Encoding.CodePages" Version="$(SystemTextEncodingCodePagesPackageVersion)" />
+ <PackageReference Include="System.Net.Primitives" Version="$(SystemNetPrimitivesPackageVersion)" />
+ </ItemGroup>
+
+ <ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp2.1' ">
+ <PackageReference Include="System.Text.Encoding.CodePages" Version="$(SystemTextEncodingCodePagesPackageVersion)" />
</ItemGroup>
<ItemGroup Condition=" '$(TargetFramework)' == 'net48' ">
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
index 3d830d4..a9502ca 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.8.1
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -23,4 +23,10 @@ using Lucene.Net.Util;
// would not occur if it were not here.
public class Startup : LuceneTestFrameworkInitializer
{
+ protected override void TestFrameworkSetUp()
+ {
+#if FEATURE_ENCODINGPROVIDERS
+ System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
+#endif
+ }
}
\ No newline at end of file