You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/03/02 02:47:18 UTC
svn commit: r1573248 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/hunspell/
analysis/common/src/test/org/apache/lucene/analysis/hunspell/
Author: rmuir
Date: Sun Mar 2 01:47:18 2014
New Revision: 1573248
URL: http://svn.apache.org/r1573248
Log:
LUCENE-5483: fix hunspell inaccuracies
Added:
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestComplexPrefix.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagNum.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoSuffixes.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.dic (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseInsensitive.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDependencies.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestMorph.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStemmer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoFold.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun Mar 2 01:47:18 2014
@@ -119,6 +119,12 @@ Bug fixes
* LUCENE-5481: IndexWriter.forceMerge used to run a merge even if there was a
single segment in the index. (Adrien Grand, Mike McCandless)
+* LUCENE-5483: Fix inaccuracies in HunspellStemFilter. Multi-stage affix-stripping,
+ prefix-suffix dependencies, and COMPLEXPREFIXES now work correctly according
+ to the hunspell algorithm. Removed recursionCap parameter, as it's no longer needed: rules for
+ recursive affix application are driven correctly by continuation classes in the affix file.
+ (Robert Muir)
+
Test Framework
* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Sun Mar 2 01:47:18 2014
@@ -65,6 +65,7 @@ public class Dictionary {
private static final String PREFIX_KEY = "PFX";
private static final String SUFFIX_KEY = "SFX";
private static final String FLAG_KEY = "FLAG";
+ private static final String COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
private static final String NUM_FLAG_TYPE = "num";
private static final String UTF8_FLAG_TYPE = "UTF-8";
@@ -104,6 +105,7 @@ public class Dictionary {
private final File tempDir = OfflineSorter.defaultTempDir(); // TODO: make this configurable?
boolean ignoreCase;
+ boolean complexPrefixes;
/**
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
@@ -131,9 +133,10 @@ public class Dictionary {
*/
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
this.ignoreCase = ignoreCase;
- BufferedInputStream buffered = new BufferedInputStream(affix, 8192);
- buffered.mark(8192);
- String encoding = getDictionaryEncoding(affix);
+ // hungarian has thousands of AF before the SET, so a 32k buffer is needed
+ BufferedInputStream buffered = new BufferedInputStream(affix, 32768);
+ buffered.mark(32768);
+ String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
readAffixFile(buffered, decoder);
@@ -235,6 +238,8 @@ public class Dictionary {
// Assume that the FLAG line comes before any prefix or suffixes
// Store the strategy so it can be used when parsing the dic file
flagParsingStrategy = getFlagParsingStrategy(line);
+ } else if (line.equals(COMPLEXPREFIXES_KEY)) {
+ complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java Sun Mar 2 01:47:18 2014
@@ -58,29 +58,16 @@ public final class HunspellStemFilter ex
private final boolean dedup;
private final boolean longestOnly;
- /** Create a {@link HunspellStemFilter} which deduplicates stems and has a maximum
- * recursion level of 2.
- * @see #HunspellStemFilter(TokenStream, Dictionary, int) */
+ /** Create a {@link HunspellStemFilter} outputting all possible stems.
+ * @see #HunspellStemFilter(TokenStream, Dictionary, boolean) */
public HunspellStemFilter(TokenStream input, Dictionary dictionary) {
- this(input, dictionary, 2);
+ this(input, dictionary, true);
}
- /**
- * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
- * Dictionary
- *
- * @param input TokenStream whose tokens will be stemmed
- * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
- * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
- */
- public HunspellStemFilter(TokenStream input, Dictionary dictionary, int recursionCap) {
- this(input, dictionary, true, recursionCap);
- }
-
- /** Create a {@link HunspellStemFilter} which has a maximum recursion level of 2.
- * @see #HunspellStemFilter(TokenStream, Dictionary, boolean, int) */
+ /** Create a {@link HunspellStemFilter} outputting all possible stems.
+ * @see #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean) */
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup) {
- this(input, dictionary, dedup, 2);
+ this(input, dictionary, dedup, false);
}
/**
@@ -89,27 +76,12 @@ public final class HunspellStemFilter ex
*
* @param input TokenStream whose tokens will be stemmed
* @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
- * @param dedup true if only unique terms should be output.
- * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
- */
- public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, int recursionCap) {
- this(input, dictionary, dedup, recursionCap, false);
- }
-
- /**
- * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
- * Dictionary
- *
- * @param input TokenStream whose tokens will be stemmed
- * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
- * @param dedup true if only unique terms should be output.
- * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
* @param longestOnly true if only the longest term should be output.
*/
- public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, int recursionCap, boolean longestOnly) {
+ public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, boolean longestOnly) {
super(input);
this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
- this.stemmer = new Stemmer(dictionary, recursionCap);
+ this.stemmer = new Stemmer(dictionary);
this.longestOnly = longestOnly;
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilterFactory.java Sun Mar 2 01:47:18 2014
@@ -57,7 +57,6 @@ public class HunspellStemFilterFactory e
private final boolean ignoreCase;
private final boolean longestOnly;
private Dictionary dictionary;
- private int recursionCap;
/** Creates a new HunspellStemFilterFactory */
public HunspellStemFilterFactory(Map<String,String> args) {
@@ -65,11 +64,14 @@ public class HunspellStemFilterFactory e
dictionaryFiles = require(args, PARAM_DICTIONARY);
affixFile = get(args, PARAM_AFFIX);
ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
- recursionCap = getInt(args, PARAM_RECURSION_CAP, 2);
longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
// this isnt necessary: we properly load all dictionaries.
// but recognize and ignore for back compat
getBoolean(args, "strictAffixParsing", true);
+ // this isn't necessary: multi-stage stripping is fixed and
+ // flags like COMPLEXPREFIXES in the data itself control this.
+ // but recognize and ignore for back compat
+ getInt(args, "recursionCap", 0);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -100,6 +102,6 @@ public class HunspellStemFilterFactory e
@Override
public TokenStream create(TokenStream tokenStream) {
- return new HunspellStemFilter(tokenStream, dictionary, true, recursionCap, longestOnly);
+ return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Sun Mar 2 01:47:18 2014
@@ -36,7 +36,6 @@ import org.apache.lucene.util.Version;
* conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
*/
final class Stemmer {
- private final int recursionCap;
private final Dictionary dictionary;
private final BytesRef scratch = new BytesRef();
private final StringBuilder segment = new StringBuilder();
@@ -44,25 +43,13 @@ final class Stemmer {
private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);
/**
- * Constructs a new Stemmer which will use the provided Dictionary to create its stems. Uses the
- * default recursion cap of <code>2</code> (based on Hunspell documentation).
- *
- * @param dictionary Dictionary that will be used to create the stems
- */
- public Stemmer(Dictionary dictionary) {
- this(dictionary, 2);
- }
-
- /**
* Constructs a new Stemmer which will use the provided Dictionary to create its stems.
*
* @param dictionary Dictionary that will be used to create the stems
- * @param recursionCap maximum level of recursion stemmer can go into
*/
- public Stemmer(Dictionary dictionary, int recursionCap) {
+ public Stemmer(Dictionary dictionary) {
this.dictionary = dictionary;
this.affixReader = new ByteArrayDataInput(dictionary.affixData);
- this.recursionCap = recursionCap;
}
/**
@@ -94,7 +81,7 @@ final class Stemmer {
stems.add(new CharsRef(word, 0, length));
}
}
- stems.addAll(stem(word, length, Dictionary.NOFLAGS, 0));
+ stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false));
return stems;
}
@@ -126,62 +113,116 @@ final class Stemmer {
* Generates a list of stems for the provided word
*
* @param word Word to generate the stems for
- * @param flags Flags from a previous stemming step that need to be cross-checked with any affixes in this recursive step
- * @param recursionDepth Level of recursion this stemming step is at
+ * @param previous previous affix that was removed (so we don't remove the same one twice)
+ * @param prevFlag Flag from a previous stemming step that needs to be cross-checked with any affixes in this recursive step
+ * @param prefixFlag flag of the innermost removed prefix, so that when removing a suffix, it's also checked against the word
+ * @param recursionDepth current recursion depth
+ * @param doPrefix true if we should remove prefixes
+ * @param doSuffix true if we should remove suffixes
+ * @param previousWasPrefix true if the previous removal was a prefix:
+ * if we are removing a suffix, and it has no continuation requirements, it's ok.
+ * but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse.
* @return List of stems, or empty list if no stems are found
*/
- private List<CharsRef> stem(char word[], int length, char[] flags, int recursionDepth) {
+ private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix) {
+
// TODO: allow this stuff to be reused by tokenfilter
List<CharsRef> stems = new ArrayList<CharsRef>();
-
- for (int i = 0; i < length; i++) {
- IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
- if (suffixes == null) {
- continue;
- }
-
- for (int j = 0; j < suffixes.length; j++) {
- int suffix = suffixes.ints[suffixes.offset + j];
- affixReader.setPosition(8 * suffix);
- char flag = (char) (affixReader.readShort() & 0xffff);
- if (hasCrossCheckedFlag(flag, flags)) {
- int appendLength = length - i;
- int deAffixedLength = length - appendLength;
- // TODO: can we do this in-place?
+
+ if (doPrefix) {
+ for (int i = length - 1; i >= 0; i--) {
+ IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
+ if (prefixes == null) {
+ continue;
+ }
+
+ for (int j = 0; j < prefixes.length; j++) {
+ int prefix = prefixes.ints[prefixes.offset + j];
+ if (prefix == previous) {
+ continue;
+ }
+ affixReader.setPosition(8 * prefix);
+ char flag = (char) (affixReader.readShort() & 0xffff);
char stripOrd = (char) (affixReader.readShort() & 0xffff);
- dictionary.stripLookup.get(stripOrd, scratch);
- String strippedWord = new StringBuilder().append(word, 0, deAffixedLength).append(scratch.utf8ToString()).toString();
-
- List<CharsRef> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), suffix, recursionDepth);
-
- stems.addAll(stemList);
+ int condition = (char) (affixReader.readShort() & 0xffff);
+ boolean crossProduct = (condition & 1) == 1;
+ condition >>>= 1;
+ char append = (char) (affixReader.readShort() & 0xffff);
+
+ final boolean compatible;
+ if (recursionDepth == 0) {
+ compatible = true;
+ } else if (crossProduct) {
+ // cross check incoming continuation class (flag of previous affix) against list.
+ dictionary.flagLookup.get(append, scratch);
+ char appendFlags[] = Dictionary.decodeFlags(scratch);
+ assert prevFlag >= 0;
+ compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+ } else {
+ compatible = false;
+ }
+
+ if (compatible) {
+ int deAffixedStart = i;
+ int deAffixedLength = length - deAffixedStart;
+
+ dictionary.stripLookup.get(stripOrd, scratch);
+ String strippedWord = new StringBuilder().append(scratch.utf8ToString())
+ .append(word, deAffixedStart, deAffixedLength)
+ .toString();
+
+ List<CharsRef> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), prefix, -1, recursionDepth, true);
+
+ stems.addAll(stemList);
+ }
}
}
- }
-
- for (int i = length - 1; i >= 0; i--) {
- IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
- if (prefixes == null) {
- continue;
- }
-
- for (int j = 0; j < prefixes.length; j++) {
- int prefix = prefixes.ints[prefixes.offset + j];
- affixReader.setPosition(8 * prefix);
- char flag = (char) (affixReader.readShort() & 0xffff);
- if (hasCrossCheckedFlag(flag, flags)) {
- int deAffixedStart = i;
- int deAffixedLength = length - deAffixedStart;
+ }
+
+ if (doSuffix) {
+ for (int i = 0; i < length; i++) {
+ IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
+ if (suffixes == null) {
+ continue;
+ }
+
+ for (int j = 0; j < suffixes.length; j++) {
+ int suffix = suffixes.ints[suffixes.offset + j];
+ if (suffix == previous) {
+ continue;
+ }
+ affixReader.setPosition(8 * suffix);
+ char flag = (char) (affixReader.readShort() & 0xffff);
char stripOrd = (char) (affixReader.readShort() & 0xffff);
-
- dictionary.stripLookup.get(stripOrd, scratch);
- String strippedWord = new StringBuilder().append(scratch.utf8ToString())
- .append(word, deAffixedStart, deAffixedLength)
- .toString();
-
- List<CharsRef> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), prefix, recursionDepth);
-
- stems.addAll(stemList);
+ int condition = (char) (affixReader.readShort() & 0xffff);
+ boolean crossProduct = (condition & 1) == 1;
+ condition >>>= 1;
+ char append = (char) (affixReader.readShort() & 0xffff);
+
+ final boolean compatible;
+ if (recursionDepth == 0) {
+ compatible = true;
+ } else if (crossProduct) {
+ // cross check incoming continuation class (flag of previous affix) against list.
+ dictionary.flagLookup.get(append, scratch);
+ char appendFlags[] = Dictionary.decodeFlags(scratch);
+ assert prevFlag >= 0;
+ compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+ } else {
+ compatible = false;
+ }
+
+ if (compatible) {
+ int appendLength = length - i;
+ int deAffixedLength = length - appendLength;
+ // TODO: can we do this in-place?
+ dictionary.stripLookup.get(stripOrd, scratch);
+ String strippedWord = new StringBuilder().append(word, 0, deAffixedLength).append(scratch.utf8ToString()).toString();
+
+ List<CharsRef> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), suffix, prefixFlag, recursionDepth, false);
+
+ stems.addAll(stemList);
+ }
}
}
}
@@ -193,14 +234,19 @@ final class Stemmer {
* Applies the affix rule to the given word, producing a list of stems if any are found
*
* @param strippedWord Word the affix has been removed and the strip added
+ * @param length valid length of stripped word
* @param affix HunspellAffix representing the affix rule itself
- * @param recursionDepth Level of recursion this stemming step is at
+ * @param prefixFlag when we already stripped a prefix, we can't simply recurse and check the suffix, unless both are compatible
+ * so we must check dictionary form against both to add it as a stem!
+ * @param recursionDepth current recursion depth
+ * @param prefix true if we are removing a prefix (false if it's a suffix)
* @return List of stems for the word, or an empty list if none are found
*/
- List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int recursionDepth) {
+ List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix) {
segment.setLength(0);
segment.append(strippedWord, 0, length);
+ // TODO: just pass this in from before, no need to decode it twice
affixReader.setPosition(8 * affix);
char flag = (char) (affixReader.readShort() & 0xffff);
affixReader.skipBytes(2); // strip
@@ -221,16 +267,45 @@ final class Stemmer {
for (int i = 0; i < forms.length; i++) {
dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
char wordFlags[] = Dictionary.decodeFlags(scratch);
- if (wordFlags != null && Dictionary.hasFlag(wordFlags, flag)) {
+ if (Dictionary.hasFlag(wordFlags, flag)) {
+ // confusing: in this one exception, we already chained the first prefix against the second,
+ // so it doesn't need to be checked against the word
+ boolean chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
+ if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag)) {
+ // see if we can chain prefix thru the suffix continuation class (only if it has any!)
+ dictionary.flagLookup.get(append, scratch);
+ char appendFlags[] = Dictionary.decodeFlags(scratch);
+ if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) {
+ continue;
+ }
+ }
stems.add(new CharsRef(strippedWord, 0, length));
}
}
}
- if (crossProduct && recursionDepth < recursionCap) {
- dictionary.flagLookup.get(append, scratch);
- char appendFlags[] = Dictionary.decodeFlags(scratch);
- stems.addAll(stem(strippedWord, length, appendFlags, ++recursionDepth));
+ if (crossProduct) {
+ if (recursionDepth == 0) {
+ if (prefix) {
+ // we took away the first prefix.
+ // COMPLEXPREFIXES = true: combine with a second prefix and another suffix
+ // COMPLEXPREFIXES = false: combine with another suffix
+ stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes, true, true));
+ } else if (!dictionary.complexPrefixes) {
+ // we took away a suffix.
+ // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
+ // COMPLEXPREFIXES = false: combine with another suffix
+ stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false));
+ }
+ } else if (recursionDepth == 1) {
+ if (prefix && dictionary.complexPrefixes) {
+ // we took away the second prefix: go look for another suffix
+ stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true));
+ } else if (prefix == false && dictionary.complexPrefixes == false) {
+ // we took away a prefix, then a suffix: go look for another suffix
+ stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false));
+ }
+ }
}
return stems;
@@ -243,7 +318,7 @@ final class Stemmer {
* @param flags Array of flags to cross check against. Can be {@code null}
* @return {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise
*/
- private boolean hasCrossCheckedFlag(char flag, char[] flags) {
- return flags.length == 0 || Arrays.binarySearch(flags, flag) >= 0;
+ private boolean hasCrossCheckedFlag(char flag, char[] flags, boolean matchEmpty) {
+ return (flags.length == 0 && matchEmpty) || Arrays.binarySearch(flags, flag) >= 0;
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseInsensitive.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseInsensitive.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseInsensitive.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseInsensitive.java Sun Mar 2 01:47:18 2014
@@ -38,7 +38,8 @@ public class TestCaseInsensitive extends
}
public void testRecursiveSuffix() {
- assertStemsTo("abcd", "ab");
+ // we should not recurse here! as the suffix has no continuation!
+ assertStemsTo("abcd");
}
// all forms unmunched from dictionary
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestComplexPrefix.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestComplexPrefix.java?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestComplexPrefix.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestComplexPrefix.java Sun Mar 2 01:47:18 2014
@@ -0,0 +1,39 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestComplexPrefix extends StemmerTestBase {
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("complexprefix.aff", "complexprefix.dic");
+ }
+
+ public void testPrefixes() {
+ assertStemsTo("ptwofoo", "foo");
+ assertStemsTo("poneptwofoo", "foo");
+ assertStemsTo("foosuf", "foo");
+ assertStemsTo("ptwofoosuf", "foo");
+ assertStemsTo("poneptwofoosuf", "foo");
+ assertStemsTo("ponefoo");
+ assertStemsTo("ponefoosuf");
+ assertStemsTo("ptwoponefoo");
+ assertStemsTo("ptwoponefoosuf");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition.java?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCondition.java Sun Mar 2 01:47:18 2014
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestCondition extends StemmerTestBase {
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("condition.aff", "condition.dic");
+ }
+
+ public void testStemming() {
+ assertStemsTo("hello", "hello");
+ assertStemsTo("try", "try");
+ assertStemsTo("tried", "try");
+ assertStemsTo("work", "work");
+ assertStemsTo("worked", "work");
+ assertStemsTo("rework", "work");
+ assertStemsTo("reworked", "work");
+ assertStemsTo("retried");
+ assertStemsTo("workied");
+ assertStemsTo("tryed");
+ assertStemsTo("tryied");
+ assertStemsTo("helloed");
+ }
+}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDependencies.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDependencies.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDependencies.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDependencies.java Sun Mar 2 01:47:18 2014
@@ -30,10 +30,10 @@ public class TestDependencies extends St
assertStemsTo("drink", "drink", "drink");
assertStemsTo("drinks", "drink", "drink");
assertStemsTo("drinkable", "drink");
- // TODO: BUG! assertStemsTo("drinkables", "drink");
+ assertStemsTo("drinkables", "drink");
assertStemsTo("undrinkable", "drink");
- // TODO: BUG! assertStemsTo("undrinkables", "drink");
+ assertStemsTo("undrinkables", "drink");
assertStemsTo("undrink");
- // TODO: BUG! assertStemsTo("undrinks");
+ assertStemsTo("undrinks");
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java Sun Mar 2 01:47:18 2014
@@ -83,7 +83,7 @@ public class TestDictionary extends Luce
fail("didn't get expected exception");
} catch (ParseException expected) {
assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
- assertEquals(23, expected.getErrorOffset());
+ assertEquals(24, expected.getErrorOffset());
}
affixStream.close();
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagLong.java Sun Mar 2 01:47:18 2014
@@ -0,0 +1,33 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestFlagLong extends StemmerTestBase { // stemmer test driven by the flaglong.aff / flaglong.dic fixtures (the .aff declares "FLAG long")
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("flaglong.aff", "flaglong.dic"); // passes both fixture names to init(), which is defined outside this class
+ }
+
+ public void testLongFlags() {
+ assertStemsTo("foo", "foo"); // flaglong.dic lists the entry foo/Y1Z3F?
+ assertStemsTo("foos", "foo"); // flaglong.aff: the SFX Y1 rule appends "s"
+ assertStemsTo("fooss"); // no expected stems are passed; presumably asserts that none are produced - confirm in StemmerTestBase
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagNum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagNum.java?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagNum.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestFlagNum.java Sun Mar 2 01:47:18 2014
@@ -0,0 +1,33 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestFlagNum extends StemmerTestBase { // stemmer test driven by the flagnum.aff / flagnum.dic fixtures (the .aff declares "FLAG num")
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("flagnum.aff", "flagnum.dic"); // passes both fixture names to init(), which is defined outside this class
+ }
+
+ public void testNumFlags() {
+ assertStemsTo("foo", "foo"); // flagnum.dic lists the entry foo/65000,12,2756
+ assertStemsTo("foos", "foo"); // flagnum.aff: the SFX 65000 rule appends "s"
+ assertStemsTo("fooss"); // no expected stems are passed; presumably asserts that none are produced - confirm in StemmerTestBase
+ }
+}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java Sun Mar 2 01:47:18 2014
@@ -54,13 +54,13 @@ public class TestHunspellStemFilter exte
public void testKeywordAttribute() throws IOException {
MockTokenizer tokenizer = whitespaceMockTokenizer("lucene is awesome");
tokenizer.setEnableChecks(true);
- HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3));
+ HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary);
assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});
// assert with keyword marker
tokenizer = whitespaceMockTokenizer("lucene is awesome");
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
- filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary, TestUtil.nextInt(random(), 1, 3));
+ filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary);
assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
@@ -68,7 +68,7 @@ public class TestHunspellStemFilter exte
public void testLongestOnly() throws IOException {
MockTokenizer tokenizer = whitespaceMockTokenizer("lucene is awesome");
tokenizer.setEnableChecks(true);
- HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, TestUtil.nextInt(random(), 1, 3), true);
+ HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
}
@@ -78,7 +78,7 @@ public class TestHunspellStemFilter exte
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
+ return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
}
};
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
@@ -89,7 +89,7 @@ public class TestHunspellStemFilter exte
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
- return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
+ return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
}
};
checkOneTerm(a, "", "");
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestMorph.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestMorph.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestMorph.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestMorph.java Sun Mar 2 01:47:18 2014
@@ -29,5 +29,6 @@ public class TestMorph extends StemmerTe
public void testExamples() {
assertStemsTo("drink", "drink");
assertStemsTo("drinkable", "drink");
+ assertStemsTo("drinkableable");
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStemmer.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStemmer.java Sun Mar 2 01:47:18 2014
@@ -36,7 +36,8 @@ public class TestStemmer extends Stemmer
}
public void testRecursiveSuffix() {
- assertStemsTo("abcd", "ab");
+ // we should not recurse here, as the suffix has no continuation!
+ assertStemsTo("abcd");
}
// all forms unmunched from dictionary
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoFold.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoFold.java?rev=1573248&r1=1573247&r2=1573248&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoFold.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoFold.java Sun Mar 2 01:47:18 2014
@@ -31,5 +31,7 @@ public class TestTwoFold extends Stemmer
assertStemsTo("drinkable", "drink");
assertStemsTo("drinkables", "drink");
assertStemsTo("drinksable");
+ assertStemsTo("drinkableable");
+ assertStemsTo("drinks");
}
}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoSuffixes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoSuffixes.java?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoSuffixes.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestTwoSuffixes.java Sun Mar 2 01:47:18 2014
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestTwoSuffixes extends StemmerTestBase { // stemmer test driven by twosuffixes.aff, which declares two separate SFX X groups
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("twosuffixes.aff", "twosuffixes.dic"); // passes both fixture names to init(), which is defined outside this class
+ }
+
+ public void testExamples() {
+ assertStemsTo("drink", "drink"); // twosuffixes.dic lists the entry drink/X
+ assertStemsTo("drinkable", "drink"); // first SFX X group in the .aff appends "able"
+ assertStemsTo("drinks", "drink"); // second SFX X group in the .aff appends "s"
+ assertStemsTo("drinkableable"); // no expected stems are passed; presumably asserts that none are produced - confirm in StemmerTestBase
+ assertStemsTo("drinkss"); // likewise called with no expected stems
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.aff?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.aff Sun Mar 2 01:47:18 2014
@@ -0,0 +1,12 @@
+SET UTF-8
+
+COMPLEXPREFIXES
+
+PFX A Y 1
+PFX A 0 pone .
+
+PFX B Y 1
+PFX B 0 ptwo/A .
+
+SFX C Y 1
+SFX C 0 suf .
\ No newline at end of file
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.dic?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/complexprefix.dic Sun Mar 2 01:47:18 2014
@@ -0,0 +1,2 @@
+1
+foo/BC
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.aff?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.aff Sun Mar 2 01:47:18 2014
@@ -0,0 +1,13 @@
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZâ
+
+REP 2
+REP f ph
+REP ph f
+
+PFX A Y 1
+PFX A 0 re .
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.dic?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/condition.dic Sun Mar 2 01:47:18 2014
@@ -0,0 +1,4 @@
+3
+hello
+try/B
+work/AB
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.aff Sun Mar 2 01:47:18 2014
@@ -0,0 +1,4 @@
+SET UTF-8
+FLAG long
+SFX Y1 Y 1
+SFX Y1 0 s .
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.dic?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flaglong.dic Sun Mar 2 01:47:18 2014
@@ -0,0 +1,2 @@
+1
+foo/Y1Z3F?
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.aff?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.aff Sun Mar 2 01:47:18 2014
@@ -0,0 +1,4 @@
+SET UTF-8
+FLAG num
+SFX 65000 Y 1
+SFX 65000 0 s .
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.dic?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/flagnum.dic Sun Mar 2 01:47:18 2014
@@ -0,0 +1,3 @@
+1
+foo/65000,12,2756
+
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.aff?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.aff Sun Mar 2 01:47:18 2014
@@ -0,0 +1,7 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X 0 able . +ABLE
+
+SFX X Y 1
+SFX X 0 s . +s
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.dic?rev=1573248&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/twosuffixes.dic Sun Mar 2 01:47:18 2014
@@ -0,0 +1,2 @@
+1
+drink/X [VERB]