You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/16 22:01:50 UTC
svn commit: r1611161 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/hunspell/
analysis/common/src/test/org/apache/lucene/analysis/hunspell/
Author: rmuir
Date: Wed Jul 16 20:01:49 2014
New Revision: 1611161
URL: http://svn.apache.org/r1611161
Log:
LUCENE-5826: Support proper hunspell case handling and related options
Added:
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jul 16 20:01:49 2014
@@ -108,6 +108,9 @@ New Features
* LUCENE-5806: Extend expressions grammar to support array access in variables.
Added helper class VariableContext to parse complex variable into pieces.
(Ryan Ernst)
+
+* LUCENE-5826: Support proper hunspell case handling, LANG, KEEPCASE, NEEDAFFIX,
+ and ONLYINCOMPOUND flags. (Robert Muir)
API Changes
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Wed Jul 16 20:01:49 2014
@@ -85,6 +85,11 @@ public class Dictionary {
private static final String ICONV_KEY = "ICONV";
private static final String OCONV_KEY = "OCONV";
private static final String FULLSTRIP_KEY = "FULLSTRIP";
+ private static final String LANG_KEY = "LANG";
+ private static final String KEEPCASE_KEY = "KEEPCASE";
+ private static final String NEEDAFFIX_KEY = "NEEDAFFIX";
+ private static final String PSEUDOROOT_KEY = "PSEUDOROOT";
+ private static final String ONLYINCOMPOUND_KEY = "ONLYINCOMPOUND";
private static final String NUM_FLAG_TYPE = "num";
private static final String UTF8_FLAG_TYPE = "UTF-8";
@@ -140,6 +145,9 @@ public class Dictionary {
boolean twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
int circumfix = -1; // circumfix flag, or -1 if one is not defined
+ int keepcase = -1; // keepcase flag, or -1 if one is not defined
+ int needaffix = -1; // needaffix flag, or -1 if one is not defined
+ int onlyincompound = -1; // onlyincompound flag, or -1 if one is not defined
// ignored characters (dictionary, affix, inputs)
private char[] ignore;
@@ -154,6 +162,11 @@ public class Dictionary {
// true if we can strip suffixes "down to nothing"
boolean fullStrip;
+ // language declaration of the dictionary
+ String language;
+ // true if case algorithms should use alternate (Turkish/Azeri) mapping
+ boolean alternateCasing;
+
/**
* Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
* and dictionary files.
@@ -315,6 +328,24 @@ public class Dictionary {
throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
}
circumfix = flagParsingStrategy.parseFlag(parts[1]);
+ } else if (line.startsWith(KEEPCASE_KEY)) {
+ String parts[] = line.split("\\s+");
+ if (parts.length != 2) {
+ throw new ParseException("Illegal KEEPCASE declaration", reader.getLineNumber());
+ }
+ keepcase = flagParsingStrategy.parseFlag(parts[1]);
+ } else if (line.startsWith(NEEDAFFIX_KEY) || line.startsWith(PSEUDOROOT_KEY)) {
+ String parts[] = line.split("\\s+");
+ if (parts.length != 2) {
+ throw new ParseException("Illegal NEEDAFFIX declaration", reader.getLineNumber());
+ }
+ needaffix = flagParsingStrategy.parseFlag(parts[1]);
+ } else if (line.startsWith(ONLYINCOMPOUND_KEY)) {
+ String parts[] = line.split("\\s+");
+ if (parts.length != 2) {
+ throw new ParseException("Illegal ONLYINCOMPOUND declaration", reader.getLineNumber());
+ }
+ onlyincompound = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(IGNORE_KEY)) {
String parts[] = line.split("\\s+");
if (parts.length != 2) {
@@ -340,6 +371,9 @@ public class Dictionary {
}
} else if (line.startsWith(FULLSTRIP_KEY)) {
fullStrip = true;
+ } else if (line.startsWith(LANG_KEY)) {
+ language = line.substring(LANG_KEY.length()).trim();
+ alternateCasing = "tr_TR".equals(language) || "az_AZ".equals(language);
}
}
@@ -1108,7 +1142,7 @@ public class Dictionary {
if (ignoreCase && iconv == null) {
// if we have no input conversion mappings, do this on-the-fly
- ch = Character.toLowerCase(ch);
+ ch = caseFold(ch);
}
reuse.append(ch);
@@ -1122,7 +1156,7 @@ public class Dictionary {
}
if (ignoreCase) {
for (int i = 0; i < reuse.length(); i++) {
- reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i)));
+ reuse.setCharAt(i, caseFold(reuse.charAt(i)));
}
}
}
@@ -1130,6 +1164,21 @@ public class Dictionary {
return reuse;
}
+ /** folds single character (according to LANG if present) */
+ char caseFold(char c) {
+ if (alternateCasing) {
+ if (c == 'I') {
+ return 'ı';
+ } else if (c == 'İ') {
+ return 'i';
+ } else {
+ return Character.toLowerCase(c);
+ }
+ } else {
+ return Character.toLowerCase(c);
+ }
+ }
+
// TODO: this could be more efficient!
static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
final FST.BytesReader bytesReader = fst.getBytesReader();
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Wed Jul 16 20:01:49 2014
@@ -100,17 +100,104 @@ final class Stemmer {
word = scratchBuffer;
}
+ int caseType = caseOf(word, length);
+ if (caseType == UPPER_CASE) {
+ // upper: union exact, title, lower
+ caseFoldTitle(word, length);
+ caseFoldLower(titleBuffer, length);
+ List<CharsRef> list = doStem(word, length, false);
+ list.addAll(doStem(titleBuffer, length, true));
+ list.addAll(doStem(lowerBuffer, length, true));
+ return list;
+ } else if (caseType == TITLE_CASE) {
+ // title: union exact, lower
+ caseFoldLower(word, length);
+ List<CharsRef> list = doStem(word, length, false);
+ list.addAll(doStem(lowerBuffer, length, true));
+ return list;
+ } else {
+ // exact match only
+ return doStem(word, length, false);
+ }
+ }
+
+ // temporary buffers for case variants
+ private char[] lowerBuffer = new char[8];
+ private char[] titleBuffer = new char[8];
+
+ private static final int EXACT_CASE = 0;
+ private static final int TITLE_CASE = 1;
+ private static final int UPPER_CASE = 2;
+
+ /** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */
+ private int caseOf(char word[], int length) {
+ if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) {
+ return EXACT_CASE;
+ }
+
+ // determine if we are title or lowercase (or something funky, in which its exact)
+ boolean seenUpper = false;
+ boolean seenLower = false;
+ for (int i = 1; i < length; i++) {
+ boolean v = Character.isUpperCase(word[i]);
+ seenUpper |= v;
+ seenLower |= !v;
+ }
+
+ if (!seenLower) {
+ return UPPER_CASE;
+ } else if (!seenUpper) {
+ return TITLE_CASE;
+ } else {
+ return EXACT_CASE;
+ }
+ }
+
+ /** folds titlecase variant of word to titleBuffer */
+ private void caseFoldTitle(char word[], int length) {
+ titleBuffer = ArrayUtil.grow(titleBuffer, length);
+ System.arraycopy(word, 0, titleBuffer, 0, length);
+ for (int i = 1; i < length; i++) {
+ titleBuffer[i] = dictionary.caseFold(titleBuffer[i]);
+ }
+ }
+
+ /** folds lowercase variant of word (title cased) to lowerBuffer */
+ private void caseFoldLower(char word[], int length) {
+ lowerBuffer = ArrayUtil.grow(lowerBuffer, length);
+ System.arraycopy(word, 0, lowerBuffer, 0, length);
+ lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]);
+ }
+
+ private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
List<CharsRef> stems = new ArrayList<>();
IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) {
- // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
- // just because it exists, does not make it valid...
for (int i = 0; i < forms.length; i += formStep) {
+ boolean checkKeepCase = caseVariant && dictionary.keepcase != -1;
+ boolean checkNeedAffix = dictionary.needaffix != -1;
+ boolean checkOnlyInCompound = dictionary.onlyincompound != -1;
+ if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) {
+ dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
+ char wordFlags[] = Dictionary.decodeFlags(scratch);
+ // we are looking for a case variant, but this word does not allow it
+ if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
+ continue;
+ }
+ // we can't add this form, its a pseudostem requiring an affix
+ if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char)dictionary.needaffix)) {
+ continue;
+ }
+ // we can't add this form, it only belongs inside a compound word
+ if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
+ continue;
+ }
+ }
stems.add(newStem(word, length, forms, i));
}
}
try {
- boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false));
+ boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
@@ -203,9 +290,10 @@ final class Stemmer {
* but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse.
* @param circumfix true if the previous prefix removal was signed as a circumfix
* this means inner most suffix must also contain circumfix flag.
+ * @param caseVariant true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed.
* @return List of stems, or empty list if no stems are found
*/
- private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix, boolean circumfix) throws IOException {
+ private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix, boolean circumfix, boolean caseVariant) throws IOException {
// TODO: allow this stuff to be reused by tokenfilter
List<CharsRef> stems = new ArrayList<>();
@@ -250,13 +338,22 @@ final class Stemmer {
final boolean compatible;
if (recursionDepth == 0) {
- compatible = true;
+ if (dictionary.onlyincompound == -1) {
+ compatible = true;
+ } else {
+ // check if affix is allowed in a non-compound word
+ dictionary.flagLookup.get(append, scratch);
+ char appendFlags[] = Dictionary.decodeFlags(scratch);
+ compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+ }
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
char appendFlags[] = Dictionary.decodeFlags(scratch);
assert prevFlag >= 0;
- compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+ boolean allowed = dictionary.onlyincompound == -1 ||
+ !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+ compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
} else {
compatible = false;
}
@@ -277,7 +374,7 @@ final class Stemmer {
System.arraycopy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
- List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix);
+ List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant);
stems.addAll(stemList);
}
@@ -325,13 +422,22 @@ final class Stemmer {
final boolean compatible;
if (recursionDepth == 0) {
- compatible = true;
+ if (dictionary.onlyincompound == -1) {
+ compatible = true;
+ } else {
+ // check if affix is allowed in a non-compound word
+ dictionary.flagLookup.get(append, scratch);
+ char appendFlags[] = Dictionary.decodeFlags(scratch);
+ compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+ }
} else if (crossProduct) {
// cross check incoming continuation class (flag of previous affix) against list.
dictionary.flagLookup.get(append, scratch);
char appendFlags[] = Dictionary.decodeFlags(scratch);
assert prevFlag >= 0;
- compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+ boolean allowed = dictionary.onlyincompound == -1 ||
+ !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+ compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
} else {
compatible = false;
}
@@ -352,7 +458,7 @@ final class Stemmer {
System.arraycopy(word, 0, strippedWord, 0, deAffixedLength);
System.arraycopy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
- List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix);
+ List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);
stems.addAll(stemList);
}
@@ -399,7 +505,7 @@ final class Stemmer {
* @param prefix true if we are removing a prefix (false if its a suffix)
* @return List of stems for the word, or an empty list if none are found
*/
- List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix, boolean circumfix) throws IOException {
+ List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix, boolean circumfix, boolean caseVariant) throws IOException {
// TODO: just pass this in from before, no need to decode it twice
affixReader.setPosition(8 * affix);
char flag = (char) (affixReader.readShort() & 0xffff);
@@ -439,6 +545,15 @@ final class Stemmer {
continue;
}
}
+
+ // we are looking for a case variant, but this word does not allow it
+ if (caseVariant && dictionary.keepcase != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
+ continue;
+ }
+ // we aren't decompounding (yet)
+ if (dictionary.onlyincompound != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
+ continue;
+ }
stems.add(newStem(strippedWord, length, forms, i));
}
}
@@ -457,20 +572,20 @@ final class Stemmer {
// we took away the first prefix.
// COMPLEXPREFIXES = true: combine with a second prefix and another suffix
// COMPLEXPREFIXES = false: combine with a suffix
- stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+ stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
} else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
// we took away a suffix.
// COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
// COMPLEXPREFIXES = false: combine with another suffix
- stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+ stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
}
} else if (recursionDepth == 1) {
if (prefix && dictionary.complexPrefixes) {
// we took away the second prefix: go look for another suffix
- stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+ stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
} else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
// we took away a prefix, then a suffix: go look for another suffix
- stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+ stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
}
}
}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,62 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestAlternateCasing extends StemmerTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("alternate-casing.aff", "alternate-casing.dic");
+ }
+
+ public void testPossibilities() {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("DRİNK", "drink");
+ assertStemsTo("DRINK");
+ assertStemsTo("drinki", "drink");
+ assertStemsTo("DRİNKİ", "drink");
+ assertStemsTo("DRİNKI");
+ assertStemsTo("DRINKI");
+ assertStemsTo("DRINKİ");
+ assertStemsTo("idrink", "drink");
+ assertStemsTo("İDRİNK", "drink");
+ assertStemsTo("IDRİNK");
+ assertStemsTo("IDRINK");
+ assertStemsTo("İDRINK");
+ assertStemsTo("idrinki", "drink");
+ assertStemsTo("İDRİNKİ", "drink");
+ assertStemsTo("rıver", "rıver");
+ assertStemsTo("RIVER", "rıver");
+ assertStemsTo("RİVER");
+ assertStemsTo("rıverı", "rıver");
+ assertStemsTo("RIVERI", "rıver");
+ assertStemsTo("RİVERI");
+ assertStemsTo("RİVERİ");
+ assertStemsTo("RIVERİ");
+ assertStemsTo("ırıver", "rıver");
+ assertStemsTo("IRIVER", "rıver");
+ assertStemsTo("IRİVER");
+ assertStemsTo("İRİVER");
+ assertStemsTo("İRIVER");
+ assertStemsTo("ırıverı", "rıver");
+ assertStemsTo("IRIVERI", "rıver");
+ assertStemsTo("Irıverı", "rıver");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,66 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestCaseSensitive extends StemmerTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("casesensitive.aff", "casesensitive.dic");
+ }
+
+ public void testAllPossibilities() {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("drinks", "drink");
+ assertStemsTo("drinkS", "drink");
+ assertStemsTo("gooddrinks", "drink");
+ assertStemsTo("Gooddrinks", "drink", "drink");
+ assertStemsTo("GOODdrinks", "drink");
+ assertStemsTo("gooddrinkS", "drink");
+ assertStemsTo("GooddrinkS", "drink");
+ assertStemsTo("gooddrink", "drink");
+ assertStemsTo("Gooddrink", "drink", "drink");
+ assertStemsTo("GOODdrink", "drink");
+ assertStemsTo("Drink", "drink", "Drink");
+ assertStemsTo("Drinks", "drink", "Drink");
+ assertStemsTo("DrinkS", "Drink");
+ assertStemsTo("goodDrinks", "Drink");
+ assertStemsTo("GoodDrinks", "Drink");
+ assertStemsTo("GOODDrinks", "Drink");
+ assertStemsTo("goodDrinkS", "Drink");
+ assertStemsTo("GoodDrinkS", "Drink");
+ assertStemsTo("GOODDrinkS", "Drink");
+ assertStemsTo("goodDrink", "Drink");
+ assertStemsTo("GoodDrink", "Drink");
+ assertStemsTo("GOODDrink", "Drink");
+ assertStemsTo("DRINK", "DRINK", "drink", "Drink");
+ assertStemsTo("DRINKs", "DRINK");
+ assertStemsTo("DRINKS", "DRINK", "drink", "Drink");
+ assertStemsTo("goodDRINKs", "DRINK");
+ assertStemsTo("GoodDRINKs", "DRINK");
+ assertStemsTo("GOODDRINKs", "DRINK");
+ assertStemsTo("goodDRINKS", "DRINK");
+ assertStemsTo("GoodDRINKS", "DRINK");
+ assertStemsTo("GOODDRINKS", "DRINK", "drink", "drink");
+ assertStemsTo("goodDRINK", "DRINK");
+ assertStemsTo("GoodDRINK", "DRINK");
+ assertStemsTo("GOODDRINK", "DRINK", "drink", "drink");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,45 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestKeepCase extends StemmerTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("keepcase.aff", "keepcase.dic");
+ }
+
+ public void testPossibilities() {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("Drink", "drink");
+ assertStemsTo("DRINK", "drink");
+ assertStemsTo("drinks", "drink");
+ assertStemsTo("Drinks", "drink");
+ assertStemsTo("DRINKS", "drink");
+ assertStemsTo("walk", "walk");
+ assertStemsTo("walks", "walk");
+ assertStemsTo("Walk");
+ assertStemsTo("Walks");
+ assertStemsTo("WALKS");
+ assertStemsTo("test", "test");
+ assertStemsTo("Test");
+ assertStemsTo("TEST");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestNeedAffix extends StemmerTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("needaffix.aff", "needaffix.dic");
+ }
+
+ public void testPossibilities() {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("drinks", "drink");
+ assertStemsTo("walk");
+ assertStemsTo("walks", "walk");
+ assertStemsTo("prewalk", "walk");
+ assertStemsTo("prewalks", "walk");
+ assertStemsTo("test");
+ assertStemsTo("pretest");
+ assertStemsTo("tests");
+ assertStemsTo("pretests");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestOnlyInCompound extends StemmerTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("onlyincompound.aff", "onlyincompound.dic");
+ }
+
+ public void testPossibilities() {
+ assertStemsTo("drink", "drink");
+ assertStemsTo("drinks", "drink");
+ assertStemsTo("drinked");
+ assertStemsTo("predrink");
+ assertStemsTo("predrinked");
+ assertStemsTo("walk");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,15 @@
+SET UTF-8
+
+LANG tr_TR
+
+PFX A Y 1
+PFX A 0 ı . +dotlessprefix
+
+PFX B Y 1
+PFX B 0 i . +dottedprefix
+
+SFX X Y 1
+SFX X 0 ı . +dotlesssuffix
+
+SFX Y Y 1
+SFX Y 0 i . +dottedsuffix
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+2
+drink/BY
+rıver/AX
+
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,16 @@
+SET UTF-8
+
+PFX A Y 1
+PFX A 0 good . +good
+
+PFX B Y 1
+PFX B 0 Good . +Good
+
+PFX C Y 1
+PFX C 0 GOOD . +GOOD
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Y Y 1
+SFX Y 0 S . +S
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/XYABC
+Drink/XYABC
+DRINK/XYABC
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,6 @@
+SET UTF-8
+
+KEEPCASE Z
+
+SFX X Y 1
+SFX X 0 s . +s
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XZ
+test/Z
\ No newline at end of file
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,9 @@
+SET UTF-8
+
+NEEDAFFIX Z
+
+PFX Y Y 1
+PFX Y 0 pre . pre+
+
+SFX X Y 1
+SFX X 0 s . +s
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XYZ
+test/Z
\ No newline at end of file
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,12 @@
+SET UTF-8
+
+ONLYINCOMPOUND A
+
+PFX Y Y 1
+PFX Y 0 pre/A . pre+
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Z Y 1
+SFX Z 0 ed/A . +ed
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+2
+drink/XYZ
+walk/A
+