You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/16 22:01:50 UTC

svn commit: r1611161 - in /lucene/dev/trunk/lucene: ./ analysis/common/src/java/org/apache/lucene/analysis/hunspell/ analysis/common/src/test/org/apache/lucene/analysis/hunspell/

Author: rmuir
Date: Wed Jul 16 20:01:49 2014
New Revision: 1611161

URL: http://svn.apache.org/r1611161
Log:
LUCENE-5826: Support proper hunspell case handling and related options

Added:
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jul 16 20:01:49 2014
@@ -108,6 +108,9 @@ New Features
 * LUCENE-5806: Extend expressions grammar to support array access in variables.
   Added helper class VariableContext to parse complex variable into pieces.
   (Ryan Ernst)
+
+* LUCENE-5826: Support proper hunspell case handling, LANG, KEEPCASE, NEEDAFFIX,
+  and ONLYINCOMPOUND flags.  (Robert Muir)
   
 API Changes
 

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Wed Jul 16 20:01:49 2014
@@ -85,6 +85,11 @@ public class Dictionary {
   private static final String ICONV_KEY = "ICONV";
   private static final String OCONV_KEY = "OCONV";
   private static final String FULLSTRIP_KEY = "FULLSTRIP";
+  private static final String LANG_KEY = "LANG";
+  private static final String KEEPCASE_KEY = "KEEPCASE";
+  private static final String NEEDAFFIX_KEY = "NEEDAFFIX";
+  private static final String PSEUDOROOT_KEY = "PSEUDOROOT";
+  private static final String ONLYINCOMPOUND_KEY = "ONLYINCOMPOUND";
 
   private static final String NUM_FLAG_TYPE = "num";
   private static final String UTF8_FLAG_TYPE = "UTF-8";
@@ -140,6 +145,9 @@ public class Dictionary {
   boolean twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
   
   int circumfix = -1; // circumfix flag, or -1 if one is not defined
+  int keepcase = -1;  // keepcase flag, or -1 if one is not defined
+  int needaffix = -1; // needaffix flag, or -1 if one is not defined
+  int onlyincompound = -1; // onlyincompound flag, or -1 if one is not defined
   
   // ignored characters (dictionary, affix, inputs)
   private char[] ignore;
@@ -154,6 +162,11 @@ public class Dictionary {
   // true if we can strip suffixes "down to nothing"
   boolean fullStrip;
   
+  // language declaration of the dictionary
+  String language;
+  // true if case algorithms should use alternate (Turkish/Azeri) mapping
+  boolean alternateCasing;
+  
   /**
    * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
    * and dictionary files.
@@ -315,6 +328,24 @@ public class Dictionary {
           throw new ParseException("Illegal CIRCUMFIX declaration", reader.getLineNumber());
         }
         circumfix = flagParsingStrategy.parseFlag(parts[1]);
+      } else if (line.startsWith(KEEPCASE_KEY)) {
+        String parts[] = line.split("\\s+");
+        if (parts.length != 2) {
+          throw new ParseException("Illegal KEEPCASE declaration", reader.getLineNumber());
+        }
+        keepcase = flagParsingStrategy.parseFlag(parts[1]);
+      } else if (line.startsWith(NEEDAFFIX_KEY) || line.startsWith(PSEUDOROOT_KEY)) {
+        String parts[] = line.split("\\s+");
+        if (parts.length != 2) {
+          throw new ParseException("Illegal NEEDAFFIX declaration", reader.getLineNumber());
+        }
+        needaffix = flagParsingStrategy.parseFlag(parts[1]);
+      } else if (line.startsWith(ONLYINCOMPOUND_KEY)) {
+        String parts[] = line.split("\\s+");
+        if (parts.length != 2) {
+          throw new ParseException("Illegal ONLYINCOMPOUND declaration", reader.getLineNumber());
+        }
+        onlyincompound = flagParsingStrategy.parseFlag(parts[1]);
       } else if (line.startsWith(IGNORE_KEY)) {
         String parts[] = line.split("\\s+");
         if (parts.length != 2) {
@@ -340,6 +371,9 @@ public class Dictionary {
         }
       } else if (line.startsWith(FULLSTRIP_KEY)) {
         fullStrip = true;
+      } else if (line.startsWith(LANG_KEY)) {
+        language = line.substring(LANG_KEY.length()).trim();
+        alternateCasing = "tr_TR".equals(language) || "az_AZ".equals(language);
       }
     }
     
@@ -1108,7 +1142,7 @@ public class Dictionary {
       
       if (ignoreCase && iconv == null) {
         // if we have no input conversion mappings, do this on-the-fly
-        ch = Character.toLowerCase(ch);
+        ch = caseFold(ch);
       }
       
       reuse.append(ch);
@@ -1122,7 +1156,7 @@ public class Dictionary {
       }
       if (ignoreCase) {
         for (int i = 0; i < reuse.length(); i++) {
-          reuse.setCharAt(i, Character.toLowerCase(reuse.charAt(i)));
+          reuse.setCharAt(i, caseFold(reuse.charAt(i)));
         }
       }
     }
@@ -1130,6 +1164,21 @@ public class Dictionary {
     return reuse;
   }
   
+  /** folds single character (according to LANG if present) */
+  char caseFold(char c) {
+    if (alternateCasing) {
+      if (c == 'I') {
+        return 'ı';
+      } else if (c == 'Ä°') {
+        return 'i';
+      } else {
+        return Character.toLowerCase(c);
+      }
+    } else {
+      return Character.toLowerCase(c);
+    }
+  }
+  
   // TODO: this could be more efficient!
   static void applyMappings(FST<CharsRef> fst, StringBuilder sb) throws IOException {
     final FST.BytesReader bytesReader = fst.getBytesReader();

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1611161&r1=1611160&r2=1611161&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Wed Jul 16 20:01:49 2014
@@ -100,17 +100,104 @@ final class Stemmer {
       word = scratchBuffer;
     }
     
+    int caseType = caseOf(word, length);
+    if (caseType == UPPER_CASE) {
+      // upper: union exact, title, lower
+      caseFoldTitle(word, length);
+      caseFoldLower(titleBuffer, length);
+      List<CharsRef> list = doStem(word, length, false);
+      list.addAll(doStem(titleBuffer, length, true));
+      list.addAll(doStem(lowerBuffer, length, true));
+      return list;
+    } else if (caseType == TITLE_CASE) {
+      // title: union exact, lower
+      caseFoldLower(word, length);
+      List<CharsRef> list = doStem(word, length, false);
+      list.addAll(doStem(lowerBuffer, length, true));
+      return list;
+    } else {
+      // exact match only
+      return doStem(word, length, false);
+    }
+  }
+  
+  // temporary buffers for case variants
+  private char[] lowerBuffer = new char[8];
+  private char[] titleBuffer = new char[8];
+  
+  private static final int EXACT_CASE = 0;
+  private static final int TITLE_CASE = 1;
+  private static final int UPPER_CASE = 2;
+  
+  /** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */
+  private int caseOf(char word[], int length) {
+    if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) {
+      return EXACT_CASE;
+    }
+    
+    // determine if we are title or lowercase (or something funky, in which its exact)
+    boolean seenUpper = false;
+    boolean seenLower = false;
+    for (int i = 1; i < length; i++) {
+      boolean v = Character.isUpperCase(word[i]);
+      seenUpper |= v;
+      seenLower |= !v;
+    }
+    
+    if (!seenLower) {
+      return UPPER_CASE;
+    } else if (!seenUpper) {
+      return TITLE_CASE;
+    } else {
+      return EXACT_CASE;
+    }
+  }
+  
+  /** folds titlecase variant of word to titleBuffer */
+  private void caseFoldTitle(char word[], int length) {
+    titleBuffer = ArrayUtil.grow(titleBuffer, length);
+    System.arraycopy(word, 0, titleBuffer, 0, length);
+    for (int i = 1; i < length; i++) {
+      titleBuffer[i] = dictionary.caseFold(titleBuffer[i]);
+    }
+  }
+  
+  /** folds lowercase variant of word (title cased) to lowerBuffer */
+  private void caseFoldLower(char word[], int length) {
+    lowerBuffer = ArrayUtil.grow(lowerBuffer, length);
+    System.arraycopy(word, 0, lowerBuffer, 0, length);
+    lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]);
+  }
+  
+  private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
     List<CharsRef> stems = new ArrayList<>();
     IntsRef forms = dictionary.lookupWord(word, 0, length);
     if (forms != null) {
-      // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
-      // just because it exists, does not make it valid...
       for (int i = 0; i < forms.length; i += formStep) {
+        boolean checkKeepCase = caseVariant && dictionary.keepcase != -1;
+        boolean checkNeedAffix = dictionary.needaffix != -1;
+        boolean checkOnlyInCompound = dictionary.onlyincompound != -1;
+        if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) {
+          dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
+          char wordFlags[] = Dictionary.decodeFlags(scratch);
+          // we are looking for a case variant, but this word does not allow it
+          if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
+            continue;
+          }
+          // we can't add this form, its a pseudostem requiring an affix
+          if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char)dictionary.needaffix)) {
+            continue;
+          }
+          // we can't add this form, it only belongs inside a compound word
+          if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
+            continue;
+          }
+        }
         stems.add(newStem(word, length, forms, i));
       }
     }
     try {
-      boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false));
+      boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
     } catch (IOException bogus) {
       throw new RuntimeException(bogus);
     }
@@ -203,9 +290,10 @@ final class Stemmer {
    *        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. 
    * @param circumfix true if the previous prefix removal was signed as a circumfix
    *        this means inner most suffix must also contain circumfix flag.
+   * @param caseVariant true if we are searching for a case variant. if the word has KEEPCASE flag it cannot succeed.
    * @return List of stems, or empty list if no stems are found
    */
-  private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix, boolean circumfix) throws IOException {
+  private List<CharsRef> stem(char word[], int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, boolean doPrefix, boolean doSuffix, boolean previousWasPrefix, boolean circumfix, boolean caseVariant) throws IOException {
     
     // TODO: allow this stuff to be reused by tokenfilter
     List<CharsRef> stems = new ArrayList<>();
@@ -250,13 +338,22 @@ final class Stemmer {
           
           final boolean compatible;
           if (recursionDepth == 0) {
-            compatible = true;
+            if (dictionary.onlyincompound == -1) {
+              compatible = true;
+            } else {
+              // check if affix is allowed in a non-compound word
+              dictionary.flagLookup.get(append, scratch);
+              char appendFlags[] = Dictionary.decodeFlags(scratch);
+              compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+            }
           } else if (crossProduct) {
             // cross check incoming continuation class (flag of previous affix) against list.
             dictionary.flagLookup.get(append, scratch);
             char appendFlags[] = Dictionary.decodeFlags(scratch);
             assert prevFlag >= 0;
-            compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+            boolean allowed = dictionary.onlyincompound == -1 || 
+                              !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+            compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
           } else {
             compatible = false;
           }
@@ -277,7 +374,7 @@ final class Stemmer {
             System.arraycopy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
             System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
 
-            List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix);
+            List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant);
             
             stems.addAll(stemList);
           }
@@ -325,13 +422,22 @@ final class Stemmer {
           
           final boolean compatible;
           if (recursionDepth == 0) {
-            compatible = true;
+            if (dictionary.onlyincompound == -1) {
+              compatible = true;
+            } else {
+              // check if affix is allowed in a non-compound word
+              dictionary.flagLookup.get(append, scratch);
+              char appendFlags[] = Dictionary.decodeFlags(scratch);
+              compatible = !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+            }
           } else if (crossProduct) {
             // cross check incoming continuation class (flag of previous affix) against list.
             dictionary.flagLookup.get(append, scratch);
             char appendFlags[] = Dictionary.decodeFlags(scratch);
             assert prevFlag >= 0;
-            compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+            boolean allowed = dictionary.onlyincompound == -1 || 
+                              !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
+            compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
           } else {
             compatible = false;
           }
@@ -352,7 +458,7 @@ final class Stemmer {
             System.arraycopy(word, 0, strippedWord, 0, deAffixedLength);
             System.arraycopy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
             
-            List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix);
+            List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);
             
             stems.addAll(stemList);
           }
@@ -399,7 +505,7 @@ final class Stemmer {
    * @param prefix true if we are removing a prefix (false if its a suffix)
    * @return List of stems for the word, or an empty list if none are found
    */
-  List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix, boolean circumfix) throws IOException {    
+  List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int prefixFlag, int recursionDepth, boolean prefix, boolean circumfix, boolean caseVariant) throws IOException {    
     // TODO: just pass this in from before, no need to decode it twice
     affixReader.setPosition(8 * affix);
     char flag = (char) (affixReader.readShort() & 0xffff);
@@ -439,6 +545,15 @@ final class Stemmer {
               continue;
             }
           }
+          
+          // we are looking for a case variant, but this word does not allow it
+          if (caseVariant && dictionary.keepcase != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
+            continue;
+          }
+          // we aren't decompounding (yet)
+          if (dictionary.onlyincompound != -1 && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
+            continue;
+          }
           stems.add(newStem(strippedWord, length, forms, i));
         }
       }
@@ -457,20 +572,20 @@ final class Stemmer {
           // we took away the first prefix.
           // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
           // COMPLEXPREFIXES = false: combine with a suffix
-          stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+          stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
         } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
           // we took away a suffix.
           // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
           // COMPLEXPREFIXES = false: combine with another suffix
-          stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+          stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
         }
       } else if (recursionDepth == 1) {
         if (prefix && dictionary.complexPrefixes) {
           // we took away the second prefix: go look for another suffix
-          stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+          stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
         } else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
           // we took away a prefix, then a suffix: go look for another suffix
-          stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+          stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
         }
       }
     }

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAlternateCasing.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,62 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestAlternateCasing extends StemmerTestBase {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("alternate-casing.aff", "alternate-casing.dic");
+  }
+  
+  public void testPossibilities() {
+    assertStemsTo("drink",   "drink");
+    assertStemsTo("DRÄ°NK",   "drink");
+    assertStemsTo("DRINK");
+    assertStemsTo("drinki",  "drink");
+    assertStemsTo("DRÄ°NKÄ°",  "drink");
+    assertStemsTo("DRÄ°NKI");
+    assertStemsTo("DRINKI");
+    assertStemsTo("DRINKÄ°");
+    assertStemsTo("idrink",  "drink");
+    assertStemsTo("Ä°DRÄ°NK",  "drink");
+    assertStemsTo("IDRÄ°NK");
+    assertStemsTo("IDRINK");
+    assertStemsTo("Ä°DRINK");
+    assertStemsTo("idrinki", "drink");
+    assertStemsTo("Ä°DRÄ°NKÄ°", "drink");
+    assertStemsTo("rıver",   "rıver");
+    assertStemsTo("RIVER",   "rıver");
+    assertStemsTo("RÄ°VER");
+    assertStemsTo("rıverı",  "rıver");
+    assertStemsTo("RIVERI",  "rıver");
+    assertStemsTo("RÄ°VERI");
+    assertStemsTo("RÄ°VERÄ°");
+    assertStemsTo("RIVERÄ°");
+    assertStemsTo("ırıver",  "rıver");
+    assertStemsTo("IRIVER",  "rıver");
+    assertStemsTo("IRÄ°VER");
+    assertStemsTo("Ä°RÄ°VER");
+    assertStemsTo("Ä°RIVER");
+    assertStemsTo("ırıverı",  "rıver");
+    assertStemsTo("IRIVERI",  "rıver");
+    assertStemsTo("Irıverı",  "rıver");
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestCaseSensitive.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,66 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestCaseSensitive extends StemmerTestBase {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("casesensitive.aff", "casesensitive.dic");
+  }
+  
+  public void testAllPossibilities() {
+    assertStemsTo("drink",      "drink");
+    assertStemsTo("drinks",     "drink");
+    assertStemsTo("drinkS",     "drink");
+    assertStemsTo("gooddrinks", "drink");
+    assertStemsTo("Gooddrinks", "drink", "drink");
+    assertStemsTo("GOODdrinks", "drink");
+    assertStemsTo("gooddrinkS", "drink");
+    assertStemsTo("GooddrinkS", "drink");
+    assertStemsTo("gooddrink",  "drink");
+    assertStemsTo("Gooddrink",  "drink", "drink");
+    assertStemsTo("GOODdrink",  "drink");
+    assertStemsTo("Drink",      "drink", "Drink");
+    assertStemsTo("Drinks",     "drink", "Drink");
+    assertStemsTo("DrinkS",     "Drink");
+    assertStemsTo("goodDrinks", "Drink");
+    assertStemsTo("GoodDrinks", "Drink");
+    assertStemsTo("GOODDrinks", "Drink");
+    assertStemsTo("goodDrinkS", "Drink");
+    assertStemsTo("GoodDrinkS", "Drink");
+    assertStemsTo("GOODDrinkS", "Drink");
+    assertStemsTo("goodDrink",  "Drink");
+    assertStemsTo("GoodDrink",  "Drink");
+    assertStemsTo("GOODDrink",  "Drink");
+    assertStemsTo("DRINK",      "DRINK", "drink", "Drink");
+    assertStemsTo("DRINKs",     "DRINK");
+    assertStemsTo("DRINKS",     "DRINK", "drink", "Drink");
+    assertStemsTo("goodDRINKs", "DRINK");
+    assertStemsTo("GoodDRINKs", "DRINK");
+    assertStemsTo("GOODDRINKs", "DRINK");
+    assertStemsTo("goodDRINKS", "DRINK");
+    assertStemsTo("GoodDRINKS", "DRINK");
+    assertStemsTo("GOODDRINKS", "DRINK", "drink", "drink");
+    assertStemsTo("goodDRINK",  "DRINK");
+    assertStemsTo("GoodDRINK",  "DRINK");
+    assertStemsTo("GOODDRINK",  "DRINK", "drink", "drink");
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestKeepCase.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,45 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestKeepCase extends StemmerTestBase {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("keepcase.aff", "keepcase.dic");
+  }
+  
+  public void testPossibilities() {
+    assertStemsTo("drink",   "drink");
+    assertStemsTo("Drink",   "drink");
+    assertStemsTo("DRINK",   "drink");
+    assertStemsTo("drinks",  "drink");
+    assertStemsTo("Drinks",  "drink");
+    assertStemsTo("DRINKS",  "drink");
+    assertStemsTo("walk",    "walk");
+    assertStemsTo("walks",   "walk");
+    assertStemsTo("Walk");
+    assertStemsTo("Walks");
+    assertStemsTo("WALKS");
+    assertStemsTo("test",    "test");
+    assertStemsTo("Test");
+    assertStemsTo("TEST");
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestNeedAffix.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,41 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestNeedAffix extends StemmerTestBase {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("needaffix.aff", "needaffix.dic");
+  }
+  
+  public void testPossibilities() {
+    assertStemsTo("drink",     "drink");
+    assertStemsTo("drinks",    "drink");
+    assertStemsTo("walk");
+    assertStemsTo("walks",     "walk");
+    assertStemsTo("prewalk",   "walk");
+    assertStemsTo("prewalks",  "walk");
+    assertStemsTo("test");
+    assertStemsTo("pretest");
+    assertStemsTo("tests");
+    assertStemsTo("pretests");
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOnlyInCompound.java Wed Jul 16 20:01:49 2014
@@ -0,0 +1,37 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestOnlyInCompound extends StemmerTestBase {
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("onlyincompound.aff", "onlyincompound.dic");
+  }
+  
+  public void testPossibilities() {
+    assertStemsTo("drink",     "drink");
+    assertStemsTo("drinks",    "drink");
+    assertStemsTo("drinked");
+    assertStemsTo("predrink");
+    assertStemsTo("predrinked");
+    assertStemsTo("walk");
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,15 @@
+SET UTF-8
+
+LANG tr_TR
+
+PFX A Y 1
+PFX A 0 ı . +dotlessprefix
+
+PFX B Y 1
+PFX B 0 i . +dottedprefix
+
+SFX X Y 1
+SFX X 0 ı . +dotlesssuffix
+
+SFX Y Y 1
+SFX Y 0 i . +dottedsuffix

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/alternate-casing.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/BY
+rıver/AX
+

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,16 @@
+SET UTF-8
+
+PFX A Y 1
+PFX A 0 good . +good
+
+PFX B Y 1
+PFX B 0 Good . +Good
+
+PFX C Y 1
+PFX C 0 GOOD . +GOOD
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Y Y 1
+SFX Y 0 S . +S

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/casesensitive.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/XYABC
+Drink/XYABC
+DRINK/XYABC

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,6 @@
+SET UTF-8
+
+KEEPCASE Z
+
+SFX X Y 1
+SFX X 0 s . +s

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/keepcase.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XZ
+test/Z
\ No newline at end of file

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,9 @@
+SET UTF-8
+
+NEEDAFFIX Z
+
+PFX Y Y 1
+PFX Y 0 pre . pre+
+
+SFX X Y 1
+SFX X 0 s . +s

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/needaffix.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+3
+drink/X
+walk/XYZ
+test/Z
\ No newline at end of file

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.aff Wed Jul 16 20:01:49 2014
@@ -0,0 +1,12 @@
+SET UTF-8
+
+ONLYINCOMPOUND A
+
+PFX Y Y 1
+PFX Y 0 pre/A . pre+
+
+SFX X Y 1
+SFX X 0 s . +s
+
+SFX Z Y 1
+SFX Z 0 ed/A . +ed

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic?rev=1611161&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/onlyincompound.dic Wed Jul 16 20:01:49 2014
@@ -0,0 +1,4 @@
+2
+drink/XYZ
+walk/A
+