You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/07/03 02:38:32 UTC

svn commit: r1499166 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/ lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/ solr/

Author: janhoy
Date: Wed Jul  3 00:38:32 2013
New Revision: 1499166

URL: http://svn.apache.org/r1499166
Log:
SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries (merge from trunk)

Added:
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testOverride.dic
      - copied unchanged from r1499164, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testOverride.dic
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/test.dic
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (contents, props changed)

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java?rev=1499166&r1=1499165&r2=1499166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java Wed Jul  3 00:38:32 2013
@@ -383,12 +383,9 @@ public class HunspellDictionary {
         }
       }
       
-      List<HunspellWord> entries = words.get(entry);
-      if (entries == null) {
-        entries = new ArrayList<HunspellWord>();
-        words.put(entry, entries);
-      }
+      List<HunspellWord> entries = new ArrayList<HunspellWord>();
       entries.add(wordForm);
+      words.put(entry, entries);
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java?rev=1499166&r1=1499165&r2=1499166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java Wed Jul  3 00:38:32 2013
@@ -20,7 +20,9 @@ package org.apache.lucene.analysis.hunsp
 import java.io.IOException;
 import java.io.InputStream;
 import java.text.ParseException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Assert;
@@ -131,12 +133,30 @@ public class HunspellDictionaryTest exte
     assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
     assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
     assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
-    
+    assertEquals("Wrong number of flags for lucen", 1, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
+
     affixStream.close();
     dictStream.close();
   }
 
   @Test
+  public void testHunspellDictionary_multipleDictWithOverride() throws IOException, ParseException {
+    InputStream affixStream = getClass().getResourceAsStream("test.aff");
+    List<InputStream> dictStreams = new ArrayList<InputStream>();
+    dictStreams.add(getClass().getResourceAsStream("test.dic"));
+    dictStreams.add(getClass().getResourceAsStream("testOverride.dic"));
+
+    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStreams, TEST_VERSION_CURRENT, false);
+    assertEquals("Wrong number of flags for lucen", 3, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
+    assertEquals("Wrong number of flags for bar", 1, dictionary.lookupWord(new char[]{'b', 'a', 'r'}, 0, 3).get(0).getFlags().length);
+
+    affixStream.close();
+    for(InputStream dstream : dictStreams) {
+      dstream.close();
+    }
+  }
+
+  @Test
   public void testCompressedHunspellDictionary_loadDicAff() throws IOException, ParseException {
     InputStream affixStream = getClass().getResourceAsStream("testCompressed.aff");
     InputStream dictStream = getClass().getResourceAsStream("testCompressed.dic");

Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/test.dic
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/test.dic?rev=1499166&r1=1499165&r2=1499166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/test.dic (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/test.dic Wed Jul  3 00:38:32 2013
@@ -1,4 +1,4 @@
-6
+8
 lucen/A
 lucene
 mahout/A

Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1499166&r1=1499165&r2=1499166&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Wed Jul  3 00:38:32 2013
@@ -211,6 +211,8 @@ Bug Fixes
 * SOLR-3369: shards.tolerant=true is broken for group queries
   (Russell Black, Martijn van Groningen, Jabouille jean Charles, Ryan McKinley via shalin)
 
+* SOLR-4452: Hunspell stemmer should not merge duplicate dictionary entries (janhoy)
+
 Optimizations
 ----------------------