You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/11 18:29:46 UTC

svn commit: r1609738 - in /lucene/dev/trunk/lucene: ./ analysis/common/src/java/org/apache/lucene/analysis/hunspell/ analysis/common/src/test/org/apache/lucene/analysis/hunspell/

Author: rmuir
Date: Fri Jul 11 16:29:46 2014
New Revision: 1609738

URL: http://svn.apache.org/r1609738
Log:
Fix hunspell zero-string overgeneration

Added:
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff   (with props)
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1609738&r1=1609737&r2=1609738&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jul 11 16:29:46 2014
@@ -159,6 +159,9 @@ Bug Fixes
 * LUCENE-5817: Fix hunspell zero-affix handling: previously only zero-strips worked
   correctly.  (Robert Muir)
 
+* LUCENE-5818: Fix hunspell overgeneration for short strings that also match affixes.
+  (Robert Muir)
+
 Test Framework
 
 * LUCENE-5786: Unflushed/ truncated events file (hung testing subprocess).

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1609738&r1=1609737&r2=1609738&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Fri Jul 11 16:29:46 2014
@@ -218,7 +218,7 @@ final class Stemmer {
       fst.getFirstArc(arc);
       IntsRef NO_OUTPUT = outputs.getNoOutput();
       IntsRef output = NO_OUTPUT;
-      for (int i = 0; i < length; i++) {
+      for (int i = 0; i < length-1; i++) {
         if (i > 0) {
           int ch = word[i-1];
           if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
@@ -292,7 +292,7 @@ final class Stemmer {
       fst.getFirstArc(arc);
       IntsRef NO_OUTPUT = outputs.getNoOutput();
       IntsRef output = NO_OUTPUT;
-      for (int i = length; i >= 0; i--) {
+      for (int i = length; i > 0; i--) {
         if (i < length) {
           int ch = word[i];
           if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java Fri Jul 11 16:29:46 2014
@@ -0,0 +1,34 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestStrangeOvergeneration extends StemmerTestBase { // LUCENE-5818 regression test: short inputs that are themselves an affix's append string
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("strange-overgeneration.aff", "strange-overgeneration.dic"); // fixture: suffix rules A, B, C and entries baz/A, bar/B, beer/C, eer/C
+  }
+  
+  public void testStemming() {
+    assertStemsTo("btasty", "beer"); // SFX C strips "eer" and appends "tasty": "beer" -> "btasty", so the stem is "beer"
+    assertStemsTo("tasty"); // no expected stems listed (presumably asserts none; see StemmerTestBase): "tasty" is only SFX C's append string, i.e. "eer"/C stripped to nothing
+    assertStemsTo("yuck"); // likewise: "yuck" is only SFX A's append string, i.e. "baz"/A stripped to nothing
+    assertStemsTo("foo"); // likewise: "foo" is only SFX B's append string, i.e. "bar"/B stripped to nothing
+  }
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff Fri Jul 11 16:29:46 2014
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A   baz        yuck         baz
+
+SFX B Y 1
+SFX B   bar        foo    .
+
+SFX C Y 1
+SFX C   eer        tasty  .

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic Fri Jul 11 16:29:46 2014
@@ -0,0 +1,5 @@
+3
+baz/A
+bar/B
+beer/C
+eer/C