You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/11 18:29:46 UTC
svn commit: r1609738 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/hunspell/
analysis/common/src/test/org/apache/lucene/analysis/hunspell/
Author: rmuir
Date: Fri Jul 11 16:29:46 2014
New Revision: 1609738
URL: http://svn.apache.org/r1609738
Log:
Fix hunspell zero-string overgeneration
Added:
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff (with props)
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1609738&r1=1609737&r2=1609738&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jul 11 16:29:46 2014
@@ -159,6 +159,9 @@ Bug Fixes
* LUCENE-5817: Fix hunspell zero-affix handling: previously only zero-strips worked
correctly. (Robert Muir)
+* LUCENE-5818: Fix hunspell overgeneration for short strings that also match affixes.
+ (Robert Muir)
+
Test Framework
* LUCENE-5786: Unflushed/ truncated events file (hung testing subprocess).
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java?rev=1609738&r1=1609737&r2=1609738&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java Fri Jul 11 16:29:46 2014
@@ -218,7 +218,7 @@ final class Stemmer {
fst.getFirstArc(arc);
IntsRef NO_OUTPUT = outputs.getNoOutput();
IntsRef output = NO_OUTPUT;
- for (int i = 0; i < length; i++) {
+ for (int i = 0; i < length-1; i++) {
if (i > 0) {
int ch = word[i-1];
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
@@ -292,7 +292,7 @@ final class Stemmer {
fst.getFirstArc(arc);
IntsRef NO_OUTPUT = outputs.getNoOutput();
IntsRef output = NO_OUTPUT;
- for (int i = length; i >= 0; i--) {
+ for (int i = length; i > 0; i--) {
if (i < length) {
int ch = word[i];
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestStrangeOvergeneration.java Fri Jul 11 16:29:46 2014
@@ -0,0 +1,34 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestStrangeOvergeneration extends StemmerTestBase {
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ init("strange-overgeneration.aff", "strange-overgeneration.dic");
+ }
+
+ public void testStemming() {
+ assertStemsTo("btasty", "beer");
+ assertStemsTo("tasty");
+ assertStemsTo("yuck");
+ assertStemsTo("foo");
+ }
+}
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.aff Fri Jul 11 16:29:46 2014
@@ -0,0 +1,10 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A baz yuck baz
+
+SFX B Y 1
+SFX B bar foo .
+
+SFX C Y 1
+SFX C eer tasty .
Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic?rev=1609738&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/strange-overgeneration.dic Fri Jul 11 16:29:46 2014
@@ -0,0 +1,5 @@
+3
+baz/A
+bar/B
+beer/C
+eer/C