You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/02/27 15:07:12 UTC
svn commit: r748534 - in /lucene/java/trunk/contrib: ./ spellchecker/src/java/org/apache/lucene/search/spell/ spellchecker/src/test/org/apache/lucene/search/spell/
Author: mikemccand
Date: Fri Feb 27 14:07:12 2009
New Revision: 748534
URL: http://svn.apache.org/viewvc?rev=748534&view=rev
Log:
LUCENE-1548: fix distance normalization in LevenshteinDistance to not produce negative distances
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Fri Feb 27 14:07:12 2009
@@ -25,6 +25,9 @@
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException
due to recursive invocation. (Karl Wettin)
+ 5. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
+ not produce negative distances (Thomas Morton via Mike McCandless)
+
New features
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java Fri Feb 27 14:07:12 2009
@@ -100,7 +100,7 @@
// our last action in the above loop was to switch d and p, so p now
// actually has the most recent cost counts
- return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
+ return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
}
}
Modified: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java Fri Feb 27 14:07:12 2009
@@ -25,25 +25,25 @@
public void testGetDistance() {
float d = sd.getDistance("al", "al");
- assertTrue(d == 1.0f);
+ assertEquals(d,1.0f,0.001);
d = sd.getDistance("martha", "marhta");
- assertTrue(d > 0.66 && d <0.67);
+ assertEquals(d,0.6666,0.001);
d = sd.getDistance("jones", "johnson");
- assertTrue(d > 0.199 && d < 0.201);
+ assertEquals(d,0.4285,0.001);
d = sd.getDistance("abcvwxyz", "cabvwxyz");
- assertTrue(d > 0.749 && d < 0.751);
+ assertEquals(d,0.75,0.001);
d = sd.getDistance("dwayne", "duane");
- assertTrue(d > 0.599 && d < 0.601);
+ assertEquals(d,0.666,0.001);
d = sd.getDistance("dixon", "dicksonx");
- assertTrue(d > 0.199 && d < 0.201);
+ assertEquals(d,0.5,0.001);
d = sd.getDistance("six", "ten");
- assertTrue(d == 0f);
+ assertEquals(d,0,0.001);
float d1 = sd.getDistance("zac ephron", "zac efron");
float d2 = sd.getDistance("zac ephron", "kai ephron");
- assertTrue(d1 < d2);
+ assertEquals(d1,d2,0.001);
d1 = sd.getDistance("brittney spears", "britney spears");
d2 = sd.getDistance("brittney spears", "brittney startzman");
- assertTrue(d1 > d2);
+ assertTrue(d1 > d2);
}
}
Modified: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Fri Feb 27 14:07:12 2009
@@ -129,19 +129,22 @@
assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("ive", 2);
- assertEquals(1, similar.length);
+ assertEquals(2, similar.length);
assertEquals(similar[0], "five");
+ assertEquals(similar[1], "nine");
similar = spellChecker.suggestSimilar("fives", 2);
assertEquals(1, similar.length);
assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("fie", 2);
- assertEquals(1, similar.length);
+ assertEquals(2, similar.length);
assertEquals(similar[0], "five");
-
+ assertEquals(similar[1], "nine");
+
similar = spellChecker.suggestSimilar("fi", 2);
- assertEquals(0, similar.length);
+ assertEquals(1, similar.length);
+ assertEquals(similar[0], "five");
// test restraint to a field
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
@@ -151,8 +154,9 @@
assertEquals(1, similar.length); // there is the term thousand in the field field2
similar = spellChecker.suggestSimilar("onety", 2);
- assertEquals(1, similar.length);
+ assertEquals(2, similar.length);
assertEquals(similar[0], "ninety");
+ assertEquals(similar[1], "one");
try {
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
} catch (NullPointerException e) {