You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text rendering.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/02/27 15:07:12 UTC

svn commit: r748534 - in /lucene/java/trunk/contrib: ./ spellchecker/src/java/org/apache/lucene/search/spell/ spellchecker/src/test/org/apache/lucene/search/spell/

Author: mikemccand
Date: Fri Feb 27 14:07:12 2009
New Revision: 748534

URL: http://svn.apache.org/viewvc?rev=748534&view=rev
Log:
LUCENE-1548: fix distance normalization in LevenshteinDistance to not produce negative distances

Modified:
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
    lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
    lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Fri Feb 27 14:07:12 2009
@@ -25,6 +25,9 @@
  4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException
     due to recursive invocation. (Karl Wettin)
 
+ 5. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
+    not produce negative distances (Thomas Morton via Mike McCandless)
+
 New features
 
  1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of

Modified: lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java Fri Feb 27 14:07:12 2009
@@ -100,7 +100,7 @@
 
         // our last action in the above loop was to switch d and p, so p now
         // actually has the most recent cost counts
-        return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
+        return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
     }
 
 }

Modified: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java Fri Feb 27 14:07:12 2009
@@ -25,25 +25,25 @@
   
   public void testGetDistance() {
     float d = sd.getDistance("al", "al");
-    assertTrue(d == 1.0f);
+    assertEquals(d,1.0f,0.001);
     d = sd.getDistance("martha", "marhta");
-    assertTrue(d > 0.66 && d <0.67);
+    assertEquals(d,0.6666,0.001);
     d = sd.getDistance("jones", "johnson");
-    assertTrue(d > 0.199 && d < 0.201);
+    assertEquals(d,0.4285,0.001);
     d = sd.getDistance("abcvwxyz", "cabvwxyz");
-    assertTrue(d > 0.749 && d < 0.751);
+    assertEquals(d,0.75,0.001);    
     d = sd.getDistance("dwayne", "duane");
-    assertTrue(d > 0.599 && d < 0.601);
+    assertEquals(d,0.666,0.001);
     d = sd.getDistance("dixon", "dicksonx");
-    assertTrue(d > 0.199 && d < 0.201);
+    assertEquals(d,0.5,0.001);
     d = sd.getDistance("six", "ten");
-    assertTrue(d == 0f);
+    assertEquals(d,0,0.001);
     float d1 = sd.getDistance("zac ephron", "zac efron");
     float d2 = sd.getDistance("zac ephron", "kai ephron");
-    assertTrue(d1 < d2);
+    assertEquals(d1,d2,0.001);
     d1 = sd.getDistance("brittney spears", "britney spears");
     d2 = sd.getDistance("brittney spears", "brittney startzman");
-    assertTrue(d1 > d2);    
+    assertTrue(d1 > d2);
   }
 
 }

Modified: lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=748534&r1=748533&r2=748534&view=diff
==============================================================================
--- lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/java/trunk/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Fri Feb 27 14:07:12 2009
@@ -129,19 +129,22 @@
     assertEquals(similar[0], "five");
 
     similar = spellChecker.suggestSimilar("ive", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "five");
+    assertEquals(similar[1], "nine");
 
     similar = spellChecker.suggestSimilar("fives", 2);
     assertEquals(1, similar.length);
     assertEquals(similar[0], "five");
 
     similar = spellChecker.suggestSimilar("fie", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "five");
-
+    assertEquals(similar[1], "nine");
+    
     similar = spellChecker.suggestSimilar("fi", 2);
-    assertEquals(0, similar.length);
+    assertEquals(1, similar.length);
+    assertEquals(similar[0], "five");
 
     // test restraint to a field
     similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
@@ -151,8 +154,9 @@
     assertEquals(1, similar.length); // there is the term thousand in the field field2
     
     similar = spellChecker.suggestSimilar("onety", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "ninety");
+    assertEquals(similar[1], "one");
     try {
       similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
     } catch (NullPointerException e) {