You are viewing a plain-text version of this content; the link to the canonical version is not preserved in this view.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/02/27 15:12:22 UTC

svn commit: r748535 - in /lucene/java/branches/lucene_2_4/contrib: ./ spellchecker/src/java/org/apache/lucene/search/spell/ spellchecker/src/test/org/apache/lucene/search/spell/

Author: mikemccand
Date: Fri Feb 27 14:12:21 2009
New Revision: 748535

URL: http://svn.apache.org/viewvc?rev=748535&view=rev
Log:
LUCENE-1548 (on 2.4 branch): fix distance normalization in LevenshteinDistance to not produce negative distances

Modified:
    lucene/java/branches/lucene_2_4/contrib/CHANGES.txt
    lucene/java/branches/lucene_2_4/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
    lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
    lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java

Modified: lucene/java/branches/lucene_2_4/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/contrib/CHANGES.txt?rev=748535&r1=748534&r2=748535&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_4/contrib/CHANGES.txt Fri Feb 27 14:12:21 2009
@@ -4,6 +4,9 @@
 
 Bug fixes
 
+ 1. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
+    not produce negative distances (Thomas Morton via Mike McCandless)
+
 ======================= Release 2.4.0 2008-10-06 =======================
 
 Changes in runtime behavior

Modified: lucene/java/branches/lucene_2_4/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java?rev=748535&r1=748534&r2=748535&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java (original)
+++ lucene/java/branches/lucene_2_4/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java Fri Feb 27 14:12:21 2009
@@ -100,7 +100,7 @@
 
         // our last action in the above loop was to switch d and p, so p now
         // actually has the most recent cost counts
-        return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
+        return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
     }
 
 }

Modified: lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java?rev=748535&r1=748534&r2=748535&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java (original)
+++ lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java Fri Feb 27 14:12:21 2009
@@ -25,25 +25,25 @@
   
   public void testGetDistance() {
     float d = sd.getDistance("al", "al");
-    assertTrue(d == 1.0f);
+    assertEquals(d,1.0f,0.001);
     d = sd.getDistance("martha", "marhta");
-    assertTrue(d > 0.66 && d <0.67);
+    assertEquals(d,0.6666,0.001);
     d = sd.getDistance("jones", "johnson");
-    assertTrue(d > 0.199 && d < 0.201);
+    assertEquals(d,0.4285,0.001);
     d = sd.getDistance("abcvwxyz", "cabvwxyz");
-    assertTrue(d > 0.749 && d < 0.751);
+    assertEquals(d,0.75,0.001);    
     d = sd.getDistance("dwayne", "duane");
-    assertTrue(d > 0.599 && d < 0.601);
+    assertEquals(d,0.666,0.001);
     d = sd.getDistance("dixon", "dicksonx");
-    assertTrue(d > 0.199 && d < 0.201);
+    assertEquals(d,0.5,0.001);
     d = sd.getDistance("six", "ten");
-    assertTrue(d == 0f);
+    assertEquals(d,0,0.001);
     float d1 = sd.getDistance("zac ephron", "zac efron");
     float d2 = sd.getDistance("zac ephron", "kai ephron");
-    assertTrue(d1 < d2);
+    assertEquals(d1,d2,0.001);
     d1 = sd.getDistance("brittney spears", "britney spears");
     d2 = sd.getDistance("brittney spears", "brittney startzman");
-    assertTrue(d1 > d2);    
+    assertTrue(d1 > d2);
   }
 
 }

Modified: lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=748535&r1=748534&r2=748535&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (original)
+++ lucene/java/branches/lucene_2_4/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java Fri Feb 27 14:12:21 2009
@@ -129,19 +129,22 @@
     assertEquals(similar[0], "five");
 
     similar = spellChecker.suggestSimilar("ive", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "five");
+    assertEquals(similar[1], "nine");
 
     similar = spellChecker.suggestSimilar("fives", 2);
     assertEquals(1, similar.length);
     assertEquals(similar[0], "five");
 
     similar = spellChecker.suggestSimilar("fie", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "five");
-
+    assertEquals(similar[1], "nine");
+    
     similar = spellChecker.suggestSimilar("fi", 2);
-    assertEquals(0, similar.length);
+    assertEquals(1, similar.length);
+    assertEquals(similar[0], "five");
 
     // test restraint to a field
     similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
@@ -151,8 +154,9 @@
     assertEquals(1, similar.length); // there is the term thousand in the field field2
     
     similar = spellChecker.suggestSimilar("onety", 2);
-    assertEquals(1, similar.length);
+    assertEquals(2, similar.length);
     assertEquals(similar[0], "ninety");
+    assertEquals(similar[1], "one");
     try {
       similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
     } catch (NullPointerException e) {