You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/03 15:13:33 UTC

svn commit: r1368933 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/test/org/apache/lucene/search/ lucene/sandbox/ lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/

Author: rmuir
Date: Fri Aug  3 13:13:33 2012
New Revision: 1368933

URL: http://svn.apache.org/viewvc?rev=1368933&view=rev
Log:
LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri Aug  3 13:13:33 2012
@@ -156,6 +156,9 @@ Bug Fixes
   instance are already checked out and queued up but not yet flushed. 
   (Simon Willnauer)
 
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
+  (Johannes Christen, Uwe Schindler, Robert Muir)
+
 Changes in Runtime Behavior
 
 * LUCENE-4109: Enable position increments in the flexible queryparser by default.

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Aug  3 13:13:33 2012
@@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends Term
     this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
     // if minSimilarity >= 1, we treat it as number of edits
     if (minSimilarity >= 1f) {
-      this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
+      this.minSimilarity = 0; // just driven by number of edits
       maxEdits = (int) minSimilarity;
       raw = true;
     } else {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java Fri Aug  3 13:13:33 2012
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -189,6 +190,41 @@ public class TestFuzzyQuery extends Luce
     directory.close();
   }
   
+  public void test2() throws Exception {
+    Directory directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+    addDoc("LANGE", writer);
+    addDoc("LUETH", writer);
+    addDoc("PIRSING", writer);
+    addDoc("RIEGEL", writer);
+    addDoc("TRZECZIAK", writer);
+    addDoc("WALKER", writer);
+    addDoc("WBR", writer);
+    addDoc("WE", writer);
+    addDoc("WEB", writer);
+    addDoc("WEBE", writer);
+    addDoc("WEBER", writer);
+    addDoc("WEBERE", writer);
+    addDoc("WEBREE", writer);
+    addDoc("WEBEREI", writer);
+    addDoc("WBRE", writer);
+    addDoc("WITTKOPF", writer);
+    addDoc("WOJNAROWSKI", writer);
+    addDoc("WRICKE", writer);
+
+    IndexReader reader = writer.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    writer.close();
+
+    FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
+    //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(8, hits.length);
+
+    reader.close();
+    directory.close();
+  }
+  
   /** 
    * MultiTermQuery provides (via attribute) information about which values
    * must be competitive to enter the priority queue. 

Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java Fri Aug  3 13:13:33 2012
@@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends 
     assertEquals(1, hits.length);
     assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
     
-    q = new SlowFuzzyQuery(new Term("field", "t"), 3);
-    hits = searcher.search(q, 10).scoreDocs;
-    assertEquals(1, hits.length);
-    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    // TODO: cannot really be supported given the legacy scoring
+    // system, which scores negative if the distance > min term length,
+    // so such matches were always impossible with Lucene 3.x, etc.
+    //
+    //q = new SlowFuzzyQuery(new Term("field", "t"), 3);
+    //hits = searcher.search(q, 10).scoreDocs;
+    //assertEquals(1, hits.length);
+    //assertEquals("test", searcher.doc(hits[0].doc).get("field"));
     
-    q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
-    hits = searcher.search(q, 10).scoreDocs;
-    assertEquals(1, hits.length);
-    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    // q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
+    // hits = searcher.search(q, 10).scoreDocs;
+    // assertEquals(1, hits.length);
+    // assertEquals("test", searcher.doc(hits[0].doc).get("field"));
     
-    q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
-    hits = searcher.search(q, 10).scoreDocs;
-    assertEquals(2, hits.length);
-    assertEquals("test", searcher.doc(hits[0].doc).get("field"));
-    assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
+    // q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
+    // hits = searcher.search(q, 10).scoreDocs;
+    // assertEquals(2, hits.length);
+    // assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+    // assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
     
     reader.close();
     index.close();