You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/03 15:13:33 UTC
svn commit: r1368933 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/search/ lucene/sandbox/
lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/
Author: rmuir
Date: Fri Aug 3 13:13:33 2012
New Revision: 1368933
URL: http://svn.apache.org/viewvc?rev=1368933&view=rev
Log:
LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
lucene/dev/branches/branch_4x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri Aug 3 13:13:33 2012
@@ -156,6 +156,9 @@ Bug Fixes
instance are already checked out and queued up but not yet flushed.
(Simon Willnauer)
+* LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
+ (Johannes Christen, Uwe Schindler, Robert Muir)
+
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Aug 3 13:13:33 2012
@@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends Term
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
// if minSimilarity >= 1, we treat it as number of edits
if (minSimilarity >= 1f) {
- this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
+ this.minSimilarity = 0; // just driven by number of edits
maxEdits = (int) minSimilarity;
raw = true;
} else {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java Fri Aug 3 13:13:33 2012
@@ -22,6 +22,7 @@ import java.util.Arrays;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -189,6 +190,41 @@ public class TestFuzzyQuery extends Luce
directory.close();
}
+ public void test2() throws Exception {
+ Directory directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+ addDoc("LANGE", writer);
+ addDoc("LUETH", writer);
+ addDoc("PIRSING", writer);
+ addDoc("RIEGEL", writer);
+ addDoc("TRZECZIAK", writer);
+ addDoc("WALKER", writer);
+ addDoc("WBR", writer);
+ addDoc("WE", writer);
+ addDoc("WEB", writer);
+ addDoc("WEBE", writer);
+ addDoc("WEBER", writer);
+ addDoc("WEBERE", writer);
+ addDoc("WEBREE", writer);
+ addDoc("WEBEREI", writer);
+ addDoc("WBRE", writer);
+ addDoc("WITTKOPF", writer);
+ addDoc("WOJNAROWSKI", writer);
+ addDoc("WRICKE", writer);
+
+ IndexReader reader = writer.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ writer.close();
+
+ FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
+ //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals(8, hits.length);
+
+ reader.close();
+ directory.close();
+ }
+
/**
* MultiTermQuery provides (via attribute) information about which values
* must be competitive to enter the priority queue.
Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java?rev=1368933&r1=1368932&r2=1368933&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java Fri Aug 3 13:13:33 2012
@@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends
assertEquals(1, hits.length);
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
- q = new SlowFuzzyQuery(new Term("field", "t"), 3);
- hits = searcher.search(q, 10).scoreDocs;
- assertEquals(1, hits.length);
- assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+ // TODO: cannot really be supported given the legacy scoring
+ // system which scores negative, if the distance > min term len,
+ // so such matches were always impossible with lucene 3.x, etc
+ //
+ //q = new SlowFuzzyQuery(new Term("field", "t"), 3);
+ //hits = searcher.search(q, 10).scoreDocs;
+ //assertEquals(1, hits.length);
+ //assertEquals("test", searcher.doc(hits[0].doc).get("field"));
- q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
- hits = searcher.search(q, 10).scoreDocs;
- assertEquals(1, hits.length);
- assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+ // q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
+ // hits = searcher.search(q, 10).scoreDocs;
+ // assertEquals(1, hits.length);
+ // assertEquals("test", searcher.doc(hits[0].doc).get("field"));
- q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
- hits = searcher.search(q, 10).scoreDocs;
- assertEquals(2, hits.length);
- assertEquals("test", searcher.doc(hits[0].doc).get("field"));
- assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
+ // q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
+ // hits = searcher.search(q, 10).scoreDocs;
+ // assertEquals(2, hits.length);
+ // assertEquals("test", searcher.doc(hits[0].doc).get("field"));
+ // assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
reader.close();
index.close();