You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/09/15 19:45:54 UTC
lucene-solr:branch_6x: LUCENE-7439: FuzzyQuery now matches all terms
within the specified edit distance, even if they are short
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x bd9962aba -> 471f90cf8
LUCENE-7439: FuzzyQuery now matches all terms within the specified edit distance, even if they are short
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/471f90cf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/471f90cf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/471f90cf
Branch: refs/heads/branch_6x
Commit: 471f90cf825ee3106fef1fa4c1094d0ca461e7fb
Parents: bd9962a
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Sep 15 15:45:41 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Sep 15 15:45:41 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../org/apache/lucene/search/FuzzyTermsEnum.java | 2 +-
.../apache/lucene/search/TopTermsRewrite.java | 4 +++-
.../lucene/search/FuzzyTermOnShortTermsTest.java | 15 ++++++++-------
.../org/apache/lucene/search/TestFuzzyQuery.java | 10 ++++------
.../sandbox/queries/TestSlowFuzzyQuery.java | 19 +++++++++++++++----
6 files changed, 34 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 964719a..522da2f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -32,6 +32,9 @@ Bug Fixes
Improvements
+* LUCENE-7439: FuzzyQuery now matches all terms within the specified
+ edit distance, even if they are short terms (Mike McCandless)
+
Optimizations
Other
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
index 66a64e1..37f16b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@@ -350,7 +350,7 @@ public class FuzzyTermsEnum extends TermsEnum {
final int codePointCount = UnicodeUtil.codePointCount(term);
final float similarity = 1.0f - ((float) ed / (float)
(Math.min(codePointCount, termLength)));
- if (similarity > minSimilarity) {
+ if (minSimilarity == 0 || similarity > minSimilarity) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
//System.out.println(" yes");
return AcceptStatus.YES;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java b/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
index 013171d..b75836e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
@@ -160,7 +160,9 @@ public abstract class TopTermsRewrite<B> extends TermCollectingRewrite<B> {
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes.toBytesRef());
- addClause(b, term, st.termState.docFreq(), st.boost, st.termState); // add to query
+ // We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
+ // but truncate such boosts to 0.0f when building the query:
+ addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState); // add to query
}
return build(b);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/core/src/test/org/apache/lucene/search/FuzzyTermOnShortTermsTest.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/FuzzyTermOnShortTermsTest.java b/lucene/core/src/test/org/apache/lucene/search/FuzzyTermOnShortTermsTest.java
index 427888b..faf4552 100644
--- a/lucene/core/src/test/org/apache/lucene/search/FuzzyTermOnShortTermsTest.java
+++ b/lucene/core/src/test/org/apache/lucene/search/FuzzyTermOnShortTermsTest.java
@@ -49,15 +49,16 @@ public class FuzzyTermOnShortTermsTest extends LuceneTestCase {
countHits(a, new String[]{"abcde"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "abcde"), 2), 1);
- //these don't
- countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 0);
- countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 0);
+ // LUCENE-7439: these now work as well:
- countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 0);
- countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 0);
+ countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 1);
+ countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 1);
+
+ countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 1);
+ countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
- countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 0);
- countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 0);
+ countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 1);
+ countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 1);
}
private void countHits(Analyzer analyzer, String[] docs, Query q, int expected) throws Exception {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
index 1e90525..62e63ea 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
@@ -543,12 +543,10 @@ public class TestFuzzyQuery extends LuceneTestCase {
continue;
}
int ed = getDistance(term, queryTerm);
- if (Math.min(queryTerm.length(), term.length()) > ed) {
- float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
- while (ed < 3) {
- expected[ed].add(new TermAndScore(term, score));
- ed++;
- }
+ float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
+ while (ed < 3) {
+ expected[ed].add(new TermAndScore(term, score));
+ ed++;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/471f90cf/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
index 922213f..3ff1f3b 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/TestSlowFuzzyQuery.java
@@ -16,9 +16,11 @@
*/
package org.apache.lucene.sandbox.queries;
-import java.util.List;
-import java.util.Arrays;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -472,8 +474,17 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(2, hits.length);
- assertEquals("test", searcher.doc(hits[0].doc).get("field"));
- assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
+
+ // We cannot expect a particular order since both hits have a 0.0 score:
+ Set<String> actual = new HashSet<>();
+ actual.add(searcher.doc(hits[0].doc).get("field"));
+ actual.add(searcher.doc(hits[1].doc).get("field"));
+
+ Set<String> expected = new HashSet<>();
+ expected.add("test");
+ expected.add("foobar");
+
+ assertEquals(expected, actual);
reader.close();
index.close();