You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/10 12:46:03 UTC
svn commit: r1371644 - in /lucene/dev/trunk/lucene: CHANGES.txt
core/src/java/org/apache/lucene/search/BooleanQuery.java
core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
Author: rmuir
Date: Fri Aug 10 10:46:03 2012
New Revision: 1371644
URL: http://svn.apache.org/viewvc?rev=1371644&view=rev
Log:
LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1371644&r1=1371643&r2=1371644&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 10 10:46:03 2012
@@ -20,6 +20,10 @@ Bug Fixes
did not work at all, it would infinitely recurse.
(Alberto Paro via Robert Muir)
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+ had a custom Similarity where coord(1,1) != 1F, then the rewritten
+ query would be scored differently. (Robert Muir)
+
======================= Lucene 4.0.0-BETA =======================
New features
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1371644&r1=1371643&r2=1371644&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Fri Aug 10 10:46:03 2012
@@ -213,7 +213,11 @@ public class BooleanQuery extends Query
}
public float coord(int overlap, int maxOverlap) {
- return similarity.coord(overlap, maxOverlap);
+ // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
+ // so coord() is not applied. But when BQ cannot optimize itself away
+ // for a single clause (minNrShouldMatch, prohibited clauses, etc), its
+ // important not to apply coord(1,1) for consistency, it might not be 1.0F
+ return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java?rev=1371644&r1=1371643&r2=1371644&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java Fri Aug 10 10:46:03 2012
@@ -23,6 +23,8 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -297,8 +299,8 @@ public class TestBooleanMinShouldMatch e
}
public void testRandomQueries() throws Exception {
- String field="data";
- String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
+ final String field="data";
+ final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
int maxLev=4;
// callback object to set a random setMinimumNumberShouldMatch
@@ -310,13 +312,18 @@ public class TestBooleanMinShouldMatch e
if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
}
q.setMinimumNumberShouldMatch(random().nextInt(opt+2));
+ if (random().nextBoolean()) {
+ // also add a random negation
+ Term randomTerm = new Term(field, vals[random().nextInt(vals.length)]);
+ q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
+ }
}
};
// increase number of iterations for more complete testing
- int num = atLeast(10);
+ int num = atLeast(20);
for (int i=0; i<num; i++) {
int lev = random().nextInt(maxLev);
final long seed = random().nextLong();
@@ -336,44 +343,90 @@ public class TestBooleanMinShouldMatch e
QueryUtils.check(random(), q1,s);
QueryUtils.check(random(), q2,s);
}
- // The constrained query
- // should be a superset to the unconstrained query.
- if (top2.totalHits > top1.totalHits) {
- fail("Constrained results not a subset:\n"
- + CheckHits.topdocsString(top1,0,0)
- + CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString());
- }
-
- for (int hit=0; hit<top2.totalHits; hit++) {
- int id = top2.scoreDocs[hit].doc;
- float score = top2.scoreDocs[hit].score;
- boolean found=false;
- // find this doc in other hits
- for (int other=0; other<top1.totalHits; other++) {
- if (top1.scoreDocs[other].doc == id) {
- found=true;
- float otherScore = top1.scoreDocs[other].score;
- // check if scores match
- assertEquals("Doc " + id + " scores don't match\n"
- + CheckHits.topdocsString(top1,0,0)
- + CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString(),
- score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
- }
- }
+ assertSubsetOfSameScores(q2, top1, top2);
+ }
+ // System.out.println("Total hits:"+tot);
+ }
+
+ private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
+ // The constrained query
+ // should be a subset to the unconstrained query.
+ if (top2.totalHits > top1.totalHits) {
+ fail("Constrained results not a subset:\n"
+ + CheckHits.topdocsString(top1,0,0)
+ + CheckHits.topdocsString(top2,0,0)
+ + "for query:" + q.toString());
+ }
- // check if subset
- if (!found) fail("Doc " + id + " not found\n"
+ for (int hit=0; hit<top2.totalHits; hit++) {
+ int id = top2.scoreDocs[hit].doc;
+ float score = top2.scoreDocs[hit].score;
+ boolean found=false;
+ // find this doc in other hits
+ for (int other=0; other<top1.totalHits; other++) {
+ if (top1.scoreDocs[other].doc == id) {
+ found=true;
+ float otherScore = top1.scoreDocs[other].score;
+ // check if scores match
+ assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString());
+ + "for query:" + q.toString(),
+ score, otherScore, CheckHits.explainToleranceDelta(score, otherScore));
+ }
}
+
+ // check if subset
+ if (!found) fail("Doc " + id + " not found\n"
+ + CheckHits.topdocsString(top1,0,0)
+ + CheckHits.topdocsString(top2,0,0)
+ + "for query:" + q.toString());
}
- // System.out.println("Total hits:"+tot);
}
-
+ public void testRewriteCoord1() throws Exception {
+ final Similarity oldSimilarity = s.getSimilarity();
+ try {
+ s.setSimilarity(new DefaultSimilarity() {
+ @Override
+ public float coord(int overlap, int maxOverlap) {
+ return overlap / ((float)maxOverlap + 1);
+ }
+ });
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ BooleanQuery q2 = new BooleanQuery();
+ q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ q2.setMinimumNumberShouldMatch(1);
+ TopDocs top1 = s.search(q1,null,100);
+ TopDocs top2 = s.search(q2,null,100);
+ assertSubsetOfSameScores(q2, top1, top2);
+ } finally {
+ s.setSimilarity(oldSimilarity);
+ }
+ }
+
+ public void testRewriteNegate() throws Exception {
+ final Similarity oldSimilarity = s.getSimilarity();
+ try {
+ s.setSimilarity(new DefaultSimilarity() {
+ @Override
+ public float coord(int overlap, int maxOverlap) {
+ return overlap / ((float)maxOverlap + 1);
+ }
+ });
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ BooleanQuery q2 = new BooleanQuery();
+ q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
+ TopDocs top1 = s.search(q1,null,100);
+ TopDocs top2 = s.search(q2,null,100);
+ assertSubsetOfSameScores(q2, top1, top2);
+ } finally {
+ s.setSimilarity(oldSimilarity);
+ }
+ }
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {