You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/10 13:09:51 UTC
svn commit: r1371656 - in /lucene/dev/branches/lucene_solr_3_6: ./ lucene/
lucene/backwards/src/test/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/search/
Author: rmuir
Date: Fri Aug 10 11:09:50 2012
New Revision: 1371656
URL: http://svn.apache.org/viewvc?rev=1371656&view=rev
Log:
LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1
Removed:
lucene/dev/branches/lucene_solr_3_6/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java
Modified:
lucene/dev/branches/lucene_solr_3_6/ (props changed)
lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt Fri Aug 10 11:09:50 2012
@@ -23,6 +23,10 @@ Bug Fixes:
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+ had a custom Similarity where coord(1,1) != 1F, then the rewritten
+ query would be scored differently. (Robert Muir)
+
======================= Lucene 3.6.1 =======================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Fri Aug 10 11:09:50 2012
@@ -200,6 +200,13 @@ public class BooleanQuery extends Query
return sum ;
}
+ float coord(int overlap, int maxOverlap) {
+ // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
+ // so coord() is not applied. But when BQ cannot optimize itself away
+ // for a single clause (minNrShouldMatch, prohibited clauses, etc), its
+ // important not to apply coord(1,1) for consistency, it might not be 1.0F
+ return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
+ }
@Override
public void normalize(float norm) {
@@ -272,7 +279,7 @@ public class BooleanQuery extends Query
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
sumExpl.setValue(sum);
- final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord);
+ final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
if (coordFactor == 1.0f) {
return sumExpl; // eliminate wrapper
} else {
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java Fri Aug 10 11:09:50 2012
@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
/* Description from Doug Cutting (excerpted from
* LUCENE-1483):
@@ -203,7 +204,7 @@ final class BooleanScorer extends Scorer
// Any time a prohibited clause matches we set bit 0:
private static final int PROHIBITED_MASK = 1;
- BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
+ BooleanScorer(BooleanWeight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
super(weight);
this.minNrShouldMatch = minNrShouldMatch;
@@ -226,7 +227,7 @@ final class BooleanScorer extends Scorer
coordFactors = new float[optionalScorers.size() + 1];
for (int i = 0; i < coordFactors.length; i++) {
- coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord);
+ coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
}
}
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java Fri Aug 10 11:09:50 2012
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
/* See the description in BooleanScorer.java, comparing
* BooleanScorer & BooleanScorer2 */
@@ -42,10 +43,10 @@ class BooleanScorer2 extends Scorer {
int maxCoord = 0; // to be increased for each non prohibited scorer
int nrMatchers; // to be increased by score() of match counting scorers.
- void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added.
+ void init(BooleanWeight weight, boolean disableCoord) { // use after all scorers have been added.
coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
for (int i = 0; i < coordFactors.length; i++) {
- coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord);
+ coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
}
}
}
@@ -83,7 +84,7 @@ class BooleanScorer2 extends Scorer {
* @param optional
* the list of optional scorers.
*/
- public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
+ public BooleanScorer2(BooleanWeight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
super(weight);
if (minNrShouldMatch < 0) {
@@ -97,7 +98,7 @@ class BooleanScorer2 extends Scorer {
requiredScorers = required;
prohibitedScorers = prohibited;
- coordinator.init(similarity, disableCoord);
+ coordinator.init(weight, disableCoord);
countingSumScorer = makeCountingSumScorer(disableCoord, similarity);
}
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java Fri Aug 10 11:09:50 2012
@@ -23,6 +23,8 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -295,8 +297,8 @@ public class TestBooleanMinShouldMatch e
}
public void testRandomQueries() throws Exception {
- String field="data";
- String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
+ final String field="data";
+ final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
int maxLev=4;
// callback object to set a random setMinimumNumberShouldMatch
@@ -308,13 +310,18 @@ public class TestBooleanMinShouldMatch e
if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
}
q.setMinimumNumberShouldMatch(random.nextInt(opt+2));
+ if (random.nextBoolean()) {
+ // also add a random negation
+ Term randomTerm = new Term(field, vals[random.nextInt(vals.length)]);
+ q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
+ }
}
};
// increase number of iterations for more complete testing
- int num = atLeast(10);
+ int num = atLeast(20);
for (int i = 0; i < num; i++) {
int lev = random.nextInt(maxLev);
final long seed = random.nextLong();
@@ -334,44 +341,90 @@ public class TestBooleanMinShouldMatch e
QueryUtils.check(random, q1,s);
QueryUtils.check(random, q2,s);
}
- // The constrained query
- // should be a superset to the unconstrained query.
- if (top2.totalHits > top1.totalHits) {
- fail("Constrained results not a subset:\n"
- + CheckHits.topdocsString(top1,0,0)
- + CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString());
- }
-
- for (int hit=0; hit<top2.totalHits; hit++) {
- int id = top2.scoreDocs[hit].doc;
- float score = top2.scoreDocs[hit].score;
- boolean found=false;
- // find this doc in other hits
- for (int other=0; other<top1.totalHits; other++) {
- if (top1.scoreDocs[other].doc == id) {
- found=true;
- float otherScore = top1.scoreDocs[other].score;
- // check if scores match
- assertEquals("Doc " + id + " scores don't match\n"
- + CheckHits.topdocsString(top1,0,0)
- + CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString(),
- score, otherScore, 1.0e-6f);
- }
- }
+ assertSubsetOfSameScores(q2, top1, top2);
+ }
+ // System.out.println("Total hits:"+tot);
+ }
+
+ private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
+ // The constrained query
+ // should be a subset to the unconstrained query.
+ if (top2.totalHits > top1.totalHits) {
+ fail("Constrained results not a subset:\n"
+ + CheckHits.topdocsString(top1,0,0)
+ + CheckHits.topdocsString(top2,0,0)
+ + "for query:" + q.toString());
+ }
- // check if subset
- if (!found) fail("Doc " + id + " not found\n"
+ for (int hit=0; hit<top2.totalHits; hit++) {
+ int id = top2.scoreDocs[hit].doc;
+ float score = top2.scoreDocs[hit].score;
+ boolean found=false;
+ // find this doc in other hits
+ for (int other=0; other<top1.totalHits; other++) {
+ if (top1.scoreDocs[other].doc == id) {
+ found=true;
+ float otherScore = top1.scoreDocs[other].score;
+ // check if scores match
+ assertEquals("Doc " + id + " scores don't match\n"
+ CheckHits.topdocsString(top1,0,0)
+ CheckHits.topdocsString(top2,0,0)
- + "for query:" + q2.toString());
+ + "for query:" + q.toString(),
+ score, otherScore, CheckHits.EXPLAIN_SCORE_TOLERANCE_DELTA);
+ }
}
+
+ // check if subset
+ if (!found) fail("Doc " + id + " not found\n"
+ + CheckHits.topdocsString(top1,0,0)
+ + CheckHits.topdocsString(top2,0,0)
+ + "for query:" + q.toString());
}
- // System.out.println("Total hits:"+tot);
}
-
+ public void testRewriteCoord1() throws Exception {
+ final Similarity oldSimilarity = s.getSimilarity();
+ try {
+ s.setSimilarity(new DefaultSimilarity() {
+ @Override
+ public float coord(int overlap, int maxOverlap) {
+ return overlap / ((float)maxOverlap + 1);
+ }
+ });
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ BooleanQuery q2 = new BooleanQuery();
+ q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ q2.setMinimumNumberShouldMatch(1);
+ TopDocs top1 = s.search(q1,null,100);
+ TopDocs top2 = s.search(q2,null,100);
+ assertSubsetOfSameScores(q2, top1, top2);
+ } finally {
+ s.setSimilarity(oldSimilarity);
+ }
+ }
+
+ public void testRewriteNegate() throws Exception {
+ final Similarity oldSimilarity = s.getSimilarity();
+ try {
+ s.setSimilarity(new DefaultSimilarity() {
+ @Override
+ public float coord(int overlap, int maxOverlap) {
+ return overlap / ((float)maxOverlap + 1);
+ }
+ });
+ BooleanQuery q1 = new BooleanQuery();
+ q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ BooleanQuery q2 = new BooleanQuery();
+ q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+ q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
+ TopDocs top1 = s.search(q1,null,100);
+ TopDocs top2 = s.search(q2,null,100);
+ assertSubsetOfSameScores(q2, top1, top2);
+ } finally {
+ s.setSimilarity(oldSimilarity);
+ }
+ }
protected void printHits(String test, ScoreDoc[] h, Searcher searcher) throws Exception {
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java Fri Aug 10 11:09:50 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -85,7 +86,14 @@ public class TestBooleanScorer extends L
}
}};
- BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
+ Directory directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+ writer.commit();
+ IndexReader ir = writer.getReader();
+ writer.close();
+ IndexSearcher searcher = newSearcher(ir);
+ BooleanWeight weight = (BooleanWeight) new BooleanQuery().createWeight(searcher);
+ BooleanScorer bs = new BooleanScorer(weight, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
final List<Integer> hits = new ArrayList<Integer>();
bs.score(new Collector() {
@@ -112,6 +120,8 @@ public class TestBooleanScorer extends L
assertEquals("should have only 1 hit", 1, hits.size());
assertEquals("hit should have been docID=3000", 3000, hits.get(0).intValue());
+ ir.close();
+ directory.close();
}
public void testMoreThan32ProhibitedClauses() throws Exception {