You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/10 13:09:51 UTC

svn commit: r1371656 - in /lucene/dev/branches/lucene_solr_3_6: ./ lucene/ lucene/backwards/src/test/org/apache/lucene/search/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/test/org/apache/lucene/search/

Author: rmuir
Date: Fri Aug 10 11:09:50 2012
New Revision: 1371656

URL: http://svn.apache.org/viewvc?rev=1371656&view=rev
Log:
LUCENE-4300: BooleanQuery's rewrite was unsafe if coord(1,1) != 1

Removed:
    lucene/dev/branches/lucene_solr_3_6/lucene/backwards/src/test/org/apache/lucene/search/TestBooleanScorer.java
Modified:
    lucene/dev/branches/lucene_solr_3_6/   (props changed)
    lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt
    lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
    lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
    lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
    lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
    lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/CHANGES.txt Fri Aug 10 11:09:50 2012
@@ -23,6 +23,10 @@ Bug Fixes:
   than 1 when overlap == maxOverlap (always the case for conjunctions),
   then the score would be incorrect.  (Pascal Chollet, Robert Muir)
 
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+  had a custom Similarity where coord(1,1) != 1F, then the rewritten
+  query would be scored differently.  (Robert Muir)
+
 ======================= Lucene 3.6.1 =======================
 More information about this release, including any errata related to the 
 release notes, upgrade instructions, or other changes may be found online at:

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Fri Aug 10 11:09:50 2012
@@ -200,6 +200,13 @@ public class BooleanQuery extends Query 
       return sum ;
     }
 
+    float coord(int overlap, int maxOverlap) {
+      // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
+      // so coord() is not applied. But when BQ cannot optimize itself away
+      // for a single clause (minNrShouldMatch, prohibited clauses, etc), its
+      // important not to apply coord(1,1) for consistency, it might not be 1.0F
+      return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
+    }
 
     @Override
     public void normalize(float norm) {
@@ -272,7 +279,7 @@ public class BooleanQuery extends Query 
       sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
       sumExpl.setValue(sum);
       
-      final float coordFactor = disableCoord ? 1.0f : similarity.coord(coord, maxCoord);
+      final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
       if (coordFactor == 1.0f) {
         return sumExpl;                             // eliminate wrapper
       } else {

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java Fri Aug 10 11:09:50 2012
@@ -22,6 +22,7 @@ import java.util.List;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
 
 /* Description from Doug Cutting (excerpted from
  * LUCENE-1483):
@@ -203,7 +204,7 @@ final class BooleanScorer extends Scorer
   // Any time a prohibited clause matches we set bit 0:
   private static final int PROHIBITED_MASK = 1;
   
-  BooleanScorer(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
+  BooleanScorer(BooleanWeight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
     super(weight);
     this.minNrShouldMatch = minNrShouldMatch;
@@ -226,7 +227,7 @@ final class BooleanScorer extends Scorer
 
     coordFactors = new float[optionalScorers.size() + 1];
     for (int i = 0; i < coordFactors.length; i++) {
-      coordFactors[i] = disableCoord ? 1.0f : similarity.coord(i, maxCoord); 
+      coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord); 
     }
   }
 

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java Fri Aug 10 11:09:50 2012
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
 
 /* See the description in BooleanScorer.java, comparing
  * BooleanScorer & BooleanScorer2 */
@@ -42,10 +43,10 @@ class BooleanScorer2 extends Scorer {
     int maxCoord = 0; // to be increased for each non prohibited scorer
     int nrMatchers; // to be increased by score() of match counting scorers.
     
-    void init(Similarity sim, boolean disableCoord) { // use after all scorers have been added.
+    void init(BooleanWeight weight, boolean disableCoord) { // use after all scorers have been added.
       coordFactors = new float[optionalScorers.size() + requiredScorers.size() + 1];
       for (int i = 0; i < coordFactors.length; i++) {
-        coordFactors[i] = disableCoord ? 1.0f : sim.coord(i, maxCoord);
+        coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord);
       }
     }
   }
@@ -83,7 +84,7 @@ class BooleanScorer2 extends Scorer {
    * @param optional
    *          the list of optional scorers.
    */
-  public BooleanScorer2(Weight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
+  public BooleanScorer2(BooleanWeight weight, boolean disableCoord, Similarity similarity, int minNrShouldMatch,
       List<Scorer> required, List<Scorer> prohibited, List<Scorer> optional, int maxCoord) throws IOException {
     super(weight);
     if (minNrShouldMatch < 0) {
@@ -97,7 +98,7 @@ class BooleanScorer2 extends Scorer {
     requiredScorers = required;    
     prohibitedScorers = prohibited;
     
-    coordinator.init(similarity, disableCoord);
+    coordinator.init(weight, disableCoord);
     countingSumScorer = makeCountingSumScorer(disableCoord, similarity);
   }
   

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java Fri Aug 10 11:09:50 2012
@@ -23,6 +23,8 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.Directory;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -295,8 +297,8 @@ public class TestBooleanMinShouldMatch e
     }
 
     public void testRandomQueries() throws Exception {
-      String field="data";
-      String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
+      final String field="data";
+      final String[] vals = {"1","2","3","4","5","6","A","Z","B","Y","Z","X","foo"};
       int maxLev=4;
 
       // callback object to set a random setMinimumNumberShouldMatch
@@ -308,13 +310,18 @@ public class TestBooleanMinShouldMatch e
             if (c[i].getOccur() == BooleanClause.Occur.SHOULD) opt++;
           }
           q.setMinimumNumberShouldMatch(random.nextInt(opt+2));
+          if (random.nextBoolean()) {
+            // also add a random negation
+            Term randomTerm = new Term(field, vals[random.nextInt(vals.length)]);
+            q.add(new TermQuery(randomTerm), BooleanClause.Occur.MUST_NOT);
+          }
         }
       };
 
 
 
       // increase number of iterations for more complete testing      
-      int num = atLeast(10);
+      int num = atLeast(20);
       for (int i = 0; i < num; i++) {
         int lev = random.nextInt(maxLev);
         final long seed = random.nextLong();
@@ -334,44 +341,90 @@ public class TestBooleanMinShouldMatch e
           QueryUtils.check(random, q1,s);
           QueryUtils.check(random, q2,s);
         }
-        // The constrained query
-        // should be a superset to the unconstrained query.
-        if (top2.totalHits > top1.totalHits) {
-          fail("Constrained results not a subset:\n"
-                        + CheckHits.topdocsString(top1,0,0)
-                        + CheckHits.topdocsString(top2,0,0)
-                        + "for query:" + q2.toString());
-        }
-
-        for (int hit=0; hit<top2.totalHits; hit++) {
-          int id = top2.scoreDocs[hit].doc;
-          float score = top2.scoreDocs[hit].score;
-          boolean found=false;
-          // find this doc in other hits
-          for (int other=0; other<top1.totalHits; other++) {
-            if (top1.scoreDocs[other].doc == id) {
-              found=true;
-              float otherScore = top1.scoreDocs[other].score;
-              // check if scores match
-              assertEquals("Doc " + id + " scores don't match\n"
-                  + CheckHits.topdocsString(top1,0,0)
-                  + CheckHits.topdocsString(top2,0,0)
-                  + "for query:" + q2.toString(),
-                  score, otherScore, 1.0e-6f);
-            }
-          }
+        assertSubsetOfSameScores(q2, top1, top2);
+      }
+      // System.out.println("Total hits:"+tot);
+    }
+    
+    private void assertSubsetOfSameScores(Query q, TopDocs top1, TopDocs top2) {
+      // The constrained query
+      // should be a subset to the unconstrained query.
+      if (top2.totalHits > top1.totalHits) {
+        fail("Constrained results not a subset:\n"
+                      + CheckHits.topdocsString(top1,0,0)
+                      + CheckHits.topdocsString(top2,0,0)
+                      + "for query:" + q.toString());
+      }
 
-          // check if subset
-          if (!found) fail("Doc " + id + " not found\n"
+      for (int hit=0; hit<top2.totalHits; hit++) {
+        int id = top2.scoreDocs[hit].doc;
+        float score = top2.scoreDocs[hit].score;
+        boolean found=false;
+        // find this doc in other hits
+        for (int other=0; other<top1.totalHits; other++) {
+          if (top1.scoreDocs[other].doc == id) {
+            found=true;
+            float otherScore = top1.scoreDocs[other].score;
+            // check if scores match
+            assertEquals("Doc " + id + " scores don't match\n"
                 + CheckHits.topdocsString(top1,0,0)
                 + CheckHits.topdocsString(top2,0,0)
-                + "for query:" + q2.toString());
+                + "for query:" + q.toString(),
+                score, otherScore, CheckHits.EXPLAIN_SCORE_TOLERANCE_DELTA);
+          }
         }
+
+        // check if subset
+        if (!found) fail("Doc " + id + " not found\n"
+              + CheckHits.topdocsString(top1,0,0)
+              + CheckHits.topdocsString(top2,0,0)
+              + "for query:" + q.toString());
       }
-      // System.out.println("Total hits:"+tot);
     }
 
-
+    public void testRewriteCoord1() throws Exception {
+      final Similarity oldSimilarity = s.getSimilarity();
+      try {
+        s.setSimilarity(new DefaultSimilarity() {
+          @Override
+          public float coord(int overlap, int maxOverlap) {
+            return overlap / ((float)maxOverlap + 1);
+          }
+        });
+        BooleanQuery q1 = new BooleanQuery();
+        q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+        BooleanQuery q2 = new BooleanQuery();
+        q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+        q2.setMinimumNumberShouldMatch(1);
+        TopDocs top1 = s.search(q1,null,100);
+        TopDocs top2 = s.search(q2,null,100);
+        assertSubsetOfSameScores(q2, top1, top2);
+      } finally {
+        s.setSimilarity(oldSimilarity);
+      }
+    }
+    
+    public void testRewriteNegate() throws Exception {
+      final Similarity oldSimilarity = s.getSimilarity();
+      try {
+        s.setSimilarity(new DefaultSimilarity() {
+          @Override
+          public float coord(int overlap, int maxOverlap) {
+            return overlap / ((float)maxOverlap + 1);
+          }
+        });
+        BooleanQuery q1 = new BooleanQuery();
+        q1.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+        BooleanQuery q2 = new BooleanQuery();
+        q2.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
+        q2.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);
+        TopDocs top1 = s.search(q1,null,100);
+        TopDocs top2 = s.search(q2,null,100);
+        assertSubsetOfSameScores(q2, top1, top2);
+      } finally {
+        s.setSimilarity(oldSimilarity);
+      }
+    }
 
     protected void printHits(String test, ScoreDoc[] h, Searcher searcher) throws Exception {
 

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1371656&r1=1371655&r2=1371656&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java Fri Aug 10 11:09:50 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery.BooleanWeight;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -85,7 +86,14 @@ public class TestBooleanScorer extends L
       }
       
     }};
-    BooleanScorer bs = new BooleanScorer(null, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
+    Directory directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+    writer.commit();
+    IndexReader ir = writer.getReader();
+    writer.close();
+    IndexSearcher searcher = newSearcher(ir);
+    BooleanWeight weight = (BooleanWeight) new BooleanQuery().createWeight(searcher);
+    BooleanScorer bs = new BooleanScorer(weight, false, sim, 1, Arrays.asList(scorers), null, scorers.length);
 
     final List<Integer> hits = new ArrayList<Integer>();
     bs.score(new Collector() {
@@ -112,6 +120,8 @@ public class TestBooleanScorer extends L
 
     assertEquals("should have only 1 hit", 1, hits.size());
     assertEquals("hit should have been docID=3000", 3000, hits.get(0).intValue());
+    ir.close();
+    directory.close();
   }
 
   public void testMoreThan32ProhibitedClauses() throws Exception {