You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by do...@apache.org on 2007/06/04 22:41:08 UTC

svn commit: r544254 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/search/payloads/ src/test/org/apache/lucene/search/ src/test/org/apache/lucene/search/payloads/

Author: doronc
Date: Mon Jun  4 13:41:06 2007
New Revision: 544254

URL: http://svn.apache.org/viewvc?view=rev&rev=544254
Log:
LUCENE-903: FilteredQuery explanation inaccuracy with boost.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/FilteredQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/search/CheckHits.java
    lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestSimpleExplanations.java
    lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Jun  4 13:41:06 2007
@@ -152,6 +152,10 @@
 
 20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in 
     enumeration. (Christian Mallwitz via Daniel Naber)
+    
+21. LUCENE-903: FilteredQuery explanation inaccuracy with boost.
+    Explanation tests now "deep" check the explanation details.
+    (Chris Hostetter, Doron Cohen)
 
 New features
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FilteredQuery.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FilteredQuery.java Mon Jun  4 13:41:06 2007
@@ -79,7 +79,12 @@
       }
       public Explanation explain (IndexReader ir, int i) throws IOException {
         Explanation inner = weight.explain (ir, i);
-        inner.setValue(getBoost() * inner.getValue());
+        if (getBoost()!=1) {
+          Explanation preBoost = inner;
+          inner = new Explanation(inner.getValue()*getBoost(),"product of:");
+          inner.addDetail(new Explanation(getBoost(),"boost"));
+          inner.addDetail(preBoost);
+        }
         Filter f = FilteredQuery.this.filter;
         BitSet matches = f.bits(ir);
         if (matches.get(i))

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java Mon Jun  4 13:41:06 2007
@@ -166,7 +166,7 @@
         //GSI: I suppose we could toString the payload, but I don't think that would be a good idea 
         payloadBoost.setDescription("scorePayload(...)");
         result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
-        result.setDescription("btq");
+        result.setDescription("btq, product of:");
         return result;
       }
     }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/CheckHits.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/CheckHits.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/CheckHits.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/CheckHits.java Mon Jun  4 13:41:06 2007
@@ -29,6 +29,13 @@
 public class CheckHits {
   
   /**
+   * Some explains methods calculate their vlaues though a slightly
+   * differnet  order of operations from the acctaul scoring method ...
+   * this allows for a small amount of variation
+   */
+  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
+    
+  /**
    * Tests that all documents up to maxDoc which are *not* in the
    * expected result set, have an explanation which indicates no match
    * (ie: Explanation value of 0.0f)
@@ -226,10 +233,13 @@
   }
 
   /**
-   * Asserts that the score explanation for every document matching a
-   * query corrisponds with the true score.
+   * Asserts that the explanation value for every document matching a
+   * query corresponds with the true score. 
    *
    * @see ExplanationAsserter
+   * @see #checkExplanations(Query, String, Searcher, boolean) for a
+   * "deep" testing of the explanation details.
+   *   
    * @param query the query to test
    * @param searcher the searcher to test the query against
    * @param defaultFieldName used for displaing the query in assertion messages
@@ -237,16 +247,123 @@
   public static void checkExplanations(Query query,
                                        String defaultFieldName,
                                        Searcher searcher) throws IOException {
+    checkExplanations(query, defaultFieldName, searcher, false);
+  }
+
+  /**
+   * Asserts that the explanation value for every document matching a
+   * query corresponds with the true score.  Optionally does "deep" 
+   * testing of the explanation details.
+   *
+   * @see ExplanationAsserter
+   * @param query the query to test
+   * @param searcher the searcher to test the query against
+   * @param defaultFieldName used for displaing the query in assertion messages
+   * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
+   */
+  public static void checkExplanations(Query query,
+                                       String defaultFieldName,
+                                       Searcher searcher, 
+                                       boolean deep) throws IOException {
 
     searcher.search(query,
                     new ExplanationAsserter
-                    (query, defaultFieldName, searcher));
+                    (query, defaultFieldName, searcher, deep));
 
   }
 
+  /** 
+   * Assert that an explanation has the expected score, and optionally that its
+   * sub-details max/sum/factor match to that score.
+   *
+   * @param q String representation of the query for assertion messages
+   * @param doc Document ID for assertion messages
+   * @param score Real score value of doc with query q
+   * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
+   * @param expl The Explanation to match against score
+   */
+  public static void verifyExplanation(String q, 
+                                       int doc, 
+                                       float score,
+                                       boolean deep,
+                                       Explanation expl) {
+    float value = expl.getValue();
+    TestCase.assertEquals(q+": score(doc="+doc+")="+score+
+        " != explanationScore="+value+" Explanation: "+expl,
+        score,value,EXPLAIN_SCORE_TOLERANCE_DELTA);
+
+    if (!deep) return;
+
+    Explanation detail[] = expl.getDetails();
+    if (detail!=null) {
+      if (detail.length==1) {
+        // simple containment, no matter what the description says, 
+        // just verify contained expl has same score
+        verifyExplanation(q,doc,score,deep,detail[0]);
+      } else {
+        // explanation must either:
+        // - end with one of: "product of:", "sum of:", "max of:", or
+        // - have "max plus <x> times others" (where <x> is float).
+        float x = 0;
+        String descr = expl.getDescription().toLowerCase();
+        boolean productOf = descr.endsWith("product of:");
+        boolean sumOf = descr.endsWith("sum of:");
+        boolean maxOf = descr.endsWith("max of:");
+        boolean maxTimesOthers = false;
+        if (!(productOf || sumOf || maxOf)) {
+          // maybe 'max plus x times others'
+          int k1 = descr.indexOf("max plus ");
+          if (k1>=0) {
+            k1 += "max plus ".length();
+            int k2 = descr.indexOf(" ",k1);
+            try {
+              x = Float.parseFloat(descr.substring(k1,k2).trim());
+              if (descr.substring(k2).trim().equals("times others of:")) {
+                maxTimesOthers = true;
+              }
+            } catch (NumberFormatException e) {
+            }
+          }
+        }
+        TestCase.assertTrue(
+            q+": multi valued explanation description=\""+descr
+            +"\" must be 'max of plus x times others' or end with 'prodoct of'"
+            +" or 'sum of:' or 'max of:' - "+expl,
+            productOf || sumOf || maxOf || maxTimesOthers);
+        float sum = 0;
+        float product = 1;
+        float max = 0;
+        for (int i=0; i<detail.length; i++) {
+          float dval = detail[i].getValue();
+          verifyExplanation(q,doc,dval,deep,detail[i]);
+          product *= dval;
+          sum += dval;
+          max = Math.max(max,dval);
+        }
+        float combined = 0;
+        if (productOf) {
+          combined = product;
+        } else if (sumOf) {
+          combined = sum;
+        } else if (maxOf) {
+          combined = max;
+        } else if (maxTimesOthers) {
+          combined = max + x * (sum - max);
+        } else {
+            TestCase.assertTrue("should never get here!",false);
+        }
+        TestCase.assertEquals(q+": actual subDetails combined=="+combined+
+            " != value="+value+" Explanation: "+expl,
+            combined,value,EXPLAIN_SCORE_TOLERANCE_DELTA);
+      }
+    }
+  }
+
   /**
    * an IndexSearcher that implicitly checks hte explanation of every match
-   * whenever it executes a search
+   * whenever it executes a search.
+   *
+   * @see ExplanationAsserter
    */
   public static class ExplanationAssertingSearcher extends IndexSearcher {
     public ExplanationAssertingSearcher(Directory d) throws IOException {
@@ -300,28 +417,37 @@
     
   /**
    * Asserts that the score explanation for every document matching a
-   * query corrisponds with the true score.
+   * query corresponds with the true score.
    *
    * NOTE: this HitCollector should only be used with the Query and Searcher
    * specified at when it is constructed.
+   *
+   * @see CheckHits#verifyExplanation
    */
   public static class ExplanationAsserter extends HitCollector {
 
     /**
-     * Some explains methods calculate their vlaues though a slightly
-     * differnet  order of operations from the acctaul scoring method ...
-     * this allows for a small amount of variation
+     * @deprecated
+     * @see CheckHits#EXPLAIN_SCORE_TOLERANCE_DELTA
      */
     public static float SCORE_TOLERANCE_DELTA = 0.00005f;
-    
+
     Query q;
     Searcher s;
     String d;
+    boolean deep;
+    
+    /** Constructs an instance which does shallow tests on the Explanation */
     public ExplanationAsserter(Query q, String defaultFieldName, Searcher s) {
+      this(q,defaultFieldName,s,false);
+    }      
+    public ExplanationAsserter(Query q, String defaultFieldName, Searcher s, boolean deep) {
       this.q=q;
       this.s=s;
       this.d = q.toString(defaultFieldName);
+      this.deep=deep;
     }      
+
     public void collect(int doc, float score) {
       Explanation exp = null;
       
@@ -334,9 +460,7 @@
       
       TestCase.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
                              exp);
-      TestCase.assertEquals("Score of [["+d+"]] for #"+doc+
-                            " does not match explanation: " + exp.toString(),
-                            score, exp.getValue(), SCORE_TOLERANCE_DELTA);
+      verifyExplanation(d,doc,score,deep,exp);
     }
     
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/QueryUtils.java Mon Jun  4 13:41:06 2007
@@ -64,15 +64,27 @@
     // happens, please change test to use a different example.
     TestCase.assertTrue(q1.hashCode() != q2.hashCode());
   }
-
-
-  /** various query sanity checks on a searcher */
+  
+  /** deep check that explanations of a query 'score' correctly */
+  public static void checkExplanations (final Query q, final Searcher s) throws IOException {
+    CheckHits.checkExplanations(q, null, s, true);
+  }
+  
+  /** 
+   * various query sanity checks on a searcher, including explanation checks.
+   * @see #checkExplanations
+   * @see #checkSkipTo
+   * @see #check(Query)
+   */
   public static void check(Query q1, Searcher s) {
     try {
       check(q1);
-      if (s!=null && s instanceof IndexSearcher) {
-        IndexSearcher is = (IndexSearcher)s;
-        checkSkipTo(q1,is);
+      if (s!=null) {
+        if (s instanceof IndexSearcher) {
+          IndexSearcher is = (IndexSearcher)s;
+          checkSkipTo(q1,is);
+        }
+        checkExplanations(q1,s);
       }
     } catch (IOException e) {
       throw new RuntimeException(e);

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestExplanations.java Mon Jun  4 13:41:06 2007
@@ -83,13 +83,14 @@
     return qp.parse(queryText);
   }
 
+  /** check the expDocNrs first, then check the query (and the explanations) */
   public void qtest(String queryText, int[] expDocNrs) throws Exception {
     qtest(makeQuery(queryText), expDocNrs);
   }
+  
+  /** check the expDocNrs first, then check the query (and the explanations) */
   public void qtest(Query q, int[] expDocNrs) throws Exception {
-    // check that the expDocNrs first, then check the explanations
     CheckHits.checkHitCollector(q, FIELD, searcher, expDocNrs);
-    CheckHits.checkExplanations(q, FIELD, searcher);
   }
 
   /**

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSimpleExplanations.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSimpleExplanations.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSimpleExplanations.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSimpleExplanations.java Mon Jun  4 13:41:06 2007
@@ -17,26 +17,6 @@
  * limitations under the License.
  */
 
-
-import org.apache.lucene.store.RAMDirectory;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.queryParser.ParseException;
-
-import junit.framework.TestCase;
-
-import java.util.Random;
-import java.util.BitSet;
-
 /**
  * TestExplanations subclass focusing on basic query types
  */

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java?view=diff&rev=544254&r1=544253&r2=544254
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java Mon Jun  4 13:41:06 2007
@@ -130,7 +130,7 @@
       ScoreDoc doc = hits.scoreDocs[i];
       assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
     }
-    CheckHits.checkExplanations(query, "field", searcher);
+    CheckHits.checkExplanations(query, "field", searcher, true);
     Spans spans = query.getSpans(searcher.getIndexReader());
     assertTrue("spans is null and it shouldn't be", spans != null);
     assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
@@ -170,7 +170,7 @@
       }
     }
     assertTrue(numTens + " does not equal: " + 10, numTens == 10);
-    CheckHits.checkExplanations(query, "field", searcher);
+    CheckHits.checkExplanations(query, "field", searcher, true);
     Spans spans = query.getSpans(searcher.getIndexReader());
     assertTrue("spans is null and it shouldn't be", spans != null);
     assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);