You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/20 19:16:03 UTC

svn commit: r1159888 - in /lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities: BasicModelBE.java BasicModelD.java BasicModelP.java EasySimilarity.java LMDirichletSimilarity.java LMSimilarity.java TestEasySimilarity.java

Author: rmuir
Date: Sat Aug 20 17:16:02 2011
New Revision: 1159888

URL: http://svn.apache.org/viewvc?rev=1159888&view=rev
Log:
LUCENE-3357: add unit and integration tests for new sims (and workarounds for formula bugs the tests found)

Modified:
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java Sat Aug 20 17:16:02 2011
@@ -33,7 +33,7 @@ public class BasicModelBE extends BasicM
 //    long F = stats.getTotalTermFreq() + 1;
     long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
     return (float)(-log2((N - 1) * Math.E)
-        + f(N + F -1, N + F - tfn - 2) - f(F, F - tfn));
+        + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn));
   }
   
   /** The <em>f</em> helper function defined for <em>B<sub>E</sub></em>. */

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java Sat Aug 20 17:16:02 2011
@@ -36,7 +36,8 @@ public class BasicModelD extends BasicMo
     double nphi = 1 - phi;
     double p = 1.0 / (stats.getNumberOfDocuments() + 1);
     double D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p));
-    return (float)(D * F + 0.5 * log2(2 * Math.PI * tfn * nphi));
+    // nocommit return (float)(D * F + 0.5 * log2(2 * Math.PI * tfn * nphi));
+    return (float)(D * F + 0.5 * log2(1 + 2 * Math.PI * tfn * nphi));
   }
   
   @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java Sat Aug 20 17:16:02 2011
@@ -24,12 +24,19 @@ import static org.apache.lucene.search.s
  * @lucene.experimental
  */
 public class BasicModelP extends BasicModel {
+  /** {@code log2(Math.E)}, precomputed. */
+  protected static double LOG2_E = log2(Math.E);
+  
   @Override
   public final float score(EasyStats stats, float tfn) {
     float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
-    return (float)(tfn * log2(tfn / lambda)
-        + (lambda + 1 / 12 / tfn - tfn) * log2(Math.E)
+//    System.out.printf("tfn=%f, lambda=%f, log1=%f, log2=%f%n", tfn, lambda,
+//        tfn / lambda, 2 * Math.PI * tfn);
+    // nocommit
+    float score = (float)(tfn * log2(tfn / lambda)
+        + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
         + 0.5 * log2(2 * Math.PI * tfn));
+    return score > 0.0f ? score : 0.0f;
   }
 
   @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java Sat Aug 20 17:16:02 2011
@@ -91,17 +91,15 @@ public abstract class EasySimilarity ext
       totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
     }
     
-    // nocommit: we have to provide something if codec doesnt supply these measures,
+    // We have to provide something if the codec doesn't supply these measures,
     // or if someone omitted frequencies for the field... negative values cause
     // NaN/Inf for some scorers.
-    
     if (numberOfFieldTokens == -1) {
-      numberOfFieldTokens = 1;
+      numberOfFieldTokens = docFreq;
       avgFieldLength = 1;
     }
-    
     if (totalTermFreq == -1) {
-      totalTermFreq = 1;
+      totalTermFreq = docFreq;
     }
     
     stats.setNumberOfDocuments(numberOfDocuments);
@@ -248,14 +246,15 @@ public abstract class EasySimilarity ext
     
     @Override
     public float score(int doc, int freq) {
-      // nocommit: we have to supply something in case norms are omitted
-      return EasySimilarity.this.score(stats, freq, norms == null ? 1 : decodeNormValue(norms[doc]));
+      // We have to supply something in case norms are omitted
+      return EasySimilarity.this.score(stats, freq,
+          norms == null ? (int)(freq + 0.5) : decodeNormValue(norms[doc]));
     }
     
     @Override
     public Explanation explain(int doc, Explanation freq) {
-      return EasySimilarity.this.explain(
-          stats, doc, freq, decodeNormValue(norms[doc]));
+      return EasySimilarity.this.explain(stats, doc, freq,
+          norms == null ? (int)(freq.getValue() + 0.5) : decodeNormValue(norms[doc]));
     }
   }
   
@@ -276,13 +275,14 @@ public abstract class EasySimilarity ext
     
     @Override
     public float score(int doc, float freq) {
-      // nocommit: we have to supply something in case norms are omitted
-      return EasySimilarity.this.score(stats, freq, norms == null ? 1 : decodeNormValue(norms[doc]));
+      // We have to supply something in case norms are omitted
+      return EasySimilarity.this.score(stats, freq,
+          norms == null ? (int)(freq + 0.5) : decodeNormValue(norms[doc]));
     }
     @Override
     public Explanation explain(int doc, Explanation freq) {
-      return EasySimilarity.this.explain(
-          stats, doc, freq, decodeNormValue(norms[doc]));
+      return EasySimilarity.this.explain(stats, doc, freq,
+          norms == null ? (int)(freq.getValue() + 0.5) : decodeNormValue(norms[doc]));
     }
 
     @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java Sat Aug 20 17:16:02 2011
@@ -25,6 +25,12 @@ import org.apache.lucene.search.Explanat
  * Ad Hoc information retrieval. In Proceedings of the 24th annual international
  * ACM SIGIR conference on Research and development in information retrieval
  * (SIGIR '01). ACM, New York, NY, USA, 334-342.
+ * <p>
+ * The formula as defined in the paper assigns a negative score to documents that
+ * contain the term, but with fewer occurrences than predicted by the collection
+ * language model. The Lucene implementation returns {@code 0} for such
+ * documents.
+ * </p>
  * 
  * @lucene.experimental
  */
@@ -55,10 +61,10 @@ public class LMDirichletSimilarity exten
   
   @Override
   protected float score(EasyStats stats, float freq, int docLen) {
-    return stats.getTotalBoost() *
-        (float)(Math.log(1 + freq /
-            (mu * ((LMStats)stats).getCollectionProbability())) +
+    float score = stats.getTotalBoost() * (float)(Math.log(1 + freq /
+        (mu * ((LMStats)stats).getCollectionProbability())) +
         Math.log(mu / (docLen + mu)));
+    return score > 0.0f ? score : 0.0f;
   }
   
   @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java Sat Aug 20 17:16:02 2011
@@ -136,12 +136,12 @@ public abstract class LMSimilarity exten
   
   /**
    * Models {@code p(w|C)} as the number of occurrences of the term in the
-   * collection, divided by the total number of tokens.
+   * collection, divided by the total number of tokens {@code + 1}.
    */
   public static class DefaultCollectionModel implements CollectionModel {
     @Override
     public float computeProbability(EasyStats stats) {
-      return (float)stats.getTotalTermFreq() / stats.getNumberOfFieldTokens();
+      return (float)stats.getTotalTermFreq() / (stats.getNumberOfFieldTokens() +1);
     }
     
     @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java Sat Aug 20 17:16:02 2011
@@ -27,6 +27,8 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SimilarityProvider;
@@ -37,11 +39,13 @@ import org.apache.lucene.util.LuceneTest
 import org.apache.lucene.util.TermContext;
 
 /**
- * Tests the {@link EasySimilarity}-based Similarities. Contains unit tests and
- * integration tests as well. This class maintains a list of
+ * Tests the {@link EasySimilarity}-based Similarities. Contains unit tests and 
+ * integration tests for all Similarities and correctness tests for a select
+ * few.
+ * <p>This class maintains a list of
  * {@code EasySimilarity} subclasses. Each test case performs its test on all
  * items in the list. If a test case fails, the name of the Similarity that
- * caused the failure is returned as part of the assertion error message.
+ * caused the failure is returned as part of the assertion error message.</p>
  * <p>Unit testing is performed by constructing statistics manually and calling
  * the {@link EasySimilarity#score(EasyStats, float, int)} method of the
  * Similarities. The statistics represent corner cases of corpus distributions.
@@ -53,10 +57,18 @@ import org.apache.lucene.util.TermContex
  * <p>Note: the list of Similarities is maintained by hand. If a new Similarity
  * is added to the {@code org.apache.lucene.search.similarities} package, the
  * list should be updated accordingly.</p>
+ * <p>
+ * In the correctness tests, the score is verified against the result of manual
+ * computation. Since it would be impossible to test all Similarities
+ * (e.g. all possible DFR combinations, all parameter values for LM), only 
+ * the best performing setups in the original papers are verified.
+ * </p>
  */
 public class TestEasySimilarity extends LuceneTestCase {
   private static String FIELD_BODY = "body";
   private static String FIELD_ID = "id";
+  /** The tolerance range for float equality. */
+  private static float FLOAT_EPSILON = 1e-5f;
   /** The DFR basic models to test. */
   private static BasicModel[] BASIC_MODELS;
   /** The DFR aftereffects to test. */
@@ -183,23 +195,14 @@ public class TestEasySimilarity extends 
     for (EasySimilarity sim : sims) {
       EasyStats realStats = sim.computeStats(new SpoofIndexSearcher(stats),
           "spoof", stats.getTotalBoost(), tc);
-//      System.out.printf("Before: %d %d %f %d %d%n",
-//          realStats.getNumberOfDocuments(), realStats.getNumberOfFieldTokens(),
-//          realStats.getAvgFieldLength(), realStats.getDocFreq(),
-//          realStats.getTotalTermFreq());
-//      realStats.setNumberOfDocuments(stats.getNumberOfDocuments());
-//      realStats.setNumberOfFieldTokens(stats.getNumberOfFieldTokens());
-//      realStats.setAvgFieldLength(stats.getAvgFieldLength());
-//      realStats.setDocFreq(stats.getDocFreq());
-//      realStats.setTotalTermFreq(stats.getTotalTermFreq());
-//      System.out.printf("After: %d %d %f %d %d%n",
-//          realStats.getNumberOfDocuments(), realStats.getNumberOfFieldTokens(),
-//          realStats.getAvgFieldLength(), realStats.getDocFreq(),
-//          realStats.getTotalTermFreq());
       float score = sim.score(realStats, freq, docLen);
+      float explScore = sim.explain(
+          realStats, 1, new Explanation(freq, "freq"), docLen).getValue();
       assertFalse("Score infinite: " + sim.toString(), Float.isInfinite(score));
       assertFalse("Score NaN: " + sim.toString(), Float.isNaN(score));
       assertTrue("Score negative: " + sim.toString(), score >= 0);
+      assertEquals("score() and explain() return different values: "
+          + sim.toString(), score, explScore, FLOAT_EPSILON);
     }
   }
   
@@ -217,7 +220,7 @@ public class TestEasySimilarity extends 
     stats.setNumberOfFieldTokens(stats.getNumberOfDocuments());
     stats.setTotalTermFreq(stats.getDocFreq());
     stats.setAvgFieldLength(
-        stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
+        (float)stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
     unitTestCore(stats, FREQ, DOC_LEN);
   }
 
@@ -230,10 +233,10 @@ public class TestEasySimilarity extends 
     stats.setNumberOfFieldTokens(stats.getNumberOfDocuments() * 2 / 3);
     stats.setTotalTermFreq(stats.getDocFreq());
     stats.setAvgFieldLength(
-        stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
+        (float)stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
     unitTestCore(stats, FREQ, DOC_LEN);
   }
-
+  
   /**
    * Tests correct behavior when
    * {@code NumberOfDocuments = 1}.
@@ -383,6 +386,155 @@ public class TestEasySimilarity extends 
     unitTestCore(stats, FREQ, (int)stats.getAvgFieldLength());
   }
   
+  // ---------------------------- Correctness tests ----------------------------
+  
+  /** Correctness test for the Dirichlet LM model. */
+  public void testLMDirichlet() throws IOException {
+    float p =
+        (FREQ + 2000.0f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f)) /
+        (DOC_LEN + 2000.0f);
+    float a = 2000.0f / (DOC_LEN + 2000.0f);
+    float gold = (float)(
+        Math.log(p / (a * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f))) +
+        Math.log(a));
+    correctnessTestCore(new LMDirichletSimilarity(), gold);
+  }
+  
+  /** Correctness test for the Jelinek-Mercer LM model. */
+  public void testLMJelinekMercer() throws IOException {
+    float p = (1 - 0.1f) * FREQ / DOC_LEN +
+              0.1f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f);
+    float gold = (float)(Math.log(
+        p / (0.1f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f))));
+    correctnessTestCore(new LMJelinekMercerSimilarity(0.1f), gold);
+  }
+  
+  /**
+   * Correctness test for the LL IB model with DF-based lambda and
+   * no normalization.
+   */
+  public void testLLForIB() throws IOException {
+    EasySimilarity sim = new IBSimilarity(new DistributionLL(), new LambdaDF());
+    correctnessTestCore(sim, 4.26267987704f);
+  }
+  
+  /**
+   * Correctness test for the SPL IB model with TTF-based lambda and
+   * no normalization.
+   */
+  public void testSPLForIB() throws IOException {
+    EasySimilarity sim =
+      new IBSimilarity(new DistributionSPL(), new LambdaTTF());
+    correctnessTestCore(sim, 2.24069910825f);
+  }
+  
+  /** Correctness test for the PL2 DFR model. */
+  public void testPL2() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelP(), new AfterEffectL(), new NormalizationH2());
+    float tfn = (float)(FREQ * EasySimilarity.log2(
+        1 + AVG_FIELD_LENGTH / DOC_LEN));  // 8.1894750101
+    float l = 1.0f / (tfn + 1.0f);         // 0.108820144666
+    float lambda = (1.0f * TOTAL_TERM_FREQ) / NUMBER_OF_DOCUMENTS;  // 0.7
+    float p = (float)(tfn * EasySimilarity.log2(tfn / lambda) +
+              (lambda + 1 / (12 * tfn) - tfn) * EasySimilarity.log2(Math.E) +
+              0.5 * EasySimilarity.log2(2 * Math.PI * tfn)); // 21.1113611585
+    float gold = l * p;                    // 2.29734137536
+    correctnessTestCore(sim, gold);
+  }
+
+  /** Correctness test for the IneB2 DFR model. */
+  public void testIneB2() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelIne(), new AfterEffectB(), new NormalizationH2());
+    correctnessTestCore(sim, 6.23455315685f);
+  }
+  
+  /** Correctness test for the GL1 DFR model. */
+  public void testGL1() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelG(), new AfterEffectL(), new NormalizationH1());
+    correctnessTestCore(sim, 1.22733118352f);
+  }
+  
+  /** Correctness test for the BEB1 DFR model. */
+  public void testBEB1() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelBE(), new AfterEffectB(), new NormalizationH1());
+    float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN;  // 8.75
+    float b = (TOTAL_TERM_FREQ + 1) / (DOC_FREQ * (tfn + 1));  // 0.728205128205
+    float n1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - 1;        // 170
+    float m1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - tfn - 2;  // 160.25
+    float n2 = TOTAL_TERM_FREQ;                                      // 70
+    float m2 = TOTAL_TERM_FREQ - tfn;                                // 61.25
+    float be = (float)(-EasySimilarity.log2(NUMBER_OF_DOCUMENTS + 1 - 1) -
+               EasySimilarity.log2(Math.E) +                   // -8.08655123066
+               ((m1 + 0.5f) * EasySimilarity.log2(n1 / m1) +
+                (n1 - m1) * EasySimilarity.log2(n1)) -         // 85.9391317425
+               ((m2 + 0.5f) * EasySimilarity.log2(n2 / m2) +
+                (n2 - m2) * EasySimilarity.log2(n2)));         // 65.5270599612
+               // 12.3255205506
+    float gold = b * be;                                       // 8.97550727277
+    correctnessTestCore(sim, gold);
+  }
+
+  /** Correctness test for the D DFR model (basic model only). */
+  public void testD() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(new BasicModelD());
+    double p = 1.0 / (NUMBER_OF_DOCUMENTS + 1);                // 0.009900990099
+    double phi = FREQ / TOTAL_TERM_FREQ;                       // 0.1
+    double D = phi * EasySimilarity.log2(phi / p) +            // 0.209745318365
+              (1 - phi) * EasySimilarity.log2((1 - phi) / (1 - p));
+    float gold = (float)(TOTAL_TERM_FREQ * D + 0.5 * EasySimilarity.log2(
+                 1 + 2 * Math.PI * FREQ * (1 - phi)));         // 17.3535930644
+    correctnessTestCore(sim, gold);
+  }
+  
+  /** Correctness test for the In2 DFR model with no aftereffect. */
+  public void testIn2() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelIn(), new NormalizationH2());
+    float tfn = (float)(FREQ * EasySimilarity.log2(            // 8.1894750101
+                1 + AVG_FIELD_LENGTH / DOC_LEN));
+    float gold = (float)(tfn * EasySimilarity.log2(            // 26.7459577898
+                 (NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));
+    correctnessTestCore(sim, gold);
+  }
+  
+  /** Correctness test for the IFB DFR model with no normalization. */
+  public void testIFB() throws IOException {
+    EasySimilarity sim = new DFRSimilarity(
+        new BasicModelIF(), new AfterEffectB());
+    float B = (TOTAL_TERM_FREQ + 1) / (DOC_FREQ * (FREQ + 1)); // 0.8875
+    float IF = (float)(FREQ * EasySimilarity.log2(             // 8.97759389642
+               1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5)));
+    float gold = B * IF;                                       // 7.96761458307
+    correctnessTestCore(sim, gold);
+  }
+  
+  /**
+   * The generic test core called by all correctness test methods. It calls the
+   * {@link EasySimilarity#score(EasyStats, float, int)} method of the given
+   * Similarity {@code sim} and compares the score against the manually
+   * computed {@code gold}.
+   */
+  private void correctnessTestCore(EasySimilarity sim, float gold)
+      throws IOException {
+    // We have to fake everything, because computeStats() can be overridden and
+    // there is no way to inject false data after fillEasyStats().
+    EasyStats stats = createStats();
+    SpoofIndexSearcher searcher = new SpoofIndexSearcher(stats);
+    TermContext tc = new TermContext(
+        searcher.getIndexReader().getTopReaderContext(),
+        new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
+    
+    EasyStats realStats = sim.computeStats(
+        searcher, "spoof", stats.getTotalBoost(), tc);
+    float score = sim.score(realStats, FREQ, DOC_LEN);
+    assertEquals(
+        sim.toString() + " score not correct.", gold, score, FLOAT_EPSILON);
+  }
+  
   // ---------------------------- Integration tests ----------------------------
 
   /** The "collection" for the integration tests. */
@@ -413,6 +565,9 @@ public class TestEasySimilarity extends 
   
   /** Test whether all similarities return document 3 before documents 7 and 8. */
   public void testHeartRanking() throws IOException {
+    assumeFalse("PreFlex codec does not support the stats necessary for this test!", 
+        "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
+
     Query q = new TermQuery(new Term(FIELD_BODY, "heart"));
     
     for (EasySimilarity sim : sims) {