You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/20 19:16:03 UTC
svn commit: r1159888 - in
/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities:
BasicModelBE.java BasicModelD.java BasicModelP.java EasySimilarity.java
LMDirichletSimilarity.java LMSimilarity.java TestEasySimilarity.java
Author: rmuir
Date: Sat Aug 20 17:16:02 2011
New Revision: 1159888
URL: http://svn.apache.org/viewvc?rev=1159888&view=rev
Log:
LUCENE-3357: add unit and integration tests for new sims (and workarounds for formula bugs the tests found)
Modified:
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java Sat Aug 20 17:16:02 2011
@@ -33,7 +33,7 @@ public class BasicModelBE extends BasicM
// long F = stats.getTotalTermFreq() + 1;
long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
return (float)(-log2((N - 1) * Math.E)
- + f(N + F -1, N + F - tfn - 2) - f(F, F - tfn));
+ + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn));
}
/** The <em>f</em> helper function defined for <em>B<sub>E</sub></em>. */
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java Sat Aug 20 17:16:02 2011
@@ -36,7 +36,8 @@ public class BasicModelD extends BasicMo
double nphi = 1 - phi;
double p = 1.0 / (stats.getNumberOfDocuments() + 1);
double D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p));
- return (float)(D * F + 0.5 * log2(2 * Math.PI * tfn * nphi));
+ // nocommit return (float)(D * F + 0.5 * log2(2 * Math.PI * tfn * nphi));
+ return (float)(D * F + 0.5 * log2(1 + 2 * Math.PI * tfn * nphi));
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java Sat Aug 20 17:16:02 2011
@@ -24,12 +24,19 @@ import static org.apache.lucene.search.s
* @lucene.experimental
*/
public class BasicModelP extends BasicModel {
+ /** {@code log2(Math.E)}, precomputed. */
+ protected static double LOG2_E = log2(Math.E);
+
@Override
public final float score(EasyStats stats, float tfn) {
float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
- return (float)(tfn * log2(tfn / lambda)
- + (lambda + 1 / 12 / tfn - tfn) * log2(Math.E)
+// System.out.printf("tfn=%f, lambda=%f, log1=%f, log2=%f%n", tfn, lambda,
+// tfn / lambda, 2 * Math.PI * tfn);
+ // nocommit
+ float score = (float)(tfn * log2(tfn / lambda)
+ + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
+ 0.5 * log2(2 * Math.PI * tfn));
+ return score > 0.0f ? score : 0.0f;
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java Sat Aug 20 17:16:02 2011
@@ -91,17 +91,15 @@ public abstract class EasySimilarity ext
totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
}
- // nocommit: we have to provide something if codec doesnt supply these measures,
+ // We have to provide something if the codec doesn't supply these measures,
// or if someone omitted frequencies for the field... negative values cause
// NaN/Inf for some scorers.
-
if (numberOfFieldTokens == -1) {
- numberOfFieldTokens = 1;
+ numberOfFieldTokens = docFreq;
avgFieldLength = 1;
}
-
if (totalTermFreq == -1) {
- totalTermFreq = 1;
+ totalTermFreq = docFreq;
}
stats.setNumberOfDocuments(numberOfDocuments);
@@ -248,14 +246,15 @@ public abstract class EasySimilarity ext
@Override
public float score(int doc, int freq) {
- // nocommit: we have to supply something in case norms are omitted
- return EasySimilarity.this.score(stats, freq, norms == null ? 1 : decodeNormValue(norms[doc]));
+ // We have to supply something in case norms are omitted
+ return EasySimilarity.this.score(stats, freq,
+ norms == null ? (int)(freq + 0.5) : decodeNormValue(norms[doc]));
}
@Override
public Explanation explain(int doc, Explanation freq) {
- return EasySimilarity.this.explain(
- stats, doc, freq, decodeNormValue(norms[doc]));
+ return EasySimilarity.this.explain(stats, doc, freq,
+ norms == null ? (int)(freq.getValue() + 0.5) : decodeNormValue(norms[doc]));
}
}
@@ -276,13 +275,14 @@ public abstract class EasySimilarity ext
@Override
public float score(int doc, float freq) {
- // nocommit: we have to supply something in case norms are omitted
- return EasySimilarity.this.score(stats, freq, norms == null ? 1 : decodeNormValue(norms[doc]));
+ // We have to supply something in case norms are omitted
+ return EasySimilarity.this.score(stats, freq,
+ norms == null ? (int)(freq + 0.5) : decodeNormValue(norms[doc]));
}
@Override
public Explanation explain(int doc, Explanation freq) {
- return EasySimilarity.this.explain(
- stats, doc, freq, decodeNormValue(norms[doc]));
+ return EasySimilarity.this.explain(stats, doc, freq,
+ norms == null ? (int)(freq.getValue() + 0.5) : decodeNormValue(norms[doc]));
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java Sat Aug 20 17:16:02 2011
@@ -25,6 +25,12 @@ import org.apache.lucene.search.Explanat
* Ad Hoc information retrieval. In Proceedings of the 24th annual international
* ACM SIGIR conference on Research and development in information retrieval
* (SIGIR '01). ACM, New York, NY, USA, 334-342.
+ * <p>
+ * The formula as defined in the paper assigns a negative score to documents that
+ * contain the term, but with fewer occurrences than predicted by the collection
+ * language model. The Lucene implementation returns {@code 0} for such
+ * documents.
+ * </p>
*
* @lucene.experimental
*/
@@ -55,10 +61,10 @@ public class LMDirichletSimilarity exten
@Override
protected float score(EasyStats stats, float freq, int docLen) {
- return stats.getTotalBoost() *
- (float)(Math.log(1 + freq /
- (mu * ((LMStats)stats).getCollectionProbability())) +
+ float score = stats.getTotalBoost() * (float)(Math.log(1 + freq /
+ (mu * ((LMStats)stats).getCollectionProbability())) +
Math.log(mu / (docLen + mu)));
+ return score > 0.0f ? score : 0.0f;
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java Sat Aug 20 17:16:02 2011
@@ -136,12 +136,12 @@ public abstract class LMSimilarity exten
/**
* Models {@code p(w|C)} as the number of occurrences of the term in the
- * collection, divided by the total number of tokens.
+ * collection, divided by the total number of tokens {@code + 1}.
*/
public static class DefaultCollectionModel implements CollectionModel {
@Override
public float computeProbability(EasyStats stats) {
- return (float)stats.getTotalTermFreq() / stats.getNumberOfFieldTokens();
+ return (float)stats.getTotalTermFreq() / (stats.getNumberOfFieldTokens() +1);
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java?rev=1159888&r1=1159887&r2=1159888&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java Sat Aug 20 17:16:02 2011
@@ -27,6 +27,8 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimilarityProvider;
@@ -37,11 +39,13 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util.TermContext;
/**
- * Tests the {@link EasySimilarity}-based Similarities. Contains unit tests and
- * integration tests as well. This class maintains a list of
+ * Tests the {@link EasySimilarity}-based Similarities. Contains unit tests and
+ * integration tests for all Similarities and correctness tests for a select
+ * few.
+ * <p>This class maintains a list of
* {@code EasySimilarity} subclasses. Each test case performs its test on all
* items in the list. If a test case fails, the name of the Similarity that
- * caused the failure is returned as part of the assertion error message.
+ * caused the failure is returned as part of the assertion error message.</p>
* <p>Unit testing is performed by constructing statistics manually and calling
* the {@link EasySimilarity#score(EasyStats, float, int)} method of the
* Similarities. The statistics represent corner cases of corpus distributions.
@@ -53,10 +57,18 @@ import org.apache.lucene.util.TermContex
* <p>Note: the list of Similarities is maintained by hand. If a new Similarity
* is added to the {@code org.apache.lucene.search.similarities} package, the
* list should be updated accordingly.</p>
+ * <p>
+ * In the correctness tests, the score is verified against the result of manual
+ * computation. Since it would be impossible to test all Similarities
+ * (e.g. all possible DFR combinations, all parameter values for LM), only
+ * the best performing setups in the original papers are verified.
+ * </p>
*/
public class TestEasySimilarity extends LuceneTestCase {
private static String FIELD_BODY = "body";
private static String FIELD_ID = "id";
+ /** The tolerance range for float equality. */
+ private static float FLOAT_EPSILON = 1e-5f;
/** The DFR basic models to test. */
private static BasicModel[] BASIC_MODELS;
/** The DFR aftereffects to test. */
@@ -183,23 +195,14 @@ public class TestEasySimilarity extends
for (EasySimilarity sim : sims) {
EasyStats realStats = sim.computeStats(new SpoofIndexSearcher(stats),
"spoof", stats.getTotalBoost(), tc);
-// System.out.printf("Before: %d %d %f %d %d%n",
-// realStats.getNumberOfDocuments(), realStats.getNumberOfFieldTokens(),
-// realStats.getAvgFieldLength(), realStats.getDocFreq(),
-// realStats.getTotalTermFreq());
-// realStats.setNumberOfDocuments(stats.getNumberOfDocuments());
-// realStats.setNumberOfFieldTokens(stats.getNumberOfFieldTokens());
-// realStats.setAvgFieldLength(stats.getAvgFieldLength());
-// realStats.setDocFreq(stats.getDocFreq());
-// realStats.setTotalTermFreq(stats.getTotalTermFreq());
-// System.out.printf("After: %d %d %f %d %d%n",
-// realStats.getNumberOfDocuments(), realStats.getNumberOfFieldTokens(),
-// realStats.getAvgFieldLength(), realStats.getDocFreq(),
-// realStats.getTotalTermFreq());
float score = sim.score(realStats, freq, docLen);
+ float explScore = sim.explain(
+ realStats, 1, new Explanation(freq, "freq"), docLen).getValue();
assertFalse("Score infinite: " + sim.toString(), Float.isInfinite(score));
assertFalse("Score NaN: " + sim.toString(), Float.isNaN(score));
assertTrue("Score negative: " + sim.toString(), score >= 0);
+ assertEquals("score() and explain() return different values: "
+ + sim.toString(), score, explScore, FLOAT_EPSILON);
}
}
@@ -217,7 +220,7 @@ public class TestEasySimilarity extends
stats.setNumberOfFieldTokens(stats.getNumberOfDocuments());
stats.setTotalTermFreq(stats.getDocFreq());
stats.setAvgFieldLength(
- stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
+ (float)stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
unitTestCore(stats, FREQ, DOC_LEN);
}
@@ -230,10 +233,10 @@ public class TestEasySimilarity extends
stats.setNumberOfFieldTokens(stats.getNumberOfDocuments() * 2 / 3);
stats.setTotalTermFreq(stats.getDocFreq());
stats.setAvgFieldLength(
- stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
+ (float)stats.getNumberOfFieldTokens() / stats.getNumberOfDocuments());
unitTestCore(stats, FREQ, DOC_LEN);
}
-
+
/**
* Tests correct behavior when
* {@code NumberOfDocuments = 1}.
@@ -383,6 +386,155 @@ public class TestEasySimilarity extends
unitTestCore(stats, FREQ, (int)stats.getAvgFieldLength());
}
+ // ---------------------------- Correctness tests ----------------------------
+
+ /** Correctness test for the Dirichlet LM model. */
+ public void testLMDirichlet() throws IOException {
+ float p =
+ (FREQ + 2000.0f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f)) /
+ (DOC_LEN + 2000.0f);
+ float a = 2000.0f / (DOC_LEN + 2000.0f);
+ float gold = (float)(
+ Math.log(p / (a * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f))) +
+ Math.log(a));
+ correctnessTestCore(new LMDirichletSimilarity(), gold);
+ }
+
+ /** Correctness test for the Jelinek-Mercer LM model. */
+ public void testLMJelinekMercer() throws IOException {
+ float p = (1 - 0.1f) * FREQ / DOC_LEN +
+ 0.1f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f);
+ float gold = (float)(Math.log(
+ p / (0.1f * TOTAL_TERM_FREQ / (NUMBER_OF_FIELD_TOKENS + 1.0f))));
+ correctnessTestCore(new LMJelinekMercerSimilarity(0.1f), gold);
+ }
+
+ /**
+ * Correctness test for the LL IB model with DF-based lambda and
+ * no normalization.
+ */
+ public void testLLForIB() throws IOException {
+ EasySimilarity sim = new IBSimilarity(new DistributionLL(), new LambdaDF());
+ correctnessTestCore(sim, 4.26267987704f);
+ }
+
+ /**
+ * Correctness test for the SPL IB model with TTF-based lambda and
+ * no normalization.
+ */
+ public void testSPLForIB() throws IOException {
+ EasySimilarity sim =
+ new IBSimilarity(new DistributionSPL(), new LambdaTTF());
+ correctnessTestCore(sim, 2.24069910825f);
+ }
+
+ /** Correctness test for the PL2 DFR model. */
+ public void testPL2() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelP(), new AfterEffectL(), new NormalizationH2());
+ float tfn = (float)(FREQ * EasySimilarity.log2(
+ 1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
+ float l = 1.0f / (tfn + 1.0f); // 0.108820144666
+ float lambda = (1.0f * TOTAL_TERM_FREQ) / NUMBER_OF_DOCUMENTS; // 0.7
+ float p = (float)(tfn * EasySimilarity.log2(tfn / lambda) +
+ (lambda + 1 / (12 * tfn) - tfn) * EasySimilarity.log2(Math.E) +
+ 0.5 * EasySimilarity.log2(2 * Math.PI * tfn)); // 21.1113611585
+ float gold = l * p; // 2.29734137536
+ correctnessTestCore(sim, gold);
+ }
+
+ /** Correctness test for the IneB2 DFR model. */
+ public void testIneB2() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelIne(), new AfterEffectB(), new NormalizationH2());
+ correctnessTestCore(sim, 6.23455315685f);
+ }
+
+ /** Correctness test for the GL1 DFR model. */
+ public void testGL1() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelG(), new AfterEffectL(), new NormalizationH1());
+ correctnessTestCore(sim, 1.22733118352f);
+ }
+
+ /** Correctness test for the BEB1 DFR model. */
+ public void testBEB1() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelBE(), new AfterEffectB(), new NormalizationH1());
+ float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN; // 8.75
+ float b = (TOTAL_TERM_FREQ + 1) / (DOC_FREQ * (tfn + 1)); // 0.728205128205
+ float n1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - 1; // 170
+ float m1 = NUMBER_OF_DOCUMENTS + 1 + TOTAL_TERM_FREQ - tfn - 2; // 160.25
+ float n2 = TOTAL_TERM_FREQ; // 70
+ float m2 = TOTAL_TERM_FREQ - tfn; // 61.25
+ float be = (float)(-EasySimilarity.log2(NUMBER_OF_DOCUMENTS + 1 - 1) -
+ EasySimilarity.log2(Math.E) + // -8.08655123066
+ ((m1 + 0.5f) * EasySimilarity.log2(n1 / m1) +
+ (n1 - m1) * EasySimilarity.log2(n1)) - // 85.9391317425
+ ((m2 + 0.5f) * EasySimilarity.log2(n2 / m2) +
+ (n2 - m2) * EasySimilarity.log2(n2))); // 65.5270599612
+ // 12.3255205506
+ float gold = b * be; // 8.97550727277
+ correctnessTestCore(sim, gold);
+ }
+
+ /** Correctness test for the D DFR model (basic model only). */
+ public void testD() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(new BasicModelD());
+ double p = 1.0 / (NUMBER_OF_DOCUMENTS + 1); // 0.009900990099
+ double phi = FREQ / TOTAL_TERM_FREQ; // 0.1
+ double D = phi * EasySimilarity.log2(phi / p) + // 0.209745318365
+ (1 - phi) * EasySimilarity.log2((1 - phi) / (1 - p));
+ float gold = (float)(TOTAL_TERM_FREQ * D + 0.5 * EasySimilarity.log2(
+ 1 + 2 * Math.PI * FREQ * (1 - phi))); // 17.3535930644
+ correctnessTestCore(sim, gold);
+ }
+
+ /** Correctness test for the In2 DFR model with no aftereffect. */
+ public void testIn2() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelIn(), new NormalizationH2());
+ float tfn = (float)(FREQ * EasySimilarity.log2( // 8.1894750101
+ 1 + AVG_FIELD_LENGTH / DOC_LEN));
+ float gold = (float)(tfn * EasySimilarity.log2( // 26.7459577898
+ (NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));
+ correctnessTestCore(sim, gold);
+ }
+
+ /** Correctness test for the IFB DFR model with no normalization. */
+ public void testIFB() throws IOException {
+ EasySimilarity sim = new DFRSimilarity(
+ new BasicModelIF(), new AfterEffectB());
+ float B = (TOTAL_TERM_FREQ + 1) / (DOC_FREQ * (FREQ + 1)); // 0.8875
+ float IF = (float)(FREQ * EasySimilarity.log2( // 8.97759389642
+ 1 + (NUMBER_OF_DOCUMENTS + 1) / (TOTAL_TERM_FREQ + 0.5)));
+ float gold = B * IF; // 7.96761458307
+ correctnessTestCore(sim, gold);
+ }
+
+ /**
+ * The generic test core called by all correctness test methods. It calls the
+ * {@link EasySimilarity#score(EasyStats, float, int)} method of the given
+ * {@code sim} and compares the score against the manually
+ * computed {@code gold}.
+ */
+ private void correctnessTestCore(EasySimilarity sim, float gold)
+ throws IOException {
+ // We have to fake everything, because computeStats() can be overridden and
+ // there is no way to inject false data after fillEasyStats().
+ EasyStats stats = createStats();
+ SpoofIndexSearcher searcher = new SpoofIndexSearcher(stats);
+ TermContext tc = new TermContext(
+ searcher.getIndexReader().getTopReaderContext(),
+ new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
+
+ EasyStats realStats = sim.computeStats(
+ searcher, "spoof", stats.getTotalBoost(), tc);
+ float score = sim.score(realStats, FREQ, DOC_LEN);
+ assertEquals(
+ sim.toString() + " score not correct.", gold, score, FLOAT_EPSILON);
+ }
+
// ---------------------------- Integration tests ----------------------------
/** The "collection" for the integration tests. */
@@ -413,6 +565,9 @@ public class TestEasySimilarity extends
/** Test whether all similarities return document 3 before documents 7 and 8. */
public void testHeartRanking() throws IOException {
+ assumeFalse("PreFlex codec does not support the stats necessary for this test!",
+ "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
+
Query q = new TermQuery(new Term(FIELD_BODY, "heart"));
for (EasySimilarity sim : sims) {