You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/11 03:23:09 UTC
svn commit: r1156429 -
/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/
Author: rmuir
Date: Thu Aug 11 01:23:08 2011
New Revision: 1156429
URL: http://svn.apache.org/viewvc?rev=1156429&view=rev
Log:
LUCENE-3357: fix NaN/Inf issues in some DFR/IB models, fix IB TTF/DF confusion, add tests (some fail at the moment due to negative scores)
Modified:
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffect.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectB.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectL.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModel.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelG.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIF.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIn.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIne.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DFRSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Distribution.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionLL.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionSPL.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasyStats.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/IBSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Lambda.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaDF.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaTTF.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Normalization.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH1.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH2.java
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffect.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffect.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffect.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffect.java Thu Aug 11 01:23:08 2011
@@ -47,5 +47,17 @@ public abstract class AfterEffect {
public final Explanation explain(EasyStats stats, float tfn) {
return new Explanation(1, "no aftereffect");
}
+
+ @Override
+ public String toString() {
+ return "";
+ }
}
+
+ /**
+ * Subclasses must override this method to return the code of the
+ * after effect formula. Refer to the original paper for the list.
+ */
+ @Override
+ public abstract String toString();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectB.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectB.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectB.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectB.java Thu Aug 11 01:23:08 2011
@@ -41,4 +41,9 @@ public class AfterEffectB extends AfterE
result.addDetail(new Explanation(stats.getDocFreq(), "docFreq"));
return result;
}
+
+ @Override
+ public String toString() {
+ return "B";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectL.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/AfterEffectL.java Thu Aug 11 01:23:08 2011
@@ -37,4 +37,9 @@ public class AfterEffectL extends AfterE
result.addDetail(new Explanation(tfn, "tfn"));
return result;
}
+
+ @Override
+ public String toString() {
+ return "L";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModel.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModel.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModel.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModel.java Thu Aug 11 01:23:08 2011
@@ -50,4 +50,11 @@ public abstract class BasicModel {
new Explanation(stats.getTotalTermFreq(), "totalTermFreq"));
return result;
}
+
+ /**
+ * Subclasses must override this method to return the code of the
+ * basic model formula. Refer to the original paper for the list.
+ */
+ @Override
+ public abstract String toString();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelBE.java Thu Aug 11 01:23:08 2011
@@ -20,14 +20,18 @@ package org.apache.lucene.search.similar
import static org.apache.lucene.search.similarities.EasySimilarity.log2;
/**
- * Limiting form of the Bose-Einstein model.
+ * Limiting form of the Bose-Einstein model. The formula used in Lucene differs
+ * slightly from the one in the original paper: to avoid underflow for small
+ * values of {@code N} and {@code F}, {@code N} is increased by {@code 1} and
+ * {@code F} is ensured to be at least {@code tfn + 1}.
* @lucene.experimental
*/
public class BasicModelBE extends BasicModel {
@Override
public final float score(EasyStats stats, float tfn) {
- long N = stats.getNumberOfDocuments();
- long F = stats.getTotalTermFreq();
+ long N = stats.getNumberOfDocuments() + 1;
+// long F = stats.getTotalTermFreq() + 1;
+ long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
return (float)(-log2((N - 1) * Math.E)
+ f(N + F -1, N + F - tfn - 2) - f(F, F - tfn));
}
@@ -36,4 +40,9 @@ public class BasicModelBE extends BasicM
private final double f(long n, float m) {
return (m + 0.5) * log2((double)n / m) + (n - m) * log2(n);
}
+
+ @Override
+ public String toString() {
+ return "Be";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelD.java Thu Aug 11 01:23:08 2011
@@ -21,17 +21,26 @@ import static org.apache.lucene.search.s
/**
* Implements the approximation of the binomial model with the divergence
- * for DFR.
+ * for DFR. The formula used in Lucene differs slightly from the one in the
+ * original paper: to avoid underflow for small values of {@code N} and
+ * {@code F}, {@code N} is increased by {@code 1} and
+ * {@code F} is ensured to be at least {@code tfn + 1}.
* @lucene.experimental
*/
public class BasicModelD extends BasicModel {
@Override
public final float score(EasyStats stats, float tfn) {
- long F = stats.getTotalTermFreq();
+ long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
+// long F = stats.getTotalTermFreq() + 1;
double phi = (double)tfn / F;
double nphi = 1 - phi;
- double p = 1.0 / stats.getNumberOfDocuments();
+ double p = 1.0 / (stats.getNumberOfDocuments() + 1);
double D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p));
return (float)(D * F + 0.5 * log2(2 * Math.PI * tfn * nphi));
}
+
+ @Override
+ public String toString() {
+ return "D";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelG.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelG.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelG.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelG.java Thu Aug 11 01:23:08 2011
@@ -30,4 +30,9 @@ public class BasicModelG extends BasicMo
// -log(1 / (lambda + 1)) -> log(lambda + 1)
return (float)(log2(lambda + 1) + tfn * log2((1 + lambda) / lambda));
}
+
+ @Override
+ public String toString() {
+ return "G";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIF.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIF.java Thu Aug 11 01:23:08 2011
@@ -30,4 +30,9 @@ public class BasicModelIF extends BasicM
long F = stats.getTotalTermFreq();
return tfn * (float)(log2((N + 1) / (F + 0.5)));
}
+
+ @Override
+ public String toString() {
+ return "I(F)";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIn.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIn.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIn.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIn.java Thu Aug 11 01:23:08 2011
@@ -44,4 +44,9 @@ public class BasicModelIn extends BasicM
new Explanation(stats.getDocFreq(), "docFreq"));
return result;
}
+
+ @Override
+ public String toString() {
+ return "I(n)";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIne.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIne.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIne.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelIne.java Thu Aug 11 01:23:08 2011
@@ -32,4 +32,9 @@ public class BasicModelIne extends Basic
double ne = N * (1 - Math.pow((N - 1) / (double)N, F));
return tfn * (float)(log2((N + 1) / (ne + 0.5)));
}
+
+ @Override
+ public String toString() {
+ return "I(ne)";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/BasicModelP.java Thu Aug 11 01:23:08 2011
@@ -31,4 +31,9 @@ public class BasicModelP extends BasicMo
+ (lambda + 1 / 12 / tfn - tfn) * log2(Math.E)
+ 0.5 * log2(2 * Math.PI * tfn));
}
+
+ @Override
+ public String toString() {
+ return "P";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DFRSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DFRSimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DFRSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DFRSimilarity.java Thu Aug 11 01:23:08 2011
@@ -97,4 +97,10 @@ public class DFRSimilarity extends EasyS
expl.addDetail(basicModel.explain(stats, tfn));
expl.addDetail(afterEffect.explain(stats, tfn));
}
+
+ @Override
+ public String toString() {
+ return "DFR " + basicModel.toString() + afterEffect.toString()
+ + normalization.toString();
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Distribution.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Distribution.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Distribution.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Distribution.java Thu Aug 11 01:23:08 2011
@@ -35,4 +35,11 @@ public abstract class Distribution {
return new Explanation(
score(stats, tfn, lambda), getClass().getSimpleName());
}
+
+ /**
+ * Subclasses must override this method to return the name of the
+ * distribution.
+ */
+ @Override
+ public abstract String toString();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionLL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionLL.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionLL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionLL.java Thu Aug 11 01:23:08 2011
@@ -29,4 +29,9 @@ public class DistributionLL extends Dist
public final float score(EasyStats stats, float tfn, float lambda) {
return (float)-Math.log(lambda / (tfn + lambda));
}
+
+ @Override
+ public String toString() {
+ return "LL";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionSPL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionSPL.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionSPL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/DistributionSPL.java Thu Aug 11 01:23:08 2011
@@ -28,7 +28,15 @@ package org.apache.lucene.search.similar
public class DistributionSPL extends Distribution {
@Override
public final float score(EasyStats stats, float tfn, float lambda) {
+ if (lambda == 1f) {
+ lambda = 0.99f;
+ }
return (float)-Math.log(
(Math.pow(lambda, (tfn / (tfn + 1))) - lambda) / (1 - lambda));
}
+
+ @Override
+ public String toString() {
+ return "SPL";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasySimilarity.java Thu Aug 11 01:23:08 2011
@@ -165,6 +165,13 @@ public abstract class EasySimilarity ext
return new EasySloppyDocScorer((EasyStats) stats,
context.reader.norms(fieldName));
}
+
+ /**
+ * Subclasses must override this method to return the name of the Similarity
+ * and preferably the values of parameters (if any) as well.
+ */
+ @Override
+ public abstract String toString(); // nocommit: to Similarity?
// ------------------------------ Norm handling ------------------------------
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasyStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasyStats.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasyStats.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/EasyStats.java Thu Aug 11 01:23:08 2011
@@ -51,6 +51,7 @@ public class EasyStats extends Similarit
/** Constructor. Sets the query boost. */
public EasyStats(float queryBoost) {
this.queryBoost = queryBoost;
+ this.totalBoost = queryBoost;
}
// ------------------------- Getter/setter methods -------------------------
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/IBSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/IBSimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/IBSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/IBSimilarity.java Thu Aug 11 01:23:08 2011
@@ -84,4 +84,16 @@ public class IBSimilarity extends EasySi
expl.addDetail(distribution.explain(
stats, normExpl.getValue(), lambdaExpl.getValue()));
}
+
+ /**
+ * The names of IB methods follow the pattern
+ * {@code IB <distribution>-<lambda><normalization>}. The name of the
+ * distribution is the same as in the original paper; for the names of lambda
+ * parameters, refer to the javadoc of the {@link Lambda} classes.
+ */
+ @Override
+ public String toString() {
+ return "IB " + distribution.toString() + "-" + lambda.toString()
+ + normalization.toString();
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMDirichletSimilarity.java Thu Aug 11 01:23:08 2011
@@ -83,4 +83,9 @@ public class LMDirichletSimilarity exten
public float getMu() {
return mu;
}
+
+ @Override
+ public String getName() {
+ return String.format("Dirichlet(%f)", getMu());
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java Thu Aug 11 01:23:08 2011
@@ -70,4 +70,9 @@ public class LMJelinekMercerSimilarity e
public float getLambda() {
return lambda;
}
+
+ @Override
+ public String getName() {
+ return String.format("Jelinek-Mercer(%f)", getLambda());
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LMSimilarity.java Thu Aug 11 01:23:08 2011
@@ -71,6 +71,30 @@ public abstract class LMSimilarity exten
expl.addDetail(new Explanation(collectionModel.computeProbability(stats),
"collection probability"));
}
+
+ /**
+ * Returns the name of the LM method. The values of the parameters should be
+ * included as well.
+ * <p>Used in {@link #toString()}.</p>
+ */
+ public abstract String getName();
+
+ /**
+ * Returns the name of the LM method. If a custom collection model strategy is
+ * used, its name is included as well.
+ * @see #getName()
+ * @see CollectionModel#getName()
+ * @see DefaultCollectionModel
+ */
+ @Override
+ public String toString() {
+ String coll = collectionModel.getName();
+ if (coll != null) {
+ return String.format("LM %s - %s", getName(), coll);
+ } else {
+ return String.format("LM %s", getName());
+ }
+ }
/** Stores the collection distribution of the current term. */
public static class LMStats extends EasyStats {
@@ -105,6 +129,9 @@ public abstract class LMSimilarity exten
* strategy for the current term.
*/
public float computeProbability(EasyStats stats);
+
+ /** The name of the collection model strategy. */
+ public String getName();
}
/**
@@ -116,5 +143,10 @@ public abstract class LMSimilarity exten
public float computeProbability(EasyStats stats) {
return (float)stats.getTotalTermFreq() / stats.getNumberOfFieldTokens();
}
+
+ @Override
+ public String getName() {
+ return null;
+ }
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Lambda.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Lambda.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Lambda.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Lambda.java Thu Aug 11 01:23:08 2011
@@ -30,4 +30,13 @@ public abstract class Lambda {
public abstract float lambda(EasyStats stats);
/** Explains the lambda parameter. */
public abstract Explanation explain(EasyStats stats);
+
+ /**
+ * Subclasses must override this method to return the code of the lambda
+ * formula. Since the original paper is not very clear on this matter, and
+ * also uses the DFR naming scheme incorrectly, the codes here were chosen
+ * arbitrarily.
+ */
+ @Override
+ public abstract String toString();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaDF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaDF.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaDF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaDF.java Thu Aug 11 01:23:08 2011
@@ -26,18 +26,23 @@ import org.apache.lucene.search.Explanat
public class LambdaDF extends Lambda {
@Override
public final float lambda(EasyStats stats) {
- return (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
+ return (float)stats.getDocFreq() / stats.getNumberOfDocuments();
}
-
+
@Override
public final Explanation explain(EasyStats stats) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(lambda(stats));
result.addDetail(
- new Explanation(stats.getTotalTermFreq(), "totalTermFreq"));
+ new Explanation(stats.getDocFreq(), "docFreq"));
result.addDetail(
new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments"));
return result;
}
+
+ @Override
+ public String toString() {
+ return "D";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaTTF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaTTF.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaTTF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/LambdaTTF.java Thu Aug 11 01:23:08 2011
@@ -23,21 +23,26 @@ import org.apache.lucene.search.Explanat
* Computes lambda as {@code docFreq / numberOfDocuments}.
* @lucene.experimental
*/
-public class LambdaTTF extends Lambda {
+public class LambdaTTF extends Lambda {
@Override
public final float lambda(EasyStats stats) {
- return (float)stats.getDocFreq() / stats.getNumberOfDocuments();
+ return (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
}
-
+
@Override
public final Explanation explain(EasyStats stats) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(lambda(stats));
result.addDetail(
- new Explanation(stats.getDocFreq(), "docFreq"));
+ new Explanation(stats.getTotalTermFreq(), "totalTermFreq"));
result.addDetail(
new Explanation(stats.getNumberOfDocuments(), "numberOfDocuments"));
return result;
}
+
+ @Override
+ public String toString() {
+ return "L";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Normalization.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Normalization.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Normalization.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/Normalization.java Thu Aug 11 01:23:08 2011
@@ -59,5 +59,17 @@ public abstract class Normalization {
public final Explanation explain(EasyStats stats, float tf, int len) {
return new Explanation(1, "no normalization");
}
+
+ @Override
+ public String toString() {
+ return "";
+ }
}
+
+ /**
+ * Subclasses must override this method to return the code of the
+ * normalization formula. Refer to the original paper for the list.
+ */
+ @Override
+ public abstract String toString();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH1.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH1.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH1.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH1.java Thu Aug 11 01:23:08 2011
@@ -25,4 +25,9 @@ public class NormalizationH1 extends Nor
public final float tfn(EasyStats stats, float tf, int len) {
return tf * stats.getAvgFieldLength() / len;
}
+
+ @Override
+ public String toString() {
+ return "1";
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH2.java?rev=1156429&r1=1156428&r2=1156429&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/NormalizationH2.java Thu Aug 11 01:23:08 2011
@@ -28,4 +28,9 @@ public class NormalizationH2 extends Nor
public final float tfn(EasyStats stats, float tf, int len) {
return (float)(tf * log2(1 + stats.getAvgFieldLength() / len));
}
+
+ @Override
+ public String toString() {
+ return "2";
+ }
}
\ No newline at end of file