You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/12/29 09:06:54 UTC

[3/4] lucene-solr:master: LUCENE-8010: Fix similarities so that they pass tests.

LUCENE-8010: Fix similarities so that they pass tests.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b2f24816
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b2f24816
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b2f24816

Branch: refs/heads/master
Commit: b2f248164c1a3ddf213a56778d55c9252a022f18
Parents: 8e439a0
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Dec 28 17:44:01 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:00 2017 +0100

----------------------------------------------------------------------
 .../lucene/search/similarities/Axiomatic.java   | 28 +++++++++++++++++++-
 .../search/similarities/AxiomaticF1EXP.java     |  2 +-
 .../search/similarities/AxiomaticF1LOG.java     |  2 +-
 .../search/similarities/AxiomaticF3EXP.java     |  2 +-
 .../search/similarities/AxiomaticF3LOG.java     |  2 +-
 .../search/similarities/DistributionSPL.java    | 26 +++++++++++++++---
 .../lucene/search/similarities/LambdaDF.java    |  7 ++++-
 .../lucene/search/similarities/LambdaTTF.java   |  7 ++++-
 .../search/similarities/TestAxiomaticF1EXP.java |  4 ---
 .../search/similarities/TestAxiomaticF1LOG.java |  4 ---
 .../search/similarities/TestAxiomaticF3EXP.java |  7 +----
 .../search/similarities/TestAxiomaticF3LOG.java |  7 +----
 .../similarities/TestDistributionSPL.java       |  4 ---
 .../search/similarities/TestSimilarity2.java    |  7 +++++
 .../search/similarities/RandomSimilarity.java   | 11 +++++---
 15 files changed, 81 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
index 403773e..553fd42 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.search.similarities;
 
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.search.Explanation;
@@ -101,11 +102,13 @@ public abstract class Axiomatic extends SimilarityBase {
 
   @Override
   public double score(BasicStats stats, double freq, double docLen) {
-    return tf(stats, freq, docLen)
+    double score = tf(stats, freq, docLen)
         * ln(stats, freq, docLen)
         * tfln(stats, freq, docLen)
         * idf(stats, freq, docLen)
         - gamma(stats, freq, docLen);
+    // AxiomaticF3 similarities might produce negative scores due to their gamma component
+    return Math.max(0, score);
   }
 
   @Override
@@ -115,6 +118,29 @@ public abstract class Axiomatic extends SimilarityBase {
   }
 
   @Override
+  protected Explanation explain(
+      BasicStats stats, int doc, Explanation freq, double docLen) {    
+    List<Explanation> subs = new ArrayList<>();
+    explain(subs, stats, doc, freq.getValue(), docLen);
+    
+    double score = tf(stats, freq.getValue(), docLen)
+        * ln(stats, freq.getValue(), docLen)
+        * tfln(stats, freq.getValue(), docLen)
+        * idf(stats, freq.getValue(), docLen)
+        - gamma(stats, freq.getValue(), docLen);
+
+    Explanation explanation = Explanation.match((float) score,
+        "score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:",
+        subs);
+    if (score < 0) {
+      explanation = Explanation.match(0, "max of:",
+          Explanation.match(0, "Minimum legal score"),
+          explanation);
+    }
+    return explanation;
+  }
+
+  @Override
   protected void explain(List<Explanation> subs, BasicStats stats, int doc,
                          double freq, double docLen) {
     if (stats.getBoost() != 1.0d) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
index c026feb..ca5c42b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1EXP.java
@@ -60,7 +60,7 @@ public class AxiomaticF1EXP extends Axiomatic {
    */
   @Override
   protected double tf(BasicStats stats, double freq, double docLen) {
-    if (freq <= 0.0) return 0.0;
+    freq += 1; // otherwise gives negative scores for freqs < 1
     return 1 + Math.log(1 + Math.log(freq));
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
index 2e19255..6ef3587 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF1LOG.java
@@ -52,7 +52,7 @@ public class AxiomaticF1LOG extends Axiomatic {
    */
   @Override
   protected double tf(BasicStats stats, double freq, double docLen) {
-    if (freq <= 0.0) return 0.0;
+    freq += 1; // otherwise gives negative scores for freqs < 1
     return 1 + Math.log(1 + Math.log(freq));
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
index 635dc68..a54c754 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3EXP.java
@@ -58,7 +58,7 @@ public class AxiomaticF3EXP extends Axiomatic {
    */
   @Override
   protected double tf(BasicStats stats, double freq, double docLen) {
-    if (freq <= 0.0) return 0.0;
+    freq += 1; // otherwise gives negative scores for freqs < 1
     return 1 + Math.log(1 + Math.log(freq));
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
index 4753e4e..194b70a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AxiomaticF3LOG.java
@@ -47,7 +47,7 @@ public class AxiomaticF3LOG extends Axiomatic {
    */
   @Override
   protected double tf(BasicStats stats, double freq, double docLen) {
-    if (freq <= 0.0) return 0.0;
+    freq += 1; // otherwise gives negative scores for freqs < 1
     return 1 + Math.log(1 + Math.log(freq));
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
index fc05d72..2ab44df 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
@@ -34,11 +34,29 @@ public class DistributionSPL extends Distribution {
 
   @Override
   public final double score(BasicStats stats, double tfn, double lambda) {
-    if (lambda == 1d) {
-      lambda = 0.99d;
+    assert lambda != 1;
+
+    // tfn/(tfn+1) -> 1 - 1/(tfn+1), guaranteed to be non decreasing when tfn increases
+    double q = 1 - 1 / (tfn + 1);
+    if (q == 1) {
+      q = Math.nextDown(1.0);
+    }
+
+    double pow = Math.pow(lambda, q);
+    if (pow == lambda) {
+      // this can happen because of floating-point rounding
+      // but then we return infinity when taking the log, so we enforce
+      // that pow is different from lambda
+      if (lambda < 1) {
+        // x^y > x when x < 1 and y < 1
+        pow = Math.nextUp(lambda);
+      } else {
+        // x^y < x when x > 1 and y < 1
+        pow = Math.nextDown(lambda);
+      }
     }
-    return -Math.log(
-        (Math.pow(lambda, (tfn / (tfn + 1))) - lambda) / (1 - lambda));
+
+    return -Math.log((pow - lambda) / (1 - lambda));
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
index 7dc320d..6b7dbb2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaDF.java
@@ -30,7 +30,12 @@ public class LambdaDF extends Lambda {
 
   @Override
   public final float lambda(BasicStats stats) {
-    return (stats.getDocFreq()+1F) / (stats.getNumberOfDocuments()+1F);
+    float lambda = (float) ((stats.getDocFreq() + 1.0) / (stats.getNumberOfDocuments() + 1.0));
+    if (lambda == 1) {
+      // Distribution SPL cannot work with values of lambda that are equal to 1
+      lambda = Math.nextDown(lambda);
+    }
+    return lambda;
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
index 6dc54a3..72eae4c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
@@ -30,7 +30,12 @@ public class LambdaTTF extends Lambda {
 
   @Override
   public final float lambda(BasicStats stats) {
-    return (stats.getTotalTermFreq()+1F) / (stats.getNumberOfDocuments()+1F);
+    float lambda = (float) ((stats.getTotalTermFreq() + 1.0) / (stats.getNumberOfDocuments() + 1.0));
+    if (lambda == 1) {
+      // Distribution SPL cannot work with values of lambda that are equal to 1
+      lambda = Math.nextUp(lambda);
+    }
+    return lambda;
   }
 
   public final Explanation explain(BasicStats stats) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
index 16da903..9e2edf1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
@@ -16,10 +16,6 @@
  */
 package org.apache.lucene.search.similarities;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns NaN scores for sloppy freqs < 1 (due to log without floor)
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
 public class TestAxiomaticF1EXP extends AxiomaticTestCase {
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
index 88ad18e..0d6ba48 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
@@ -16,10 +16,6 @@
  */
 package org.apache.lucene.search.similarities;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns NaN scores for sloppy freqs < 1 (due to log without floor)
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
 public class TestAxiomaticF1LOG extends AxiomaticTestCase {
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
index 69ab719..63ad87a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
@@ -16,16 +16,11 @@
  */
 package org.apache.lucene.search.similarities;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns negative scores at least, but it (now) warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
 public class TestAxiomaticF3EXP extends AxiomaticTestCase {
 
   @Override
   protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
-    // TODO: use the randomized parameters and not these hardcoded ones
-    return new AxiomaticF3EXP(0.25f, 1);
+    return new AxiomaticF3EXP(s, queryLen);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
index 6863277..cb92984 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
@@ -16,16 +16,11 @@
  */
 package org.apache.lucene.search.similarities;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns negative scores at least, but it (now) warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
 public class TestAxiomaticF3LOG extends AxiomaticTestCase {
 
   @Override
   protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
-    // TODO: use the randomized parameters and not these hardcoded ones
-    return new AxiomaticF3LOG(0.25f, 1);
+    return new AxiomaticF3LOG(s, queryLen);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
index 984915a..20a2b32 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
@@ -16,10 +16,6 @@
  */
 package org.apache.lucene.search.similarities;
 
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// scores go infinite, but it warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
 public class TestDistributionSPL extends DistributionTestCase {
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
index 6fd38bd..49dc154 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarity2.java
@@ -54,6 +54,13 @@ public class TestSimilarity2 extends LuceneTestCase {
     sims = new ArrayList<>();
     sims.add(new ClassicSimilarity());
     sims.add(new BM25Similarity());
+    sims.add(new BooleanSimilarity());
+    sims.add(new AxiomaticF1EXP());
+    sims.add(new AxiomaticF1LOG());
+    sims.add(new AxiomaticF2EXP());
+    sims.add(new AxiomaticF2LOG());
+    sims.add(new AxiomaticF3EXP(0.25f, 3));
+    sims.add(new AxiomaticF3LOG(0.25f, 3));
     // TODO: not great that we dup this all with TestSimilarityBase
     for (BasicModel basicModel : TestSimilarityBase.BASIC_MODELS) {
       for (AfterEffect afterEffect : TestSimilarityBase.AFTER_EFFECTS) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b2f24816/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
index 444e8ef..0925aee 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
@@ -89,8 +89,12 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
     allSims = new ArrayList<>();
     allSims.add(new ClassicSimilarity());
     allSims.add(new BM25Similarity());
-    // We cannot do this, because this similarity behaves in "non-traditional" ways:
-    // allSims.add(new BooleanSimilarity());
+    allSims.add(new AxiomaticF1EXP());
+    allSims.add(new AxiomaticF1LOG());
+    allSims.add(new AxiomaticF2EXP());
+    allSims.add(new AxiomaticF2LOG());
+
+    allSims.add(new BooleanSimilarity());
     for (BasicModel basicModel : BASIC_MODELS) {
       for (AfterEffect afterEffect : AFTER_EFFECTS) {
         for (Normalization normalization : NORMALIZATIONS) {
@@ -105,8 +109,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
         }
       }
     }
-    /* TODO: enable Dirichlet 
-    allSims.add(new LMDirichletSimilarity()); */
+    allSims.add(new LMDirichletSimilarity());
     allSims.add(new LMJelinekMercerSimilarity(0.1f));
     allSims.add(new LMJelinekMercerSimilarity(0.7f));
     for (Independence independence : INDEPENDENCE_MEASURES) {