You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/12/07 12:45:32 UTC
[38/50] [abbrv] lucene-solr:jira/solr-11285-sim: LUCENE-8015: Fixed
DFR similarities' scores to not decrease when tfn increases.
LUCENE-8015: Fixed DFR similarities' scores to not decrease when tfn increases.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/63b63c57
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/63b63c57
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/63b63c57
Branch: refs/heads/jira/solr-11285-sim
Commit: 63b63c573487fe6b054afb6073c057a88a15288f
Parents: 70b3666
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Dec 6 18:19:57 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Dec 6 18:19:57 2017 +0100
----------------------------------------------------------------------
.../lucene/search/similarities/AfterEffect.java | 27 +---
.../search/similarities/AfterEffectB.java | 6 +-
.../search/similarities/AfterEffectL.java | 6 +-
.../lucene/search/similarities/BasicModel.java | 10 +-
.../search/similarities/BasicModelBE.java | 55 -------
.../lucene/search/similarities/BasicModelD.java | 56 -------
.../lucene/search/similarities/BasicModelG.java | 12 +-
.../search/similarities/BasicModelIF.java | 11 +-
.../search/similarities/BasicModelIn.java | 15 +-
.../search/similarities/BasicModelIne.java | 11 +-
.../lucene/search/similarities/BasicModelP.java | 49 ------
.../search/similarities/DFRSimilarity.java | 22 +--
.../search/similarities/BasicModelTestCase.java | 5 +-
.../search/similarities/TestBasicModelBE.java | 30 ----
.../search/similarities/TestBasicModelD.java | 30 ----
.../search/similarities/TestBasicModelP.java | 30 ----
.../search/similarities/TestSimilarityBase.java | 62 +-------
.../similarities/BaseSimilarityTestCase.java | 153 ++++++++++++++-----
.../search/similarities/RandomSimilarity.java | 6 +-
.../similarities/DFRSimilarityFactory.java | 18 +--
.../solr/collection1/conf/schema-dfr.xml | 2 +-
.../similarities/TestDFRSimilarityFactory.java | 4 +-
22 files changed, 193 insertions(+), 427 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffect.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffect.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffect.java
index e62513e..cbcd789 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffect.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffect.java
@@ -37,33 +37,12 @@ public abstract class AfterEffect {
*/
public AfterEffect() {}
- /** Returns the aftereffect score. */
- public abstract double score(BasicStats stats, double tfn);
+ /** Returns the product of the after effect with {@code 1+tfn}.
+ * The returned value must not depend on the value of {@code tfn}. */
+ public abstract double scoreTimes1pTfn(BasicStats stats);
/** Returns an explanation for the score. */
public abstract Explanation explain(BasicStats stats, double tfn);
-
- /** Implementation used when there is no aftereffect. */
- public static final class NoAfterEffect extends AfterEffect {
-
- /** Sole constructor: parameter-free */
- public NoAfterEffect() {}
-
- @Override
- public double score(BasicStats stats, double tfn) {
- return 1.0;
- }
-
- @Override
- public Explanation explain(BasicStats stats, double tfn) {
- return Explanation.match(1, "no aftereffect");
- }
-
- @Override
- public String toString() {
- return "";
- }
- }
/**
* Subclasses must override this method to return the code of the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectB.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
index b1bff96..6678cd9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
@@ -29,16 +29,16 @@ public class AfterEffectB extends AfterEffect {
public AfterEffectB() {}
@Override
- public final double score(BasicStats stats, double tfn) {
+ public final double scoreTimes1pTfn(BasicStats stats) {
long F = stats.getTotalTermFreq()+1;
long n = stats.getDocFreq()+1;
- return (F + 1) / (n * (tfn + 1));
+ return (F + 1.0) / n;
}
@Override
public final Explanation explain(BasicStats stats, double tfn) {
return Explanation.match(
- (float) score(stats, tfn),
+ (float) (scoreTimes1pTfn(stats) / (1 + tfn)),
getClass().getSimpleName() + ", computed from: ",
Explanation.match((float) tfn, "tfn"),
Explanation.match(stats.getTotalTermFreq(), "totalTermFreq"),
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectL.java b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
index a8ee53d..60a1b1d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
@@ -29,14 +29,14 @@ public class AfterEffectL extends AfterEffect {
public AfterEffectL() {}
@Override
- public final double score(BasicStats stats, double tfn) {
- return 1 / (tfn + 1);
+ public final double scoreTimes1pTfn(BasicStats stats) {
+ return 1.0;
}
@Override
public final Explanation explain(BasicStats stats, double tfn) {
return Explanation.match(
- (float) score(stats, tfn),
+ (float) (scoreTimes1pTfn(stats) / (1 + tfn)),
getClass().getSimpleName() + ", computed from: ",
Explanation.match((float) tfn, "tfn"));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModel.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModel.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModel.java
index 20dee40..51d4571 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModel.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModel.java
@@ -36,8 +36,10 @@ public abstract class BasicModel {
*/
public BasicModel() {}
- /** Returns the informative content score. */
- public abstract double score(BasicStats stats, double tfn);
+ /** Returns the informative content score combined with the after effect, more specifically
+ * {@code informationContentScore * aeTimes1pTfn / (1 + tfn)}. This function must be
+ * non-decreasing with {@code tfn}. */
+ public abstract double score(BasicStats stats, double tfn, double aeTimes1pTfn);
/**
* Returns an explanation for the score.
@@ -46,9 +48,9 @@ public abstract class BasicModel {
* explanation for such models. Subclasses that use other statistics must
* override this method.</p>
*/
- public Explanation explain(BasicStats stats, double tfn) {
+ public Explanation explain(BasicStats stats, double tfn, double aeTimes1pTfn) {
return Explanation.match(
- (float) score(stats, tfn),
+ (float) (score(stats, tfn, aeTimes1pTfn) * (1 + tfn) / aeTimes1pTfn),
getClass().getSimpleName() + ", computed from: ",
Explanation.match(stats.getNumberOfDocuments(), "numberOfDocuments"),
Explanation.match(stats.getTotalTermFreq(), "totalTermFreq"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelBE.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
deleted file mode 100644
index 0ba5686..0000000
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-
-import static org.apache.lucene.search.similarities.SimilarityBase.log2;
-
-/**
- * Limiting form of the Bose-Einstein model. The formula used in Lucene differs
- * slightly from the one in the original paper: {@code F} is increased by {@code tfn+1}
- * and {@code N} is increased by {@code F}
- * @lucene.experimental
- * NOTE: in some corner cases this model may give poor performance or infinite scores with
- * Normalizations that return large or small values for {@code tfn} such as NormalizationH3.
- * Consider using the geometric approximation ({@link BasicModelG}) instead, which provides
- * the same relevance but with less practical problems.
- */
-public class BasicModelBE extends BasicModel {
-
- /** Sole constructor: parameter-free */
- public BasicModelBE() {}
-
- @Override
- public final double score(BasicStats stats, double tfn) {
- double F = stats.getTotalTermFreq() + 1 + tfn;
- // approximation only holds true when F << N, so we use N += F
- double N = F + stats.getNumberOfDocuments();
- return (-log2((N - 1) * Math.E)
- + f(N + F - 1, N + F - tfn - 2) - f(F, F - tfn));
- }
-
- /** The <em>f</em> helper function defined for <em>B<sub>E</sub></em>. */
- private final double f(double n, double m) {
- return (m + 0.5) * log2(n / m) + (n - m) * log2(n);
- }
-
- @Override
- public String toString() {
- return "Be";
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelD.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelD.java
deleted file mode 100644
index 70b004b..0000000
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelD.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-
-import static org.apache.lucene.search.similarities.SimilarityBase.log2;
-
-/**
- * Implements the approximation of the binomial model with the divergence
- * for DFR. The formula used in Lucene differs slightly from the one in the
- * original paper: to avoid underflow for small values of {@code N} and
- * {@code F}, {@code N} is increased by {@code 1} and
- * {@code F} is always increased by {@code tfn+1}.
- * <p>
- * WARNING: for terms that do not meet the expected random distribution
- * (e.g. stopwords), this model may give poor performance, such as
- * abnormally high or NaN scores for low tf values.
- * @lucene.experimental
- */
-public class BasicModelD extends BasicModel {
-
- /** Sole constructor: parameter-free */
- public BasicModelD() {}
-
- @Override
- public final double score(BasicStats stats, double tfn) {
- // we have to ensure phi is always < 1 for tiny TTF values, otherwise nphi can go negative,
- // resulting in NaN. cleanest way is to unconditionally always add tfn to totalTermFreq
- // to create a 'normalized' F.
- double F = stats.getTotalTermFreq() + 1 + tfn;
- double phi = tfn / F;
- double nphi = 1 - phi;
- double p = 1.0 / (stats.getNumberOfDocuments() + 1);
- double D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p));
- return D * F + 0.5 * log2(1 + 2 * Math.PI * tfn * nphi);
- }
-
- @Override
- public String toString() {
- return "D";
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelG.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelG.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelG.java
index 2f8cb43..ce87196 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelG.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelG.java
@@ -31,13 +31,21 @@ public class BasicModelG extends BasicModel {
public BasicModelG() {}
@Override
- public final double score(BasicStats stats, double tfn) {
+ public final double score(BasicStats stats, double tfn, double aeTimes1pTfn) {
// just like in BE, approximation only holds true when F << N, so we use lambda = F / (N + F)
double F = stats.getTotalTermFreq() + 1;
double N = stats.getNumberOfDocuments();
double lambda = F / (N + F);
// -log(1 / (lambda + 1)) -> log(lambda + 1)
- return log2(lambda + 1) + tfn * log2((1 + lambda) / lambda);
+ double A = log2(lambda + 1);
+ double B = log2((1 + lambda) / lambda);
+
+ // basic model G should return (A + B * tfn)
+ // which we rewrite to B * (1 + tfn) - (B - A)
+ // so that it can be combined with the after effect while still guaranteeing
+ // that the result is non-decreasing with tfn since B >= A
+
+ return (B - (B - A) / (1 + tfn)) * aeTimes1pTfn;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIF.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
index 5b7350b..16781cd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
@@ -29,10 +29,17 @@ public class BasicModelIF extends BasicModel {
public BasicModelIF() {}
@Override
- public final double score(BasicStats stats, double tfn) {
+ public final double score(BasicStats stats, double tfn, double aeTimes1pTfn) {
long N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
- return tfn * log2(1 + (N + 1) / (F + 0.5));
+ double A = log2(1 + (N + 1) / (F + 0.5));
+
+ // basic model IF should return A * tfn
+ // which we rewrite to A * (1 + tfn) - A
+ // so that it can be combined with the after effect while still guaranteeing
+ // that the result is non-decreasing with tfn
+
+ return A * aeTimes1pTfn * (1 - 1 / (1 + tfn));
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIn.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
index a09eedb..5f1e181 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
@@ -30,16 +30,23 @@ public class BasicModelIn extends BasicModel {
public BasicModelIn() {}
@Override
- public final double score(BasicStats stats, double tfn) {
+ public final double score(BasicStats stats, double tfn, double aeTimes1pTfn) {
long N = stats.getNumberOfDocuments();
long n = stats.getDocFreq();
- return tfn * log2((N + 1) / (n + 0.5));
+ double A = log2((N + 1) / (n + 0.5));
+
+ // basic model I(n) should return A * tfn
+ // which we rewrite to A * (1 + tfn) - A
+ // so that it can be combined with the after effect while still guaranteeing
+ // that the result is non-decreasing with tfn
+
+ return A * aeTimes1pTfn * (1 - 1 / (1 + tfn));
}
@Override
- public final Explanation explain(BasicStats stats, double tfn) {
+ public final Explanation explain(BasicStats stats, double tfn, double aeTimes1pTfn) {
return Explanation.match(
- (float) score(stats, tfn),
+ (float) (score(stats, tfn, aeTimes1pTfn) * (1 + tfn) / aeTimes1pTfn),
getClass().getSimpleName() + ", computed from: ",
Explanation.match(stats.getNumberOfDocuments(), "numberOfDocuments"),
Explanation.match(stats.getDocFreq(), "docFreq"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIne.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
index b4e830d..fb755fa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
@@ -30,11 +30,18 @@ public class BasicModelIne extends BasicModel {
public BasicModelIne() {}
@Override
- public final double score(BasicStats stats, double tfn) {
+ public final double score(BasicStats stats, double tfn, double aeTimes1pTfn) {
long N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
double ne = N * (1 - Math.pow((N - 1) / (double)N, F));
- return tfn * log2((N + 1) / (ne + 0.5));
+ double A = log2((N + 1) / (ne + 0.5));
+
+ // basic model I(ne) should return A * tfn
+ // which we rewrite to A * (1 + tfn) - A
+ // so that it can be combined with the after effect while still guaranteeing
+ // that the result is non-decreasing with tfn
+
+ return A * aeTimes1pTfn * (1 - 1 / (1 + tfn));
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelP.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelP.java
deleted file mode 100644
index f66e3d0..0000000
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BasicModelP.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-
-import static org.apache.lucene.search.similarities.SimilarityBase.log2;
-
-/**
- * Implements the Poisson approximation for the binomial model for DFR.
- * @lucene.experimental
- * <p>
- * WARNING: for terms that do not meet the expected random distribution
- * (e.g. stopwords), this model may give poor performance, such as
- * abnormally high scores for low tf values.
- */
-public class BasicModelP extends BasicModel {
- /** {@code log2(Math.E)}, precomputed. */
- protected static double LOG2_E = log2(Math.E);
-
- /** Sole constructor: parameter-free */
- public BasicModelP() {}
-
- @Override
- public final double score(BasicStats stats, double tfn) {
- double lambda = (stats.getTotalTermFreq()+1) / (double) (stats.getNumberOfDocuments()+1);
- return tfn * log2(tfn / lambda)
- + (lambda + 1 / (12 * tfn) - tfn) * LOG2_E
- + 0.5 * log2(2 * Math.PI * tfn);
- }
-
- @Override
- public String toString() {
- return "P";
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
index aacd246..d793d94 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
@@ -20,7 +20,6 @@ package org.apache.lucene.search.similarities;
import java.util.List;
import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.similarities.AfterEffect.NoAfterEffect;
import org.apache.lucene.search.similarities.Normalization.NoNormalization;
/**
@@ -40,10 +39,7 @@ import org.apache.lucene.search.similarities.Normalization.NoNormalization;
* <ol>
* <li>{@link BasicModel}: Basic model of information content:
* <ul>
- * <li>{@link BasicModelBE}: Limiting form of Bose-Einstein
* <li>{@link BasicModelG}: Geometric approximation of Bose-Einstein
- * <li>{@link BasicModelP}: Poisson approximation of the Binomial
- * <li>{@link BasicModelD}: Divergence approximation of the Binomial
* <li>{@link BasicModelIn}: Inverse document frequency
* <li>{@link BasicModelIne}: Inverse expected document
* frequency [mixture of Poisson and IDF]
@@ -55,7 +51,6 @@ import org.apache.lucene.search.similarities.Normalization.NoNormalization;
* <ul>
* <li>{@link AfterEffectL}: Laplace's law of succession
* <li>{@link AfterEffectB}: Ratio of two Bernoulli processes
- * <li>{@link NoAfterEffect}: no first normalization
* </ul>
* <li>{@link Normalization}: Second (length) normalization:
* <ul>
@@ -72,6 +67,10 @@ import org.apache.lucene.search.similarities.Normalization.NoNormalization;
* </ol>
* <p>Note that <em>qtf</em>, the multiplicity of term-occurrence in the query,
* is not handled by this implementation.</p>
+ * <p>Note that basic models BE (Limiting form of Bose-Einstein), P (Poisson
+ * approximation of the Binomial) and D (Divergence approximation of the
+ * Binomial) are not implemented because their formulas couldn't be written in
+ * a way that makes scores non-decreasing with the normalized term frequency.</p>
* @see BasicModel
* @see AfterEffect
* @see Normalization
@@ -89,8 +88,8 @@ public class DFRSimilarity extends SimilarityBase {
* Creates DFRSimilarity from the three components.
* <p>
* Note that <code>null</code> values are not allowed:
- * if you want no normalization or after-effect, instead pass
- * {@link NoNormalization} or {@link NoAfterEffect} respectively.
+ * if you want no normalization, instead pass
+ * {@link NoNormalization}.
* @param basicModel Basic model of information content
* @param afterEffect First normalization of information gain
* @param normalization Second (length) normalization
@@ -109,8 +108,8 @@ public class DFRSimilarity extends SimilarityBase {
@Override
protected double score(BasicStats stats, double freq, double docLen) {
double tfn = normalization.tfn(stats, freq, docLen);
- return stats.getBoost() *
- basicModel.score(stats, tfn) * afterEffect.score(stats, tfn);
+ double aeTimes1pTfn = afterEffect.scoreTimes1pTfn(stats);
+ return stats.getBoost() * basicModel.score(stats, tfn, aeTimes1pTfn);
}
@Override
@@ -121,9 +120,10 @@ public class DFRSimilarity extends SimilarityBase {
}
Explanation normExpl = normalization.explain(stats, freq, docLen);
- float tfn = normExpl.getValue();
+ double tfn = normalization.tfn(stats, freq, docLen);
+ double aeTimes1pTfn = afterEffect.scoreTimes1pTfn(stats);
subs.add(normExpl);
- subs.add(basicModel.explain(stats, tfn));
+ subs.add(basicModel.explain(stats, tfn, aeTimes1pTfn));
subs.add(afterEffect.explain(stats, tfn));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
index 6623666..3cb83e2 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
@@ -23,11 +23,8 @@ public abstract class BasicModelTestCase extends BaseSimilarityTestCase {
@Override
protected final Similarity getSimilarity(Random random) {
final AfterEffect afterEffect;
- switch(random.nextInt(3)) {
+ switch(random.nextInt(2)) {
case 0:
- afterEffect = new AfterEffect.NoAfterEffect();
- break;
- case 1:
afterEffect = new AfterEffectL();
break;
default:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java
deleted file mode 100644
index 2dc956f..0000000
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// returns negative scores at least, but it warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
-public class TestBasicModelBE extends BasicModelTestCase {
-
- @Override
- protected BasicModel getBasicModel() {
- return new BasicModelBE();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java
deleted file mode 100644
index 7eee359..0000000
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-// scores go backwards with respect to TF, but it warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
-public class TestBasicModelD extends BasicModelTestCase {
-
- @Override
- protected BasicModel getBasicModel() {
- return new BasicModelD();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java
deleted file mode 100644
index 2788ff8..0000000
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search.similarities;
-
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-
-//scores go backwards with respect to TF, but it warns it has problems
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
-public class TestBasicModelP extends BasicModelTestCase {
-
- @Override
- protected BasicModel getBasicModel() {
- return new BasicModelP();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
index 8a6227c..be85801 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
@@ -76,13 +76,12 @@ public class TestSimilarityBase extends LuceneTestCase {
private static float FLOAT_EPSILON = 1e-5f;
/** The DFR basic models to test. */
static BasicModel[] BASIC_MODELS = {
- new BasicModelBE(), new BasicModelD(), new BasicModelG(),
- new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
- new BasicModelP()
+ new BasicModelG(), new BasicModelIF(), new BasicModelIn(),
+ new BasicModelIne()
};
/** The DFR aftereffects to test. */
static AfterEffect[] AFTER_EFFECTS = {
- new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect()
+ new AfterEffectB(), new AfterEffectL()
};
/** The DFR normalizations to test. */
static Normalization[] NORMALIZATIONS = {
@@ -445,21 +444,6 @@ public class TestSimilarityBase extends LuceneTestCase {
new IBSimilarity(new DistributionSPL(), new LambdaTTF(), new Normalization.NoNormalization());
correctnessTestCore(sim, 2.2387237548828125f);
}
-
- /** Correctness test for the PL2 DFR model. */
- public void testPL2() throws IOException {
- SimilarityBase sim = new DFRSimilarity(
- new BasicModelP(), new AfterEffectL(), new NormalizationH2());
- float tfn = (float)(FREQ * SimilarityBase.log2(
- 1 + AVG_FIELD_LENGTH / DOC_LEN)); // 8.1894750101
- float l = 1.0f / (tfn + 1.0f); // 0.108820144666
- float lambda = (1.0f + TOTAL_TERM_FREQ) / (1f + NUMBER_OF_DOCUMENTS); // 0.7029703
- float p = (float)(tfn * SimilarityBase.log2(tfn / lambda) +
- (lambda + 1 / (12 * tfn) - tfn) * SimilarityBase.log2(Math.E) +
- 0.5 * SimilarityBase.log2(2 * Math.PI * tfn)); // 21.065619
- float gold = l * p; // 2.2923636
- correctnessTestCore(sim, gold);
- }
/** Correctness test for the IneB2 DFR model. */
public void testIneB2() throws IOException {
@@ -475,50 +459,14 @@ public class TestSimilarityBase extends LuceneTestCase {
correctnessTestCore(sim, 1.6390540599822998f);
}
- /** Correctness test for the BEB1 DFR model. */
- public void testBEB1() throws IOException {
- SimilarityBase sim = new DFRSimilarity(
- new BasicModelBE(), new AfterEffectB(), new NormalizationH1());
- float tfn = FREQ * AVG_FIELD_LENGTH / DOC_LEN; // 8.75
- float b = (TOTAL_TERM_FREQ + 1 + 1) / ((DOC_FREQ + 1) * (tfn + 1)); // 0.67132866
- double f = TOTAL_TERM_FREQ + 1 + tfn;
- double n = f + NUMBER_OF_DOCUMENTS;
- double n1 = n + f - 1; // 258.5
- double m1 = n + f - tfn - 2; // 248.75
- double n2 = f; // 79.75
- double m2 = f - tfn; // 71.0
- float be = (float)(-SimilarityBase.log2(n - 1) -
- SimilarityBase.log2(Math.E) + // -8.924494472554715
- ((m1 + 0.5f) * SimilarityBase.log2(n1 / m1) +
- (n1 - m1) * SimilarityBase.log2(n1)) - // 91.9620374903885
- ((m2 + 0.5f) * SimilarityBase.log2(n2 / m2) +
- (n2 - m2) * SimilarityBase.log2(n2))); // 67.26544321004599
- // 15.7720995
- float gold = b * be; // 10.588263
- correctnessTestCore(sim, gold);
- }
-
- /** Correctness test for the D DFR model (basic model only). */
- public void testD() throws IOException {
- SimilarityBase sim = new DFRSimilarity(new BasicModelD(), new AfterEffect.NoAfterEffect(), new Normalization.NoNormalization());
- double totalTermFreqNorm = TOTAL_TERM_FREQ + FREQ + 1;
- double p = 1.0 / (NUMBER_OF_DOCUMENTS + 1); // 0.009900990099009901
- double phi = FREQ / totalTermFreqNorm; // 0.08974358974358974
- double D = phi * SimilarityBase.log2(phi / p) + // 0.17498542370019005
- (1 - phi) * SimilarityBase.log2((1 - phi) / (1 - p));
- float gold = (float)(totalTermFreqNorm * D + 0.5 * SimilarityBase.log2(
- 1 + 2 * Math.PI * FREQ * (1 - phi))); // 16.328257
- correctnessTestCore(sim, gold);
- }
-
/** Correctness test for the In2 DFR model with the L (Laplace) aftereffect. */
public void testIn2() throws IOException {
SimilarityBase sim = new DFRSimilarity(
- new BasicModelIn(), new AfterEffect.NoAfterEffect(), new NormalizationH2());
+ new BasicModelIn(), new AfterEffectL(), new NormalizationH2());
float tfn = (float)(FREQ * SimilarityBase.log2( // 8.1894750101
1 + AVG_FIELD_LENGTH / DOC_LEN));
float gold = (float)(tfn * SimilarityBase.log2( // 26.7459577898
- (NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)));
+ (NUMBER_OF_DOCUMENTS + 1) / (DOC_FREQ + 0.5)) / (1 + tfn));
correctnessTestCore(sim, gold);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
index a0f2ece..85a3d6c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
@@ -193,20 +193,46 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
lowerBound = SmallFloat.byte4ToInt((byte) norm);
}
final long maxDoc;
- if (random.nextBoolean()) {
- // small collection
- maxDoc = TestUtil.nextLong(random, 1, 100000);
- } else {
- // yuge collection
- maxDoc = TestUtil.nextLong(random, 1, MAXDOC_FORTESTING);
+ switch (random.nextInt(6)) {
+ case 0:
+ // 1 doc collection
+ maxDoc = 1;
+ break;
+ case 1:
+ // 2 doc collection
+ maxDoc = 2;
+ break;
+ case 2:
+ // tiny collection
+ maxDoc = TestUtil.nextLong(random, 3, 16);
+ break;
+ case 3:
+ // small collection
+ maxDoc = TestUtil.nextLong(random, 16, 100000);
+ break;
+ case 4:
+ // big collection
+ maxDoc = TestUtil.nextLong(random, 100000, MAXDOC_FORTESTING);
+ break;
+ default:
+ // yuge collection
+ maxDoc = MAXDOC_FORTESTING;
+ break;
}
final long docCount;
- if (random.nextBoolean()) {
- // sparse field
- docCount = TestUtil.nextLong(random, 1, maxDoc);
- } else {
- // fully populated
- docCount = maxDoc;
+ switch (random.nextInt(3)) {
+ case 0:
+ // sparsest field
+ docCount = 1;
+ break;
+ case 1:
+ // sparse field
+ docCount = TestUtil.nextLong(random, 1, maxDoc);
+ break;
+ default:
+ // fully populated
+ docCount = maxDoc;
+ break;
}
// random docsize: but can't require docs to have > 2B tokens
long upperBound;
@@ -216,15 +242,22 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
upperBound = MAXTOKENS_FORTESTING;
}
final long sumDocFreq;
- if (random.nextBoolean()) {
- // shortest possible docs
- sumDocFreq = docCount;
- } else {
- // random docsize
- sumDocFreq = TestUtil.nextLong(random, docCount, upperBound + 1 - lowerBound);
+ switch (random.nextInt(3)) {
+ case 0:
+ // shortest possible docs
+ sumDocFreq = docCount;
+ break;
+ case 1:
+ // biggest possible docs
+ sumDocFreq = upperBound + 1 - lowerBound;
+ break;
+ default:
+ // random docsize
+ sumDocFreq = TestUtil.nextLong(random, docCount, upperBound + 1 - lowerBound);
+ break;
}
final long sumTotalTermFreq;
- switch (random.nextInt(3)) {
+ switch (random.nextInt(4)) {
case 0:
// term frequencies were omitted
sumTotalTermFreq = sumDocFreq;
@@ -233,6 +266,10 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// no repetition of terms (except to satisfy this norm)
sumTotalTermFreq = sumDocFreq - 1 + lowerBound;
break;
+ case 2:
+ // maximum repetition of terms
+ sumTotalTermFreq = upperBound;
+ break;
default:
// random repetition
assert sumDocFreq - 1 + lowerBound <= upperBound;
@@ -249,29 +286,46 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
*/
static TermStatistics newTerm(Random random, CollectionStatistics corpus) {
final long docFreq;
- if (random.nextBoolean()) {
- // rare term
- docFreq = 1;
- } else {
- // random specificity
- docFreq = TestUtil.nextLong(random, 1, corpus.docCount());
+ switch (random.nextInt(3)) {
+ case 0:
+ // rare term
+ docFreq = 1;
+ break;
+ case 1:
+ // common term
+ docFreq = corpus.docCount();
+ break;
+ default:
+ // random specificity
+ docFreq = TestUtil.nextLong(random, 1, corpus.docCount());
+ break;
}
final long totalTermFreq;
+ // can't require docs to have > 2B tokens
+ long upperBound;
+ try {
+ upperBound = Math.min(corpus.sumTotalTermFreq(), Math.multiplyExact(docFreq, Integer.MAX_VALUE));
+ } catch (ArithmeticException overflow) {
+ upperBound = corpus.sumTotalTermFreq();
+ }
if (corpus.sumTotalTermFreq() == corpus.sumDocFreq()) {
// omitTF
totalTermFreq = docFreq;
- } else if (random.nextBoolean()) {
- // no repetition
- totalTermFreq = docFreq;
} else {
- // random repetition: but can't require docs to have > 2B tokens
- long upperBound;
- try {
- upperBound = Math.min(corpus.sumTotalTermFreq(), Math.multiplyExact(docFreq, Integer.MAX_VALUE));
- } catch (ArithmeticException overflow) {
- upperBound = corpus.sumTotalTermFreq();
+ switch (random.nextInt(3)) {
+ case 0:
+ // no repetition
+ totalTermFreq = docFreq;
+ break;
+ case 1:
+ // maximum repetition
+ totalTermFreq = upperBound;
+ break;
+ default:
+ // random repetition
+ totalTermFreq = TestUtil.nextLong(random, docFreq, upperBound);
+ break;
}
- totalTermFreq = TestUtil.nextLong(random, docFreq, upperBound);
}
return new TermStatistics(TERM, docFreq, totalTermFreq);
}
@@ -315,9 +369,34 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// there is at least one other document, and those must have at least 1 instance each.
int upperBound = Math.toIntExact(Math.min(term.totalTermFreq() - term.docFreq() + 1, Integer.MAX_VALUE));
if (random.nextBoolean()) {
- freq = TestUtil.nextInt(random, 1, upperBound);
+ // integer freq
+ switch (random.nextInt(3)) {
+ case 0:
+ // smallest freq
+ freq = 1;
+ break;
+ case 1:
+ // largest freq
+ freq = upperBound;
+ break;
+ default:
+ // random freq
+ freq = TestUtil.nextInt(random, 1, upperBound);
+ break;
+ }
} else {
- float freqCandidate = upperBound * random.nextFloat();
+ // float freq
+ float freqCandidate;
+ switch (random.nextInt(2)) {
+ case 0:
+ // smallest freq
+ freqCandidate = Float.MIN_VALUE;
+ break;
+ default:
+ // random freq
+ freqCandidate = upperBound * random.nextFloat();
+ break;
+ }
// we need to be 2nd float value at a minimum, the pairwise test will check MIN_VALUE in this case.
// this avoids testing frequencies of 0 which seem wrong to allow (we should enforce computeSlopFactor etc)
if (freqCandidate <= Float.MIN_VALUE) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
index f880935..444e8ef 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
@@ -58,13 +58,11 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
// all the similarities that we rotate through
/** The DFR basic models to test. */
static BasicModel[] BASIC_MODELS = {
- /* TODO: enable new BasicModelBE(), */ /* TODO: enable new BasicModelD(), */ new BasicModelG(),
- new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
- /* TODO: enable new BasicModelP() */
+ new BasicModelG(), new BasicModelIF(), new BasicModelIn(), new BasicModelIne(),
};
/** The DFR aftereffects to test. */
static AfterEffect[] AFTER_EFFECTS = {
- new AfterEffectB(), new AfterEffectL(), new AfterEffect.NoAfterEffect()
+ new AfterEffectB(), new AfterEffectL()
};
/** The DFR normalizations to test. */
static Normalization[] NORMALIZATIONS = {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/solr/core/src/java/org/apache/solr/search/similarities/DFRSimilarityFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/similarities/DFRSimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/DFRSimilarityFactory.java
index 18fde0d..572b32d 100644
--- a/solr/core/src/java/org/apache/solr/search/similarities/DFRSimilarityFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/similarities/DFRSimilarityFactory.java
@@ -17,17 +17,13 @@
package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.AfterEffect;
-import org.apache.lucene.search.similarities.AfterEffect.NoAfterEffect; // javadoc
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BasicModel;
-import org.apache.lucene.search.similarities.BasicModelBE;
-import org.apache.lucene.search.similarities.BasicModelD;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
-import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Normalization.NoNormalization; // javadoc
@@ -48,10 +44,7 @@ import org.apache.solr.schema.SimilarityFactory;
* <ol>
* <li>{@link BasicModel basicModel}: Basic model of information content:
* <ul>
- * <li>{@link BasicModelBE Be}: Limiting form of Bose-Einstein
* <li>{@link BasicModelG G}: Geometric approximation of Bose-Einstein
- * <li>{@link BasicModelP P}: Poisson approximation of the Binomial
- * <li>{@link BasicModelD D}: Divergence approximation of the Binomial
* <li>{@link BasicModelIn I(n)}: Inverse document frequency
* <li>{@link BasicModelIne I(ne)}: Inverse expected document
* frequency [mixture of Poisson and IDF]
@@ -63,7 +56,6 @@ import org.apache.solr.schema.SimilarityFactory;
* <ul>
* <li>{@link AfterEffectL L}: Laplace's law of succession
* <li>{@link AfterEffectB B}: Ratio of two Bernoulli processes
- * <li>{@link NoAfterEffect none}: no first normalization
* </ul>
* <li>{@link Normalization normalization}: Second (length) normalization:
* <ul>
@@ -122,11 +114,7 @@ public class DFRSimilarityFactory extends SimilarityFactory {
}
private BasicModel parseBasicModel(String expr) {
- if ("Be".equals(expr)) {
- return new BasicModelBE();
- } else if ("D".equals(expr)) {
- return new BasicModelD();
- } else if ("G".equals(expr)) {
+ if ("G".equals(expr)) {
return new BasicModelG();
} else if ("I(F)".equals(expr)) {
return new BasicModelIF();
@@ -134,8 +122,6 @@ public class DFRSimilarityFactory extends SimilarityFactory {
return new BasicModelIn();
} else if ("I(ne)".equals(expr)) {
return new BasicModelIne();
- } else if ("P".equals(expr)) {
- return new BasicModelP();
} else {
throw new RuntimeException("Invalid basicModel: " + expr);
}
@@ -146,8 +132,6 @@ public class DFRSimilarityFactory extends SimilarityFactory {
return new AfterEffectB();
} else if ("L".equals(expr)) {
return new AfterEffectL();
- } else if ("none".equals(expr)) {
- return new AfterEffect.NoAfterEffect();
} else {
throw new RuntimeException("Invalid afterEffect: " + expr);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/solr/core/src/test-files/solr/collection1/conf/schema-dfr.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-dfr.xml b/solr/core/src/test-files/solr/collection1/conf/schema-dfr.xml
index 3083510..78c3b7f 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-dfr.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-dfr.xml
@@ -46,7 +46,7 @@
<fieldType name="text_paramc" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.DFRSimilarityFactory">
- <str name="basicModel">P</str>
+ <str name="basicModel">G</str>
<str name="afterEffect">L</str>
<str name="normalization">H2</str>
<float name="c">7</float>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/63b63c57/solr/core/src/test/org/apache/solr/search/similarities/TestDFRSimilarityFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestDFRSimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestDFRSimilarityFactory.java
index 2159f1a..f3b05b3 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/TestDFRSimilarityFactory.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/TestDFRSimilarityFactory.java
@@ -18,8 +18,8 @@ package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
+import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
-import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
@@ -62,7 +62,7 @@ public class TestDFRSimilarityFactory extends BaseSimilarityTestCase {
Similarity sim = getSimilarity("text_paramc");
assertEquals(DFRSimilarity.class, sim.getClass());
DFRSimilarity dfr = (DFRSimilarity) sim;
- assertEquals(BasicModelP.class, dfr.getBasicModel().getClass());
+ assertEquals(BasicModelG.class, dfr.getBasicModel().getClass());
assertEquals(AfterEffectL.class, dfr.getAfterEffect().getClass());
assertEquals(NormalizationH2.class, dfr.getNormalization().getClass());
NormalizationH2 norm = (NormalizationH2) dfr.getNormalization();