You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 15:25:11 UTC
svn commit: r1160678 [1/2] - in /lucene/dev/branches/flexscoring/lucene/src:
java/org/apache/lucene/search/similarities/
test/org/apache/lucene/search/similarities/
Author: rmuir
Date: Tue Aug 23 13:25:10 2011
New Revision: 1160678
URL: http://svn.apache.org/viewvc?rev=1160678&view=rev
Log:
LUCENE-3393: EasySimilarity -> SimilarityBase
Added:
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java (with props)
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (with props)
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java (with props)
Removed:
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/EasySimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/EasyStats.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java
Modified:
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java Tue Aug 23 13:25:10 2011
@@ -31,20 +31,20 @@ import org.apache.lucene.search.Explanat
*/
public abstract class AfterEffect {
/** Returns the aftereffect score. */
- public abstract float score(EasyStats stats, float tfn);
+ public abstract float score(BasicStats stats, float tfn);
/** Returns an explanation for the score. */
- public abstract Explanation explain(EasyStats stats, float tfn);
+ public abstract Explanation explain(BasicStats stats, float tfn);
/** Implementation used when there is no aftereffect. */
public static final class NoAfterEffect extends AfterEffect {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
return 1f;
}
@Override
- public final Explanation explain(EasyStats stats, float tfn) {
+ public final Explanation explain(BasicStats stats, float tfn) {
return new Explanation(1, "no aftereffect");
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java Tue Aug 23 13:25:10 2011
@@ -25,14 +25,14 @@ import org.apache.lucene.search.Explanat
*/
public class AfterEffectB extends AfterEffect {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
long F = stats.getTotalTermFreq();
int n = stats.getDocFreq();
return (F + 1) / (n * (tfn + 1));
}
@Override
- public final Explanation explain(EasyStats stats, float tfn) {
+ public final Explanation explain(BasicStats stats, float tfn) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(score(stats, tfn));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
*/
public class AfterEffectL extends AfterEffect {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
return 1 / (tfn + 1);
}
@Override
- public final Explanation explain(EasyStats stats, float tfn) {
+ public final Explanation explain(BasicStats stats, float tfn) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(score(stats, tfn));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java Tue Aug 23 13:25:10 2011
@@ -30,7 +30,7 @@ import org.apache.lucene.search.Explanat
*/
public abstract class BasicModel {
/** Returns the informative content score. */
- public abstract float score(EasyStats stats, float tfn);
+ public abstract float score(BasicStats stats, float tfn);
/**
* Returns an explanation for the score.
@@ -39,7 +39,7 @@ public abstract class BasicModel {
* explanation for such models. Subclasses that use other statistics must
* override this method.</p>
*/
- public Explanation explain(EasyStats stats, float tfn) {
+ public Explanation explain(BasicStats stats, float tfn) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(score(stats, tfn));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Limiting form of the Bose-Einstein model. The formula used in Lucene differs
@@ -28,7 +28,7 @@ import static org.apache.lucene.search.s
*/
public class BasicModelBE extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
long N = stats.getNumberOfDocuments() + 1;
// long F = stats.getTotalTermFreq() + 1;
long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Implements the approximation of the binomial model with the divergence
@@ -29,7 +29,7 @@ import static org.apache.lucene.search.s
*/
public class BasicModelD extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
// long F = stats.getTotalTermFreq() + 1;
double phi = (double)tfn / F;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Geometric as limiting form of the Bose-Einstein model.
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
*/
public class BasicModelG extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
double lambda = stats.getTotalTermFreq() / (double) stats.getNumberOfDocuments();
// -log(1 / (lambda + 1)) -> log(lambda + 1)
return (float)(log2(lambda + 1) + tfn * log2((1 + lambda) / lambda));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* An approximation of the <em>I(n<sub>e</sub>)</em> model.
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
*/
public class BasicModelIF extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
int N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
return tfn * (float)(log2(1 + (N + 1) / (F + 0.5)));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java Tue Aug 23 13:25:10 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search.similar
*/
import org.apache.lucene.search.Explanation;
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* The basic tf-idf model of randomness.
@@ -26,14 +26,14 @@ import static org.apache.lucene.search.s
*/
public class BasicModelIn extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
int N = stats.getNumberOfDocuments();
int n = stats.getDocFreq();
return tfn * (float)(log2((N + 1) / (n + 0.5)));
}
@Override
- public final Explanation explain(EasyStats stats, float tfn) {
+ public final Explanation explain(BasicStats stats, float tfn) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(score(stats, tfn));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Tf-idf model of randomness, based on a mixture of Poisson and inverse
@@ -26,7 +26,7 @@ import static org.apache.lucene.search.s
*/
public class BasicModelIne extends BasicModel {
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
int N = stats.getNumberOfDocuments();
long F = stats.getTotalTermFreq();
double ne = N * (1 - Math.pow((N - 1) / (double)N, F));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Implements the Poisson approximation for the binomial model for DFR.
@@ -28,7 +28,7 @@ public class BasicModelP extends BasicMo
protected static double LOG2_E = log2(Math.E);
@Override
- public final float score(EasyStats stats, float tfn) {
+ public final float score(BasicStats stats, float tfn) {
float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
// System.out.printf("tfn=%f, lambda=%f, log1=%f, log2=%f%n", tfn, lambda,
// tfn / lambda, 2 * Math.PI * tfn);
Added: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java?rev=1160678&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java Tue Aug 23 13:25:10 2011
@@ -0,0 +1,144 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Terms;
+
+/**
+ * Stores all statistics commonly used by ranking methods.
+ * @lucene.experimental
+ */
+public class BasicStats extends Similarity.Stats {
+ /** The number of documents. */
+ protected int numberOfDocuments;
+ /** The total number of tokens in the field. */
+ protected long numberOfFieldTokens;
+ /** The average field length. */
+ protected float avgFieldLength;
+ /** The document frequency. */
+ protected int docFreq;
+ /** The total number of occurrences of this term across all documents. */
+ protected long totalTermFreq;
+
+ // -------------------------- Boost-related stuff --------------------------
+
+ /** Query's inner boost. */
+ protected final float queryBoost;
+ /** Any outer query's boost. */
+ protected float topLevelBoost;
+ /** For most Similarities, the immediate and the top level query boosts are
+ * not handled differently. Hence, this field is just the product of the
+ * other two. */
+ protected float totalBoost;
+
+ /** Constructor. Sets the query boost. */
+ public BasicStats(float queryBoost) {
+ this.queryBoost = queryBoost;
+ this.totalBoost = queryBoost;
+ }
+
+ // ------------------------- Getter/setter methods -------------------------
+
+ /** Returns the number of documents. */
+ public int getNumberOfDocuments() {
+ return numberOfDocuments;
+ }
+
+ /** Sets the number of documents. */
+ public void setNumberOfDocuments(int numberOfDocuments) {
+ this.numberOfDocuments = numberOfDocuments;
+ }
+
+ /**
+ * Returns the total number of tokens in the field.
+ * @see Terms#getSumTotalTermFreq()
+ */
+ public long getNumberOfFieldTokens() {
+ return numberOfFieldTokens;
+ }
+
+ /**
+ * Sets the total number of tokens in the field.
+ * @see Terms#getSumTotalTermFreq()
+ */
+ public void setNumberOfFieldTokens(long numberOfFieldTokens) {
+ this.numberOfFieldTokens = numberOfFieldTokens;
+ }
+
+ /** Returns the average field length. */
+ public float getAvgFieldLength() {
+ return avgFieldLength;
+ }
+
+ /** Sets the average field length. */
+ public void setAvgFieldLength(float avgFieldLength) {
+ this.avgFieldLength = avgFieldLength;
+ }
+
+ /** Returns the document frequency. */
+ public int getDocFreq() {
+ return docFreq;
+ }
+
+ /** Sets the document frequency. */
+ public void setDocFreq(int docFreq) {
+ this.docFreq = docFreq;
+ }
+
+ /** Returns the total number of occurrences of this term across all documents. */
+ public long getTotalTermFreq() {
+ return totalTermFreq;
+ }
+
+ /** Sets the total number of occurrences of this term across all documents. */
+ public void setTotalTermFreq(long totalTermFreq) {
+ this.totalTermFreq = totalTermFreq;
+ }
+
+ // -------------------------- Boost-related stuff --------------------------
+
+ /** The square of the raw normalization value.
+ * @see #rawNormalizationValue() */
+ @Override
+ public float getValueForNormalization() {
+ float rawValue = rawNormalizationValue();
+ return rawValue * rawValue;
+ }
+
+ /** Computes the raw normalization value. This basic implementation returns
+ * the query boost. Subclasses may override this method to include other
+ * factors (such as idf), or to save the value for inclusion in
+ * {@link #normalize(float, float)}, etc.
+ */
+ protected float rawNormalizationValue() {
+ return queryBoost;
+ }
+
+ /** No normalization is done. {@code topLevelBoost} is saved in the object,
+ * however. */
+ @Override
+ public void normalize(float queryNorm, float topLevelBoost) {
+ this.topLevelBoost = topLevelBoost;
+ totalBoost = queryBoost * topLevelBoost;
+ }
+
+ /** Returns the total boost. */
+ public float getTotalBoost() {
+ return totalBoost;
+ }
+}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java Tue Aug 23 13:25:10 2011
@@ -39,7 +39,7 @@ import org.apache.lucene.search.Explanat
* @see Normalization
* @lucene.experimental
*/
-public class DFRSimilarity extends EasySimilarity {
+public class DFRSimilarity extends SimilarityBase {
/** The basic model for information content. */
protected final BasicModel basicModel;
/** The first normalization of the information content. */
@@ -78,7 +78,7 @@ public class DFRSimilarity extends EasyS
}
@Override
- protected float score(EasyStats stats, float freq, float docLen) {
+ protected float score(BasicStats stats, float freq, float docLen) {
float tfn = normalization.tfn(stats, freq, docLen);
return stats.getTotalBoost() *
basicModel.score(stats, tfn) * afterEffect.score(stats, tfn);
@@ -86,7 +86,7 @@ public class DFRSimilarity extends EasyS
@Override
protected void explain(Explanation expl,
- EasyStats stats, int doc, float freq, float docLen) {
+ BasicStats stats, int doc, float freq, float docLen) {
if (stats.getTotalBoost() != 1.0f) {
expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java Tue Aug 23 13:25:10 2011
@@ -27,11 +27,11 @@ import org.apache.lucene.search.Explanat
*/
public abstract class Distribution {
/** Computes the score. */
- public abstract float score(EasyStats stats, float tfn, float lambda);
+ public abstract float score(BasicStats stats, float tfn, float lambda);
/** Explains the score. Returns the name of the model only, since
* both {@code tfn} and {@code lambda} are explained elsewhere. */
- public Explanation explain(EasyStats stats, float tfn, float lambda) {
+ public Explanation explain(BasicStats stats, float tfn, float lambda) {
return new Explanation(
score(stats, tfn, lambda), getClass().getSimpleName());
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java Tue Aug 23 13:25:10 2011
@@ -26,7 +26,7 @@ package org.apache.lucene.search.similar
*/
public class DistributionLL extends Distribution {
@Override
- public final float score(EasyStats stats, float tfn, float lambda) {
+ public final float score(BasicStats stats, float tfn, float lambda) {
return (float)-Math.log(lambda / (tfn + lambda));
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java Tue Aug 23 13:25:10 2011
@@ -27,7 +27,7 @@ package org.apache.lucene.search.similar
*/
public class DistributionSPL extends Distribution {
@Override
- public final float score(EasyStats stats, float tfn, float lambda) {
+ public final float score(BasicStats stats, float tfn, float lambda) {
if (lambda == 1f) {
lambda = 0.99f;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java Tue Aug 23 13:25:10 2011
@@ -41,7 +41,7 @@ import org.apache.lucene.search.Explanat
* Similarities will be merged at one point.</p>
* @lucene.experimental
*/
-public class IBSimilarity extends EasySimilarity {
+public class IBSimilarity extends SimilarityBase {
/** The probabilistic distribution used to model term occurrence. */
protected final Distribution distribution;
/** The <em>lambda (λ<sub>w</sub>)</em> parameter. */
@@ -63,7 +63,7 @@ public class IBSimilarity extends EasySi
}
@Override
- protected float score(EasyStats stats, float freq, float docLen) {
+ protected float score(BasicStats stats, float freq, float docLen) {
return stats.getTotalBoost() *
distribution.score(
stats,
@@ -73,7 +73,7 @@ public class IBSimilarity extends EasySi
@Override
protected void explain(
- Explanation expl, EasyStats stats, int doc, float freq, float docLen) {
+ Explanation expl, BasicStats stats, int doc, float freq, float docLen) {
if (stats.getTotalBoost() != 1.0f) {
expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java Tue Aug 23 13:25:10 2011
@@ -60,7 +60,7 @@ public class LMDirichletSimilarity exten
}
@Override
- protected float score(EasyStats stats, float freq, float docLen) {
+ protected float score(BasicStats stats, float freq, float docLen) {
float score = stats.getTotalBoost() * (float)(Math.log(1 + freq /
(mu * ((LMStats)stats).getCollectionProbability())) +
Math.log(mu / (docLen + mu)));
@@ -68,7 +68,7 @@ public class LMDirichletSimilarity exten
}
@Override
- protected void explain(Explanation expl, EasyStats stats, int doc,
+ protected void explain(Explanation expl, BasicStats stats, int doc,
float freq, float docLen) {
if (stats.getTotalBoost() != 1.0f) {
expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java Tue Aug 23 13:25:10 2011
@@ -48,7 +48,7 @@ public class LMJelinekMercerSimilarity e
}
@Override
- protected float score(EasyStats stats, float freq, float docLen) {
+ protected float score(BasicStats stats, float freq, float docLen) {
return stats.getTotalBoost() *
(float)Math.log(1 +
((1 - lambda) * freq / docLen) /
@@ -56,7 +56,7 @@ public class LMJelinekMercerSimilarity e
}
@Override
- protected void explain(Explanation expl, EasyStats stats, int doc,
+ protected void explain(Explanation expl, BasicStats stats, int doc,
float freq, float docLen) {
if (stats.getTotalBoost() != 1.0f) {
expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java Tue Aug 23 13:25:10 2011
@@ -38,7 +38,7 @@ import org.apache.lucene.util.TermContex
*
* @lucene.experimental
*/
-public abstract class LMSimilarity extends EasySimilarity {
+public abstract class LMSimilarity extends SimilarityBase {
/** The collection model. */
protected final CollectionModel collectionModel;
@@ -57,16 +57,16 @@ public abstract class LMSimilarity exten
* usual statistics.
*/
@Override
- public EasyStats computeStats(IndexSearcher searcher, String fieldName,
+ public BasicStats computeStats(IndexSearcher searcher, String fieldName,
float queryBoost, TermContext... termContexts) throws IOException {
LMStats stats = new LMStats(queryBoost);
- fillEasyStats(stats, searcher, fieldName, termContexts);
+ fillBasicStats(stats, searcher, fieldName, termContexts);
stats.setCollectionProbability(collectionModel.computeProbability(stats));
return stats;
}
@Override
- protected void explain(Explanation expl, EasyStats stats, int doc,
+ protected void explain(Explanation expl, BasicStats stats, int doc,
float freq, float docLen) {
expl.addDetail(new Explanation(collectionModel.computeProbability(stats),
"collection probability"));
@@ -97,7 +97,7 @@ public abstract class LMSimilarity exten
}
/** Stores the collection distribution of the current term. */
- public static class LMStats extends EasyStats {
+ public static class LMStats extends BasicStats {
/** The probability that the current term is generated by the collection. */
private float collectionProbability;
@@ -128,7 +128,7 @@ public abstract class LMSimilarity exten
* Computes the probability {@code p(w|C)} according to the language model
* strategy for the current term.
*/
- public float computeProbability(EasyStats stats);
+ public float computeProbability(BasicStats stats);
/** The name of the collection model strategy. */
public String getName();
@@ -140,7 +140,7 @@ public abstract class LMSimilarity exten
*/
public static class DefaultCollectionModel implements CollectionModel {
@Override
- public float computeProbability(EasyStats stats) {
+ public float computeProbability(BasicStats stats) {
return (float)stats.getTotalTermFreq() / (stats.getNumberOfFieldTokens() +1);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java Tue Aug 23 13:25:10 2011
@@ -27,9 +27,9 @@ import org.apache.lucene.search.Explanat
*/
public abstract class Lambda {
/** Computes the lambda parameter. */
- public abstract float lambda(EasyStats stats);
+ public abstract float lambda(BasicStats stats);
/** Explains the lambda parameter. */
- public abstract Explanation explain(EasyStats stats);
+ public abstract Explanation explain(BasicStats stats);
/**
* Subclasses must override this method to return the code of the lambda
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
*/
public class LambdaDF extends Lambda {
@Override
- public final float lambda(EasyStats stats) {
+ public final float lambda(BasicStats stats) {
return (float)stats.getDocFreq() / stats.getNumberOfDocuments();
}
@Override
- public final Explanation explain(EasyStats stats) {
+ public final Explanation explain(BasicStats stats) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(lambda(stats));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
*/
public class LambdaTTF extends Lambda {
@Override
- public final float lambda(EasyStats stats) {
+ public final float lambda(BasicStats stats) {
return (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
}
@Override
- public final Explanation explain(EasyStats stats) {
+ public final Explanation explain(BasicStats stats) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(lambda(stats));
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java Tue Aug 23 13:25:10 2011
@@ -29,7 +29,7 @@ import org.apache.lucene.search.Explanat
public abstract class Normalization {
/** Returns the normalized term frequency.
* @param len the field length. */
- public abstract float tfn(EasyStats stats, float tf, float len);
+ public abstract float tfn(BasicStats stats, float tf, float len);
/** Returns an explanation for the normalized term frequency.
* <p>The default normalization methods use the field length of the document
@@ -37,7 +37,7 @@ public abstract class Normalization {
* This method provides a generic explanation for such methods.
* Subclasses that use other statistics must override this method.</p>
*/
- public Explanation explain(EasyStats stats, float tf, float len) {
+ public Explanation explain(BasicStats stats, float tf, float len) {
Explanation result = new Explanation();
result.setDescription(getClass().getSimpleName() + ", computed from: ");
result.setValue(tfn(stats, tf, len));
@@ -51,12 +51,12 @@ public abstract class Normalization {
/** Implementation used when there is no normalization. */
public static final class NoNormalization extends Normalization {
@Override
- public final float tfn(EasyStats stats, float tf, float len) {
+ public final float tfn(BasicStats stats, float tf, float len) {
return tf;
}
@Override
- public final Explanation explain(EasyStats stats, float tf, float len) {
+ public final Explanation explain(BasicStats stats, float tf, float len) {
return new Explanation(1, "no normalization");
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java Tue Aug 23 13:25:10 2011
@@ -22,7 +22,7 @@ package org.apache.lucene.search.similar
*/
public class NormalizationH1 extends Normalization {
@Override
- public final float tfn(EasyStats stats, float tf, float len) {
+ public final float tfn(BasicStats stats, float tf, float len) {
return tf * stats.getAvgFieldLength() / len;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
* limitations under the License.
*/
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
/**
* Normalization model in which the term frequency is inversely related to the
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
*/
public class NormalizationH2 extends Normalization {
@Override
- public final float tfn(EasyStats stats, float tf, float len) {
+ public final float tfn(BasicStats stats, float tf, float len) {
return (float)(tf * log2(1 + stats.getAvgFieldLength() / len));
}
Added: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java?rev=1160678&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java Tue Aug 23 13:25:10 2011
@@ -0,0 +1,302 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * A subclass of {@code Similarity} that provides a simplified API for its
+ * descendants. Subclasses are only required to implement the {@link #score}
+ * and {@link #toString()} methods. Implementing
+ * {@link #explain(Explanation, BasicStats, int, float, float)} is optional,
+ * inasmuch as SimilarityBase already provides a basic explanation of the score
+ * and the term frequency. However, implementers of a subclass are encouraged to
+ * include as much detail about the scoring method as possible.
+ * @lucene.experimental
+ */
+public abstract class SimilarityBase extends Similarity {
+ /** For {@link #log2(double)}. Precomputed for efficiency reasons. */
+ private static final double LOG_2 = Math.log(2);
+
+ /** @see #setDiscountOverlaps */
+ protected boolean discountOverlaps = true;
+
+ /** Determines whether overlap tokens (Tokens with
+ * 0 position increment) are ignored when computing
+ * norm. By default this is true, meaning overlap
+ * tokens do not count when computing norms.
+ *
+ * @lucene.experimental
+ *
+ * @see #computeNorm
+ */
+ public void setDiscountOverlaps(boolean v) {
+ discountOverlaps = v;
+ }
+
+ /** @see #setDiscountOverlaps */
+ public boolean getDiscountOverlaps() {
+ return discountOverlaps;
+ }
+
+ /**
+ * Calls {@link #fillBasicStats(BasicStats, IndexSearcher, String, TermContext...)}.
+ * Subclasses that override this method may invoke {@code fillBasicStats} with any
+ * subclass of {@code BasicStats}.
+ */
+ @Override
+ public BasicStats computeStats(IndexSearcher searcher, String fieldName,
+ float queryBoost, TermContext... termContexts) throws IOException {
+ BasicStats stats = new BasicStats(queryBoost);
+ fillBasicStats(stats, searcher, fieldName, termContexts);
+ return stats;
+ }
+
+ /** Fills all member fields defined in {@code BasicStats} in {@code stats}. */
+ protected final void fillBasicStats(BasicStats stats, IndexSearcher searcher,
+ String fieldName, TermContext... termContexts) throws IOException {
+ IndexReader reader = searcher.getIndexReader();
+ int numberOfDocuments = reader.maxDoc();
+ long numberOfFieldTokens = MultiFields.getTerms(searcher.getIndexReader(),
+ fieldName).getSumTotalTermFreq();
+ float avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
+
+ // nocommit Take the minimum of term frequencies for phrases. This is not
+ // correct though, we'll need something like a scorePhrase(MultiStats ...)
+ int docFreq = Integer.MAX_VALUE;
+ long totalTermFreq = Integer.MAX_VALUE;
+ for (final TermContext context : termContexts) {
+ docFreq = Math.min(docFreq, context.docFreq());
+ totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
+ }
+
+ // We have to provide something if the codec doesn't supply these measures,
+ // or if someone omitted frequencies for the field... negative values cause
+ // NaN/Inf for some scorers.
+ if (numberOfFieldTokens == -1) {
+ numberOfFieldTokens = docFreq;
+ avgFieldLength = 1;
+ }
+ if (totalTermFreq == -1) {
+ totalTermFreq = docFreq;
+ }
+
+ stats.setNumberOfDocuments(numberOfDocuments);
+ stats.setNumberOfFieldTokens(numberOfFieldTokens);
+ stats.setAvgFieldLength(avgFieldLength);
+ stats.setDocFreq(docFreq);
+ stats.setTotalTermFreq(totalTermFreq);
+ }
+
+ /**
+ * Scores the document {@code doc}.
+ * <p>Subclasses must apply their scoring formula in this method.</p>
+ * @param stats the corpus level statistics.
+ * @param freq the term frequency.
+ * @param docLen the document length.
+ * @return the score.
+ */
+ protected abstract float score(BasicStats stats, float freq, float docLen);
+
+ /**
+ * Subclasses should implement this method to explain the score. {@code expl}
+ * already contains the score, the name of the class and the doc id, as well
+ * as the term frequency and its explanation; subclasses can add additional
+ * clauses to explain details of their scoring formulae.
+ * <p>The default implementation does nothing.</p>
+ *
+ * @param expl the explanation to extend with details.
+ * @param stats the corpus level statistics.
+ * @param doc the document id.
+ * @param freq the term frequency.
+ * @param docLen the document length.
+ */
+ protected void explain(
+ Explanation expl, BasicStats stats, int doc, float freq, float docLen) {}
+
+ /**
+ * Explains the score. The implementation here provides a basic explanation
+ * in the format <em>score(name-of-similarity, doc=doc-id,
+ * freq=term-frequency), computed from:</em>, and
+ * attaches the score (computed via the {@link #score(BasicStats, float, float)}
+ * method) and the explanation for the term frequency. Subclasses content with
+ * this format may add additional details in
+ * {@link #explain(Explanation, BasicStats, int, float, float)}.
+ *
+ * @param stats the corpus level statistics.
+ * @param doc the document id.
+ * @param freq the term frequency and its explanation.
+ * @param docLen the document length.
+ * @return the explanation.
+ */
+ protected Explanation explain(
+ BasicStats stats, int doc, Explanation freq, float docLen) {
+ Explanation result = new Explanation();
+ result.setValue(score(stats, freq.getValue(), docLen));
+ result.setDescription("score(" + getClass().getSimpleName() +
+ ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:");
+ result.addDetail(freq);
+
+ explain(result, stats, doc, freq.getValue(), docLen);
+
+ return result;
+ }
+
+ @Override
+ public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
+ AtomicReaderContext context) throws IOException {
+ return new BasicExactDocScorer((BasicStats) stats,
+ context.reader.norms(fieldName));
+ }
+
+ @Override
+ public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
+ AtomicReaderContext context) throws IOException {
+ return new BasicSloppyDocScorer((BasicStats) stats,
+ context.reader.norms(fieldName));
+ }
+
+ /**
+ * Subclasses must override this method to return the name of the Similarity
+ * and preferably the values of parameters (if any) as well.
+ */
+ @Override
+ public abstract String toString(); // nocommit: to Similarity?
+
+ // ------------------------------ Norm handling ------------------------------
+
+ /** Norm -> document length map. */
+ private static final float[] NORM_TABLE = new float[256];
+
+ static {
+ for (int i = 0; i < 256; i++) {
+ float floatNorm = SmallFloat.byte315ToFloat((byte)i);
+ NORM_TABLE[i] = 1.0f / (floatNorm * floatNorm);
+ }
+ }
+
+ /** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
+ @Override
+ public byte computeNorm(FieldInvertState state) {
+ final float numTerms;
+ if (discountOverlaps)
+ numTerms = state.getLength() - state.getNumOverlap();
+ else
+ numTerms = state.getLength() / state.getBoost();
+ return encodeNormValue(numTerms);
+ }
+
+ /** Decodes a normalization factor (document length) stored in an index.
+ * @see #encodeNormValue(float)
+ */
+ protected float decodeNormValue(byte norm) {
+ return NORM_TABLE[norm & 0xFF]; // & 0xFF maps negative bytes to positive above 127
+ }
+
+ /** Encodes the length to a byte via SmallFloat. */
+ protected byte encodeNormValue(float length) {
+ return SmallFloat.floatToByte315((float)(1.0 / Math.sqrt(length)));
+ }
+
+ // ----------------------------- Static methods ------------------------------
+
+ /** Returns the base two logarithm of {@code x}. */
+ public static double log2(double x) {
+ // Put this to a 'util' class if we need more of these.
+ return Math.log(x) / LOG_2;
+ }
+
+ // --------------------------------- Classes ---------------------------------
+
+ /** Delegates the {@link #score(int, int)} and
+ * {@link #explain(int, Explanation)} methods to
+ * {@link SimilarityBase#score(BasicStats, float, float)} and
+ * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+ * respectively.
+ */
+ private class BasicExactDocScorer extends ExactDocScorer {
+ private final BasicStats stats;
+ private final byte[] norms;
+
+ BasicExactDocScorer(BasicStats stats, byte norms[]) {
+ this.stats = stats;
+ this.norms = norms;
+ }
+
+ @Override
+ public float score(int doc, int freq) {
+ // We have to supply something in case norms are omitted
+ return SimilarityBase.this.score(stats, freq,
+ norms == null ? freq : decodeNormValue(norms[doc]));
+ }
+
+ @Override
+ public Explanation explain(int doc, Explanation freq) {
+ return SimilarityBase.this.explain(stats, doc, freq,
+ norms == null ? freq.getValue() : decodeNormValue(norms[doc]));
+ }
+ }
+
+ /** Delegates the {@link #score(int, float)} and
+ * {@link #explain(int, Explanation)} methods to
+ * {@link SimilarityBase#score(BasicStats, float, float)} and
+ * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+ * respectively.
+ */
+ private class BasicSloppyDocScorer extends SloppyDocScorer {
+ private final BasicStats stats;
+ private final byte[] norms;
+
+ BasicSloppyDocScorer(BasicStats stats, byte norms[]) {
+ this.stats = stats;
+ this.norms = norms;
+ }
+
+ @Override
+ public float score(int doc, float freq) {
+ // We have to supply something in case norms are omitted
+ return SimilarityBase.this.score(stats, freq,
+ norms == null ? freq : decodeNormValue(norms[doc]));
+ }
+ @Override
+ public Explanation explain(int doc, Explanation freq) {
+ return SimilarityBase.this.explain(stats, doc, freq,
+ norms == null ? freq.getValue() : decodeNormValue(norms[doc]));
+ }
+
+ @Override
+ public float computeSlopFactor(int distance) {
+ return 1.0f / (distance + 1);
+ }
+
+ @Override
+ public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+ return 1f;
+ }
+ }
+}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java Tue Aug 23 13:25:10 2011
@@ -22,16 +22,9 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
-import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
-import org.apache.lucene.search.similarities.Similarity.Stats;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.SmallFloat;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html Tue Aug 23 13:25:10 2011
@@ -46,10 +46,11 @@ information, see {@link org.apache.lucen
<p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
implementation of the successful Okapi BM25 model.</p>
-<p>{@link org.apache.lucene.search.similarities.EasySimilarity} provides a basic
+<p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
implementation of the Similarity contract and exposes a highly simplified
interface, which makes it an ideal starting point for new ranking functions.
-Lucene ships the following EasySimilarity-based methods:
+Lucene ships the following methods built on
+{@link org.apache.lucene.search.similarities.SimilarityBase}:
<a name="framework"></a>
<ul>
@@ -60,7 +61,8 @@ Lucene ships the following EasySimilarit
Zhai and Lafferty's paper.</li>
</ul>
-Since EasySimilarity is not optimized to the same extent as
+Since {@link org.apache.lucene.search.similarities.SimilarityBase} is not
+optimized to the same extent as
{@link org.apache.lucene.search.similarities.DefaultSimilarity} and
{@link org.apache.lucene.search.similarities.BM25Similarity}, a difference in
performance is to be expected when using the methods listed above. However,
@@ -100,7 +102,7 @@ either, the query norm is defined as <co
<p>To make this change, implement your own {@link org.apache.lucene.search.similarities.Similarity} (likely
you'll want to simply subclass an existing method, be it
{@link org.apache.lucene.search.similarities.DefaultSimilarity} or a descendant of
- {@link org.apache.lucene.search.similarities.EasySimilarity}) and
+ {@link org.apache.lucene.search.similarities.SimilarityBase}) and
{@link org.apache.lucene.search.similarities.SimilarityProvider} (or use
{@link org.apache.lucene.search.similarities.BasicSimilarityProvider}), and
then register the new class by calling
@@ -110,17 +112,17 @@ either, the query norm is defined as <co
before searching.
</p>
-<h3>Extending {@linkplain org.apache.lucene.search.similarities.EasySimilarity}</h3>
+<h3>Extending {@linkplain org.apache.lucene.search.similarities.SimilarityBase}</h3>
<p>
The easiest way to quickly implement a new ranking method is to extend
-{@link org.apache.lucene.search.similarities.EasySimilarity}, which provides
+{@link org.apache.lucene.search.similarities.SimilarityBase}, which provides
basic implementations for the low-level Similarity methods. Subclasses are only required to
-implement the {@link org.apache.lucene.search.similarities.EasySimilarity#score(EasyStats, float, float)}
-and {@link org.apache.lucene.search.similarities.EasySimilarity#toString()}
+implement the {@link org.apache.lucene.search.similarities.SimilarityBase#score(BasicStats, float, float)}
+and {@link org.apache.lucene.search.similarities.SimilarityBase#toString()}
methods.</p>
<p>Another option is to extend one of the <a href="#framework">frameworks</a>
-based on {@link org.apache.lucene.search.similarities.EasySimilarity}. These
+based on {@link org.apache.lucene.search.similarities.SimilarityBase}. These
Similarities are implemented modularly, e.g.
{@link org.apache.lucene.search.similarities.DFRSimilarity} delegates
computation of the three parts of its formula to the classes
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java Tue Aug 23 13:25:10 2011
@@ -39,22 +39,22 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
- * Index searcher implementation that takes an {@link EasyStats} instance and
+ * Index searcher implementation that takes a {@link BasicStats} instance and
* returns statistics accordingly. Most of the methods are not implemented, so
* it can only be used for Similarity unit testing.
*/
public class SpoofIndexSearcher extends IndexSearcher {
- public SpoofIndexSearcher(EasyStats stats) {
+ public SpoofIndexSearcher(BasicStats stats) {
super(new SpoofIndexReader(stats));
}
public static class SpoofIndexReader extends IndexReader {
/** The stats the reader has to return. */
- protected EasyStats stats;
+ protected BasicStats stats;
/** The fields the reader has to return. */
protected SpoofFields fields;
- public SpoofIndexReader(EasyStats stats) {
+ public SpoofIndexReader(BasicStats stats) {
this.stats = stats;
this.fields = new SpoofFields(stats);
}
@@ -163,7 +163,7 @@ public class SpoofIndexSearcher extends
/** The terms the object has to return. */
protected SpoofTerms terms;
- public SpoofFields(EasyStats stats) {
+ public SpoofFields(BasicStats stats) {
this.terms = new SpoofTerms(stats);
}
@@ -183,9 +183,9 @@ public class SpoofIndexSearcher extends
/** Spoof Terms class for Similarity testing. */
public static class SpoofTerms extends Terms {
/** The stats the object has to return. */
- protected EasyStats stats;
+ protected BasicStats stats;
- public SpoofTerms(EasyStats stats) {
+ public SpoofTerms(BasicStats stats) {
this.stats = stats;
}