You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 15:25:11 UTC

svn commit: r1160678 [1/2] - in /lucene/dev/branches/flexscoring/lucene/src: java/org/apache/lucene/search/similarities/ test/org/apache/lucene/search/similarities/

Author: rmuir
Date: Tue Aug 23 13:25:10 2011
New Revision: 1160678

URL: http://svn.apache.org/viewvc?rev=1160678&view=rev
Log:
LUCENE-3393: EasySimilarity -> SimilarityBase

Added:
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java   (with props)
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java   (with props)
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java   (with props)
Removed:
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/EasySimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/EasyStats.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/TestEasySimilarity.java
Modified:
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffect.java Tue Aug 23 13:25:10 2011
@@ -31,20 +31,20 @@ import org.apache.lucene.search.Explanat
  */
 public abstract class AfterEffect {
   /** Returns the aftereffect score. */
-  public abstract float score(EasyStats stats, float tfn);
+  public abstract float score(BasicStats stats, float tfn);
   
   /** Returns an explanation for the score. */
-  public abstract Explanation explain(EasyStats stats, float tfn);
+  public abstract Explanation explain(BasicStats stats, float tfn);
 
   /** Implementation used when there is no aftereffect. */
   public static final class NoAfterEffect extends AfterEffect {
     @Override
-    public final float score(EasyStats stats, float tfn) {
+    public final float score(BasicStats stats, float tfn) {
       return 1f;
     }
 
     @Override
-    public final Explanation explain(EasyStats stats, float tfn) {
+    public final Explanation explain(BasicStats stats, float tfn) {
       return new Explanation(1, "no aftereffect");
     }
     

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectB.java Tue Aug 23 13:25:10 2011
@@ -25,14 +25,14 @@ import org.apache.lucene.search.Explanat
  */
 public class AfterEffectB extends AfterEffect {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     long F = stats.getTotalTermFreq();
     int n = stats.getDocFreq();
     return (F + 1) / (n * (tfn + 1));
   }
   
   @Override
-  public final Explanation explain(EasyStats stats, float tfn) {
+  public final Explanation explain(BasicStats stats, float tfn) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(score(stats, tfn));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/AfterEffectL.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
  */
 public class AfterEffectL extends AfterEffect {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     return 1 / (tfn + 1);
   }
   
   @Override
-  public final Explanation explain(EasyStats stats, float tfn) {
+  public final Explanation explain(BasicStats stats, float tfn) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(score(stats, tfn));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModel.java Tue Aug 23 13:25:10 2011
@@ -30,7 +30,7 @@ import org.apache.lucene.search.Explanat
  */
 public abstract class BasicModel {
   /** Returns the informative content score. */
-  public abstract float score(EasyStats stats, float tfn);
+  public abstract float score(BasicStats stats, float tfn);
   
   /**
    * Returns an explanation for the score.
@@ -39,7 +39,7 @@ public abstract class BasicModel {
    * explanation for such models. Subclasses that use other statistics must
    * override this method.</p>
    */
-  public Explanation explain(EasyStats stats, float tfn) {
+  public Explanation explain(BasicStats stats, float tfn) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(score(stats, tfn));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelBE.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Limiting form of the Bose-Einstein model. The formula used in Lucene differs
@@ -28,7 +28,7 @@ import static org.apache.lucene.search.s
  */
 public class BasicModelBE extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     long N = stats.getNumberOfDocuments() + 1;
 //    long F = stats.getTotalTermFreq() + 1;
     long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelD.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Implements the approximation of the binomial model with the divergence
@@ -29,7 +29,7 @@ import static org.apache.lucene.search.s
  */
 public class BasicModelD extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     long F = Math.max(stats.getTotalTermFreq(), (long)(tfn + 0.5) + 1);
 //    long F = stats.getTotalTermFreq() + 1;
     double phi = (double)tfn / F;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelG.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Geometric as limiting form of the Bose-Einstein model.
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
  */
 public class BasicModelG extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     double lambda = stats.getTotalTermFreq() / (double) stats.getNumberOfDocuments();
     // -log(1 / (lambda + 1)) -> log(lambda + 1)
     return (float)(log2(lambda + 1) + tfn * log2((1 + lambda) / lambda));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIF.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * An approximation of the <em>I(n<sub>e</sub>)</em> model.
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
  */ 
 public class BasicModelIF extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     int N = stats.getNumberOfDocuments();
     long F = stats.getTotalTermFreq();
     return tfn * (float)(log2(1 + (N + 1) / (F + 0.5)));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIn.java Tue Aug 23 13:25:10 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search.similar
  */
 
 import org.apache.lucene.search.Explanation;
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * The basic tf-idf model of randomness.
@@ -26,14 +26,14 @@ import static org.apache.lucene.search.s
  */ 
 public class BasicModelIn extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     int N = stats.getNumberOfDocuments();
     int n = stats.getDocFreq();
     return tfn * (float)(log2((N + 1) / (n + 0.5)));
   }
   
   @Override
-  public final Explanation explain(EasyStats stats, float tfn) {
+  public final Explanation explain(BasicStats stats, float tfn) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(score(stats, tfn));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelIne.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Tf-idf model of randomness, based on a mixture of Poisson and inverse
@@ -26,7 +26,7 @@ import static org.apache.lucene.search.s
  */ 
 public class BasicModelIne extends BasicModel {
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     int N = stats.getNumberOfDocuments();
     long F = stats.getTotalTermFreq();
     double ne = N * (1 - Math.pow((N - 1) / (double)N, F));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicModelP.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Implements the Poisson approximation for the binomial model for DFR.
@@ -28,7 +28,7 @@ public class BasicModelP extends BasicMo
   protected static double LOG2_E = log2(Math.E);
   
   @Override
-  public final float score(EasyStats stats, float tfn) {
+  public final float score(BasicStats stats, float tfn) {
     float lambda = (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
 //    System.out.printf("tfn=%f, lambda=%f, log1=%f, log2=%f%n", tfn, lambda,
 //        tfn / lambda, 2 * Math.PI * tfn);

Added: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java?rev=1160678&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/BasicStats.java Tue Aug 23 13:25:10 2011
@@ -0,0 +1,144 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Terms;
+
+/**
+ * Stores all statistics commonly used by ranking methods.
+ * @lucene.experimental
+ */
+public class BasicStats extends Similarity.Stats {
+  /** The number of documents. */
+  protected int numberOfDocuments;
+  /** The total number of tokens in the field. */
+  protected long numberOfFieldTokens;
+  /** The average field length. */
+  protected float avgFieldLength;
+  /** The document frequency. */
+  protected int docFreq;
+  /** The total number of occurrences of this term across all documents. */
+  protected long totalTermFreq;
+  
+  // -------------------------- Boost-related stuff --------------------------
+  
+  /** Query's inner boost. */
+  protected final float queryBoost;
+  /** Any outer query's boost. */
+  protected float topLevelBoost;
+  /** For most Similarities, the immediate and the top level query boosts are
+   * not handled differently. Hence, this field is just the product of the
+   * other two. */
+  protected float totalBoost;
+  
+  /** Constructor. Sets the query boost. */
+  public BasicStats(float queryBoost) {
+    this.queryBoost = queryBoost;
+    this.totalBoost = queryBoost;
+  }
+  
+  // ------------------------- Getter/setter methods -------------------------
+  
+  /** Returns the number of documents. */
+  public int getNumberOfDocuments() {
+    return numberOfDocuments;
+  }
+  
+  /** Sets the number of documents. */
+  public void setNumberOfDocuments(int numberOfDocuments) {
+    this.numberOfDocuments = numberOfDocuments;
+  }
+  
+  /**
+   * Returns the total number of tokens in the field.
+   * @see Terms#getSumTotalTermFreq()
+   */
+  public long getNumberOfFieldTokens() {
+    return numberOfFieldTokens;
+  }
+  
+  /**
+   * Sets the total number of tokens in the field.
+   * @see Terms#getSumTotalTermFreq()
+   */
+  public void setNumberOfFieldTokens(long numberOfFieldTokens) {
+    this.numberOfFieldTokens = numberOfFieldTokens;
+  }
+  
+  /** Returns the average field length. */
+  public float getAvgFieldLength() {
+    return avgFieldLength;
+  }
+  
+  /** Sets the average field length. */
+  public void setAvgFieldLength(float avgFieldLength) {
+    this.avgFieldLength = avgFieldLength;
+  }
+  
+  /** Returns the document frequency. */
+  public int getDocFreq() {
+    return docFreq;
+  }
+  
+  /** Sets the document frequency. */
+  public void setDocFreq(int docFreq) {
+    this.docFreq = docFreq;
+  }
+  
+  /** Returns the total number of occurrences of this term across all documents. */
+  public long getTotalTermFreq() {
+    return totalTermFreq;
+  }
+  
+  /** Sets the total number of occurrences of this term across all documents. */
+  public void setTotalTermFreq(long totalTermFreq) {
+    this.totalTermFreq = totalTermFreq;
+  }
+  
+  // -------------------------- Boost-related stuff --------------------------
+  
+  /** The square of the raw normalization value.
+   * @see #rawNormalizationValue() */
+  @Override
+  public float getValueForNormalization() {
+    float rawValue = rawNormalizationValue();
+    return rawValue * rawValue;
+  }
+  
+  /** Computes the raw normalization value. This basic implementation returns
+   * the query boost. Subclasses may override this method to include other
+   * factors (such as idf), or to save the value for inclusion in
+   * {@link #normalize(float, float)}, etc.
+   */
+  protected float rawNormalizationValue() {
+    return queryBoost;
+  }
+  
+  /** No normalization is done. {@code topLevelBoost} is saved in the object,
+   * however. */
+  @Override
+  public void normalize(float queryNorm, float topLevelBoost) {
+    this.topLevelBoost = topLevelBoost;
+    totalBoost = queryBoost * topLevelBoost;
+  }
+  
+  /** Returns the total boost. */
+  public float getTotalBoost() {
+    return totalBoost;
+  }
+}

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java Tue Aug 23 13:25:10 2011
@@ -39,7 +39,7 @@ import org.apache.lucene.search.Explanat
  * @see Normalization
  * @lucene.experimental
  */
-public class DFRSimilarity extends EasySimilarity {
+public class DFRSimilarity extends SimilarityBase {
   /** The basic model for information content. */
   protected final BasicModel basicModel;
   /** The first normalization of the information content. */
@@ -78,7 +78,7 @@ public class DFRSimilarity extends EasyS
   }
   
   @Override
-  protected float score(EasyStats stats, float freq, float docLen) {
+  protected float score(BasicStats stats, float freq, float docLen) {
     float tfn = normalization.tfn(stats, freq, docLen);
     return stats.getTotalBoost() *
         basicModel.score(stats, tfn) * afterEffect.score(stats, tfn);
@@ -86,7 +86,7 @@ public class DFRSimilarity extends EasyS
 
   @Override
   protected void explain(Explanation expl,
-      EasyStats stats, int doc, float freq, float docLen) {
+      BasicStats stats, int doc, float freq, float docLen) {
     if (stats.getTotalBoost() != 1.0f) {
       expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
     }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Distribution.java Tue Aug 23 13:25:10 2011
@@ -27,11 +27,11 @@ import org.apache.lucene.search.Explanat
  */
 public abstract class Distribution {
   /** Computes the score. */
-  public abstract float score(EasyStats stats, float tfn, float lambda);
+  public abstract float score(BasicStats stats, float tfn, float lambda);
   
   /** Explains the score. Returns the name of the model only, since
    * both {@code tfn} and {@code lambda} are explained elsewhere. */
-  public Explanation explain(EasyStats stats, float tfn, float lambda) {
+  public Explanation explain(BasicStats stats, float tfn, float lambda) {
     return new Explanation(
         score(stats, tfn, lambda), getClass().getSimpleName());
   }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionLL.java Tue Aug 23 13:25:10 2011
@@ -26,7 +26,7 @@ package org.apache.lucene.search.similar
  */
 public class DistributionLL extends Distribution {
   @Override
-  public final float score(EasyStats stats, float tfn, float lambda) {
+  public final float score(BasicStats stats, float tfn, float lambda) {
     return (float)-Math.log(lambda / (tfn + lambda));
   }
   

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/DistributionSPL.java Tue Aug 23 13:25:10 2011
@@ -27,7 +27,7 @@ package org.apache.lucene.search.similar
  */
 public class DistributionSPL extends Distribution {
   @Override
-  public final float score(EasyStats stats, float tfn, float lambda) {
+  public final float score(BasicStats stats, float tfn, float lambda) {
     if (lambda == 1f) {
       lambda = 0.99f;
     }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/IBSimilarity.java Tue Aug 23 13:25:10 2011
@@ -41,7 +41,7 @@ import org.apache.lucene.search.Explanat
  * Similarities will be merged at one point.</p>
  * @lucene.experimental 
  */
-public class IBSimilarity extends EasySimilarity {
+public class IBSimilarity extends SimilarityBase {
   /** The probabilistic distribution used to model term occurrence. */
   protected final Distribution distribution;
   /** The <em>lambda (&lambda;<sub>w</sub>)</em> parameter. */
@@ -63,7 +63,7 @@ public class IBSimilarity extends EasySi
   }
   
   @Override
-  protected float score(EasyStats stats, float freq, float docLen) {
+  protected float score(BasicStats stats, float freq, float docLen) {
     return stats.getTotalBoost() *
         distribution.score(
             stats,
@@ -73,7 +73,7 @@ public class IBSimilarity extends EasySi
 
   @Override
   protected void explain(
-      Explanation expl, EasyStats stats, int doc, float freq, float docLen) {
+      Explanation expl, BasicStats stats, int doc, float freq, float docLen) {
     if (stats.getTotalBoost() != 1.0f) {
       expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));
     }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java Tue Aug 23 13:25:10 2011
@@ -60,7 +60,7 @@ public class LMDirichletSimilarity exten
   }
   
   @Override
-  protected float score(EasyStats stats, float freq, float docLen) {
+  protected float score(BasicStats stats, float freq, float docLen) {
     float score = stats.getTotalBoost() * (float)(Math.log(1 + freq /
         (mu * ((LMStats)stats).getCollectionProbability())) +
         Math.log(mu / (docLen + mu)));
@@ -68,7 +68,7 @@ public class LMDirichletSimilarity exten
   }
   
   @Override
-  protected void explain(Explanation expl, EasyStats stats, int doc,
+  protected void explain(Explanation expl, BasicStats stats, int doc,
       float freq, float docLen) {
     if (stats.getTotalBoost() != 1.0f) {
       expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java Tue Aug 23 13:25:10 2011
@@ -48,7 +48,7 @@ public class LMJelinekMercerSimilarity e
   }
   
   @Override
-  protected float score(EasyStats stats, float freq, float docLen) {
+  protected float score(BasicStats stats, float freq, float docLen) {
     return stats.getTotalBoost() *
         (float)Math.log(1 +
             ((1 - lambda) * freq / docLen) /
@@ -56,7 +56,7 @@ public class LMJelinekMercerSimilarity e
   }
   
   @Override
-  protected void explain(Explanation expl, EasyStats stats, int doc,
+  protected void explain(Explanation expl, BasicStats stats, int doc,
       float freq, float docLen) {
     if (stats.getTotalBoost() != 1.0f) {
       expl.addDetail(new Explanation(stats.getTotalBoost(), "boost"));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LMSimilarity.java Tue Aug 23 13:25:10 2011
@@ -38,7 +38,7 @@ import org.apache.lucene.util.TermContex
  * 
  * @lucene.experimental
  */
-public abstract class LMSimilarity extends EasySimilarity {
+public abstract class LMSimilarity extends SimilarityBase {
   /** The collection model. */
   protected final CollectionModel collectionModel;
   
@@ -57,16 +57,16 @@ public abstract class LMSimilarity exten
    * usual statistics.
    */
   @Override
-  public EasyStats computeStats(IndexSearcher searcher, String fieldName,
+  public BasicStats computeStats(IndexSearcher searcher, String fieldName,
       float queryBoost, TermContext... termContexts) throws IOException {
     LMStats stats = new LMStats(queryBoost);
-    fillEasyStats(stats, searcher, fieldName, termContexts);
+    fillBasicStats(stats, searcher, fieldName, termContexts);
     stats.setCollectionProbability(collectionModel.computeProbability(stats));
     return stats;
   }
 
   @Override
-  protected void explain(Explanation expl, EasyStats stats, int doc,
+  protected void explain(Explanation expl, BasicStats stats, int doc,
       float freq, float docLen) {
     expl.addDetail(new Explanation(collectionModel.computeProbability(stats),
                                    "collection probability"));
@@ -97,7 +97,7 @@ public abstract class LMSimilarity exten
   }
 
   /** Stores the collection distribution of the current term. */
-  public static class LMStats extends EasyStats {
+  public static class LMStats extends BasicStats {
     /** The probability that the current term is generated by the collection. */
     private float collectionProbability;
     
@@ -128,7 +128,7 @@ public abstract class LMSimilarity exten
      * Computes the probability {@code p(w|C)} according to the language model
      * strategy for the current term.
      */
-    public float computeProbability(EasyStats stats);
+    public float computeProbability(BasicStats stats);
     
     /** The name of the collection model strategy. */
     public String getName();
@@ -140,7 +140,7 @@ public abstract class LMSimilarity exten
    */
   public static class DefaultCollectionModel implements CollectionModel {
     @Override
-    public float computeProbability(EasyStats stats) {
+    public float computeProbability(BasicStats stats) {
       return (float)stats.getTotalTermFreq() / (stats.getNumberOfFieldTokens() +1);
     }
     

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Lambda.java Tue Aug 23 13:25:10 2011
@@ -27,9 +27,9 @@ import org.apache.lucene.search.Explanat
  */
 public abstract class Lambda {
   /** Computes the lambda parameter. */
-  public abstract float lambda(EasyStats stats);
+  public abstract float lambda(BasicStats stats);
   /** Explains the lambda parameter. */
-  public abstract Explanation explain(EasyStats stats);
+  public abstract Explanation explain(BasicStats stats);
   
   /**
    * Subclasses must override this method to return the code of the lambda

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaDF.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
  */
 public class LambdaDF extends Lambda {
   @Override
-  public final float lambda(EasyStats stats) {
+  public final float lambda(BasicStats stats) {
     return (float)stats.getDocFreq() / stats.getNumberOfDocuments();
   }
   
   @Override
-  public final Explanation explain(EasyStats stats) {
+  public final Explanation explain(BasicStats stats) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(lambda(stats));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/LambdaTTF.java Tue Aug 23 13:25:10 2011
@@ -25,12 +25,12 @@ import org.apache.lucene.search.Explanat
  */
 public class LambdaTTF extends Lambda {  
   @Override
-  public final float lambda(EasyStats stats) {
+  public final float lambda(BasicStats stats) {
     return (float)stats.getTotalTermFreq() / stats.getNumberOfDocuments();
   }
 
   @Override
-  public final Explanation explain(EasyStats stats) {
+  public final Explanation explain(BasicStats stats) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(lambda(stats));

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/Normalization.java Tue Aug 23 13:25:10 2011
@@ -29,7 +29,7 @@ import org.apache.lucene.search.Explanat
 public abstract class Normalization {
   /** Returns the normalized term frequency.
    * @param len the field length. */
-  public abstract float tfn(EasyStats stats, float tf, float len);
+  public abstract float tfn(BasicStats stats, float tf, float len);
   
   /** Returns an explanation for the normalized term frequency.
    * <p>The default normalization methods use the field length of the document
@@ -37,7 +37,7 @@ public abstract class Normalization {
    * This method provides a generic explanation for such methods.
    * Subclasses that use other statistics must override this method.</p>
    */
-  public Explanation explain(EasyStats stats, float tf, float len) {
+  public Explanation explain(BasicStats stats, float tf, float len) {
     Explanation result = new Explanation();
     result.setDescription(getClass().getSimpleName() + ", computed from: ");
     result.setValue(tfn(stats, tf, len));
@@ -51,12 +51,12 @@ public abstract class Normalization {
   /** Implementation used when there is no normalization. */
   public static final class NoNormalization extends Normalization {
     @Override
-    public final float tfn(EasyStats stats, float tf, float len) {
+    public final float tfn(BasicStats stats, float tf, float len) {
       return tf;
     }
 
     @Override
-    public final Explanation explain(EasyStats stats, float tf, float len) {
+    public final Explanation explain(BasicStats stats, float tf, float len) {
       return new Explanation(1, "no normalization");
     }
     

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH1.java Tue Aug 23 13:25:10 2011
@@ -22,7 +22,7 @@ package org.apache.lucene.search.similar
  */
 public class NormalizationH1 extends Normalization {
   @Override
-  public final float tfn(EasyStats stats, float tf, float len) {
+  public final float tfn(BasicStats stats, float tf, float len) {
     return tf * stats.getAvgFieldLength() / len;
   }
 

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/NormalizationH2.java Tue Aug 23 13:25:10 2011
@@ -17,7 +17,7 @@ package org.apache.lucene.search.similar
  * limitations under the License.
  */
 
-import static org.apache.lucene.search.similarities.EasySimilarity.log2;
+import static org.apache.lucene.search.similarities.SimilarityBase.log2;
 
 /**
  * Normalization model in which the term frequency is inversely related to the
@@ -25,7 +25,7 @@ import static org.apache.lucene.search.s
  */
 public class NormalizationH2 extends Normalization {
   @Override
-  public final float tfn(EasyStats stats, float tf, float len) {
+  public final float tfn(BasicStats stats, float tf, float len) {
     return (float)(tf * log2(1 + stats.getAvgFieldLength() / len));
   }
 

Added: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java?rev=1160678&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java Tue Aug 23 13:25:10 2011
@@ -0,0 +1,302 @@
+package org.apache.lucene.search.similarities;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.TermContext;
+
+/**
+ * A subclass of {@code Similarity} that provides a simplified API for its
+ * descendants. Subclasses are only required to implement the {@link #score}
+ * and {@link #toString()} methods. Implementing
+ * {@link #explain(Explanation, BasicStats, int, float, float)} is optional,
+ * inasmuch as SimilarityBase already provides a basic explanation of the score
+ * and the term frequency. However, implementers of a subclass are encouraged to
+ * include as much detail about the scoring method as possible.
+ * @lucene.experimental
+ */
+public abstract class SimilarityBase extends Similarity {
+  /** For {@link #log2(double)}. Precomputed for efficiency reasons. */
+  private static final double LOG_2 = Math.log(2);
+  
+  /** @see #setDiscountOverlaps */
+  protected boolean discountOverlaps = true;
+  
+  /** Determines whether overlap tokens (Tokens with
+   *  0 position increment) are ignored when computing
+   *  norm.  By default this is true, meaning overlap
+   *  tokens do not count when computing norms.
+   *
+   *  @lucene.experimental
+   *
+   *  @see #computeNorm
+   */
+  public void setDiscountOverlaps(boolean v) {
+    discountOverlaps = v;
+  }
+
+  /** @see #setDiscountOverlaps */
+  public boolean getDiscountOverlaps() {
+    return discountOverlaps;
+  }
+  
+  /**
+   * Calls {@link #fillBasicStats(BasicStats, IndexSearcher, String, TermContext...)}.
+   * Subclasses that override this method may invoke {@code fillBasicStats} with any
+   * subclass of {@code BasicStats}.
+   */
+  @Override
+  public BasicStats computeStats(IndexSearcher searcher, String fieldName,
+      float queryBoost, TermContext... termContexts) throws IOException {
+    BasicStats stats = new BasicStats(queryBoost);
+    fillBasicStats(stats, searcher, fieldName, termContexts);
+    return stats;
+  }
+  
+  /** Fills all member fields defined in {@code BasicStats} in {@code stats}. */
+  protected final void fillBasicStats(BasicStats stats, IndexSearcher searcher,
+      String fieldName, TermContext... termContexts) throws IOException {
+    IndexReader reader = searcher.getIndexReader();
+    int numberOfDocuments = reader.maxDoc();
+    long numberOfFieldTokens = MultiFields.getTerms(searcher.getIndexReader(),
+        fieldName).getSumTotalTermFreq();
+    float avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
+    
+    // nocommit Take the minimum of term frequencies for phrases. This is not
+    // correct though, we'll need something like a scorePhrase(MultiStats ...)
+    int docFreq = Integer.MAX_VALUE;
+    long totalTermFreq = Long.MAX_VALUE;  // long seed: an int seed would cap the min at 2^31-1
+    for (final TermContext context : termContexts) {
+      docFreq = Math.min(docFreq, context.docFreq());
+      totalTermFreq = Math.min(totalTermFreq, context.totalTermFreq());
+    }
+    
+    // We have to provide something if the codec doesn't supply these measures,
+    // or if someone omitted frequencies for the field... negative values cause
+    // NaN/Inf for some scorers.
+    if (numberOfFieldTokens == -1) {
+      numberOfFieldTokens = docFreq;
+      avgFieldLength = 1;
+    }
+    if (totalTermFreq == -1) {
+      totalTermFreq = docFreq;
+    }
+    
+    stats.setNumberOfDocuments(numberOfDocuments);
+    stats.setNumberOfFieldTokens(numberOfFieldTokens);
+    stats.setAvgFieldLength(avgFieldLength);
+    stats.setDocFreq(docFreq);
+    stats.setTotalTermFreq(totalTermFreq);
+  }
+  
+  /**
+   * Scores the document {@code doc}.
+   * <p>Subclasses must apply their scoring formula in this method.</p>
+   * @param stats the corpus level statistics.
+   * @param freq the term frequency.
+   * @param docLen the document length.
+   * @return the score.
+   */
+  protected abstract float score(BasicStats stats, float freq, float docLen);
+  
+  /**
+   * Subclasses should implement this method to explain the score. {@code expl}
+   * already contains the score, the name of the class and the doc id, as well
+   * as the term frequency and its explanation; subclasses can add additional
+   * clauses to explain details of their scoring formulae.
+   * <p>The default implementation does nothing.</p>
+   * 
+   * @param expl the explanation to extend with details.
+   * @param stats the corpus level statistics.
+   * @param doc the document id.
+   * @param freq the term frequency.
+   * @param docLen the document length.
+   */
+  protected void explain(
+      Explanation expl, BasicStats stats, int doc, float freq, float docLen) {}
+  
+  /**
+   * Explains the score. The implementation here provides a basic explanation
+   * in the format <em>score(name-of-similarity, doc=doc-id,
+   * freq=term-frequency), computed from:</em>, and
+   * attaches the score (computed via the {@link #score(BasicStats, float, float)}
+   * method) and the explanation for the term frequency. Subclasses content with
+   * this format may add additional details in
+   * {@link #explain(Explanation, BasicStats, int, float, float)}.
+   *  
+   * @param stats the corpus level statistics.
+   * @param doc the document id.
+   * @param freq the term frequency and its explanation.
+   * @param docLen the document length.
+   * @return the explanation.
+   */
+  protected Explanation explain(
+      BasicStats stats, int doc, Explanation freq, float docLen) {
+    Explanation result = new Explanation(); 
+    result.setValue(score(stats, freq.getValue(), docLen));
+    result.setDescription("score(" + getClass().getSimpleName() +
+        ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:");
+    result.addDetail(freq);
+    
+    explain(result, stats, doc, freq.getValue(), docLen);
+    
+    return result;
+  }
+  
+  @Override
+  public ExactDocScorer exactDocScorer(Stats stats, String fieldName,
+      AtomicReaderContext context) throws IOException {
+    return new BasicExactDocScorer((BasicStats) stats,
+                                  context.reader.norms(fieldName));
+  }
+  
+  @Override
+  public SloppyDocScorer sloppyDocScorer(Stats stats, String fieldName,
+      AtomicReaderContext context) throws IOException {
+    return new BasicSloppyDocScorer((BasicStats) stats,
+                                   context.reader.norms(fieldName));
+  }
+  
+  /**
+   * Subclasses must override this method to return the name of the Similarity
+   * and preferably the values of parameters (if any) as well.
+   */
+  @Override
+  public abstract String toString();  // nocommit: to Similarity?
+
+  // ------------------------------ Norm handling ------------------------------
+  
+  /** Norm -> document length map. */
+  private static final float[] NORM_TABLE = new float[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      float floatNorm = SmallFloat.byte315ToFloat((byte)i);
+      NORM_TABLE[i] = 1.0f / (floatNorm * floatNorm);
+    }
+  }
+
+  /** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
+  @Override
+  public byte computeNorm(FieldInvertState state) {
+    final float numTerms;
+    if (discountOverlaps)
+      numTerms = state.getLength() - state.getNumOverlap();
+    else
+      numTerms = state.getLength() / state.getBoost();
+    return encodeNormValue(numTerms);
+  }
+  
+  /** Decodes a normalization factor (document length) stored in an index.
+   * @see #encodeNormValue(float)
+   */
+  protected float decodeNormValue(byte norm) {
+    return NORM_TABLE[norm & 0xFF];  // & 0xFF maps negative bytes to positive above 127
+  }
+  
+  /** Encodes the length to a byte via SmallFloat. */
+  protected byte encodeNormValue(float length) {
+    return SmallFloat.floatToByte315((float)(1.0 / Math.sqrt(length)));
+  }
+  
+  // ----------------------------- Static methods ------------------------------
+  
+  /** Returns the base two logarithm of {@code x}. */
+  public static double log2(double x) {
+    // Put this to a 'util' class if we need more of these.
+    return Math.log(x) / LOG_2;
+  }
+  
+  // --------------------------------- Classes ---------------------------------
+  
+  /** Delegates the {@link #score(int, int)} and
+   * {@link #explain(int, Explanation)} methods to
+   * {@link SimilarityBase#score(BasicStats, float, float)} and
+   * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+   * respectively.
+   */
+  private class BasicExactDocScorer extends ExactDocScorer {
+    private final BasicStats stats;
+    private final byte[] norms;
+    
+    BasicExactDocScorer(BasicStats stats, byte norms[]) {
+      this.stats = stats;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, int freq) {
+      // We have to supply something in case norms are omitted
+      return SimilarityBase.this.score(stats, freq,
+          norms == null ? freq : decodeNormValue(norms[doc]));
+    }
+    
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return SimilarityBase.this.explain(stats, doc, freq,
+          norms == null ? freq.getValue() : decodeNormValue(norms[doc]));
+    }
+  }
+  
+  /** Delegates the {@link #score(int, float)} and
+   * {@link #explain(int, Explanation)} methods to
+   * {@link SimilarityBase#score(BasicStats, float, float)} and
+   * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+   * respectively.
+   */
+  private class BasicSloppyDocScorer extends SloppyDocScorer {
+    private final BasicStats stats;
+    private final byte[] norms;
+    
+    BasicSloppyDocScorer(BasicStats stats, byte norms[]) {
+      this.stats = stats;
+      this.norms = norms;
+    }
+    
+    @Override
+    public float score(int doc, float freq) {
+      // We have to supply something in case norms are omitted
+      return SimilarityBase.this.score(stats, freq,
+          norms == null ? freq : decodeNormValue(norms[doc]));
+    }
+    @Override
+    public Explanation explain(int doc, Explanation freq) {
+      return SimilarityBase.this.explain(stats, doc, freq,
+          norms == null ? freq.getValue() : decodeNormValue(norms[doc]));
+    }
+
+    @Override
+    public float computeSlopFactor(int distance) {
+      return 1.0f / (distance + 1);
+    }
+
+    @Override
+    public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+      return 1f;
+    }
+  }
+}

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java Tue Aug 23 13:25:10 2011
@@ -22,16 +22,9 @@ import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
-import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
-import org.apache.lucene.search.similarities.Similarity.Stats;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.SmallFloat;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/similarities/package.html Tue Aug 23 13:25:10 2011
@@ -46,10 +46,11 @@ information, see {@link org.apache.lucen
 <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
 implementation of the successful Okapi BM25 model.</p>
 
-<p>{@link org.apache.lucene.search.similarities.EasySimilarity} provides a basic
+<p>{@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic
 implementation of the Similarity contract and exposes a highly simplified
 interface, which makes it an ideal starting point for new ranking functions.
-Lucene ships the following EasySimilarity-based methods:
+Lucene ships the following methods built on
+{@link org.apache.lucene.search.similarities.SimilarityBase}:
 
 <a name="framework"></a>
 <ul>
@@ -60,7 +61,8 @@ Lucene ships the following EasySimilarit
   Zhai and Lafferty's paper.</li>
 </ul>
 
-Since EasySimilarity is not optimized to the same extent as
+Since {@link org.apache.lucene.search.similarities.SimilarityBase} is not
+optimized to the same extent as
 {@link org.apache.lucene.search.similarities.DefaultSimilarity} and
 {@link org.apache.lucene.search.similarities.BM25Similarity}, a difference in
 performance is to be expected when using the methods listed above. However,
@@ -100,7 +102,7 @@ either, the query norm is defined as <co
 <p>To make this change, implement your own {@link org.apache.lucene.search.similarities.Similarity} (likely
     you'll want to simply subclass an existing method, be it
     {@link org.apache.lucene.search.similarities.DefaultSimilarity} or a descendant of
-    {@link org.apache.lucene.search.similarities.EasySimilarity}) and
+    {@link org.apache.lucene.search.similarities.SimilarityBase}) and
     {@link org.apache.lucene.search.similarities.SimilarityProvider} (or use
     {@link org.apache.lucene.search.similarities.BasicSimilarityProvider}), and
     then register the new class by calling
@@ -110,17 +112,17 @@ either, the query norm is defined as <co
     before searching.
 </p>
 
-<h3>Extending {@linkplain org.apache.lucene.search.similarities.EasySimilarity}</h3>
+<h3>Extending {@linkplain org.apache.lucene.search.similarities.SimilarityBase}</h3>
 <p>
 The easiest way to quickly implement a new ranking method is to extend
-{@link org.apache.lucene.search.similarities.EasySimilarity}, which provides
+{@link org.apache.lucene.search.similarities.SimilarityBase}, which provides
 basic implementations for the low level . Subclasses are only required to
-implement the {@link org.apache.lucene.search.similarities.EasySimilarity#score(EasyStats, float, float)}
-and {@link org.apache.lucene.search.similarities.EasySimilarity#toString()}
+implement the {@link org.apache.lucene.search.similarities.SimilarityBase#score(BasicStats, float, float)}
+and {@link org.apache.lucene.search.similarities.SimilarityBase#toString()}
 methods.</p>
 
 <p>Another options is to extend one of the <a href="#framework">frameworks</a>
-based on {@link org.apache.lucene.search.similarities.EasySimilarity}. These
+based on {@link org.apache.lucene.search.similarities.SimilarityBase}. These
 Similarities are implemented modularly, e.g.
 {@link org.apache.lucene.search.similarities.DFRSimilarity} delegates
 computation of the three parts of its formula to the classes

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java?rev=1160678&r1=1160677&r2=1160678&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/similarities/SpoofIndexSearcher.java Tue Aug 23 13:25:10 2011
@@ -39,22 +39,22 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Index searcher implementation that takes an {@link EasyStats} instance and
+ * Index searcher implementation that takes a {@link BasicStats} instance and
  * returns statistics accordingly. Most of the methods are not implemented, so
  * it can only be used for Similarity unit testing.
  */
 public class SpoofIndexSearcher extends IndexSearcher {
-  public SpoofIndexSearcher(EasyStats stats) {
+  public SpoofIndexSearcher(BasicStats stats) {
     super(new SpoofIndexReader(stats));
   }
   
   public static class SpoofIndexReader extends IndexReader {
     /** The stats the reader has to return. */
-    protected EasyStats stats;
+    protected BasicStats stats;
     /** The fields the reader has to return. */
     protected SpoofFields fields;
     
-    public SpoofIndexReader(EasyStats stats) {
+    public SpoofIndexReader(BasicStats stats) {
       this.stats = stats;
       this.fields = new SpoofFields(stats);
     }
@@ -163,7 +163,7 @@ public class SpoofIndexSearcher extends 
     /** The stats the object has to return. */
     protected SpoofTerms terms;
     
-    public SpoofFields(EasyStats stats) {
+    public SpoofFields(BasicStats stats) {
       this.terms = new SpoofTerms(stats);
     }
     
@@ -183,9 +183,9 @@ public class SpoofIndexSearcher extends 
   /** Spoof Terms class for Similarity testing. */
   public static class SpoofTerms extends Terms {
     /** The stats the object has to return. */
-    protected EasyStats stats;
+    protected BasicStats stats;
     
-    public SpoofTerms(EasyStats stats) {
+    public SpoofTerms(BasicStats stats) {
       this.stats = stats;
     }