You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/03/28 07:02:29 UTC

svn commit: r1086118 - in /lucene/dev/branches/flexscoring: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/ lucene/src/java/org/apache/lucene/index/ lucene/src/te...

Author: rmuir
Date: Mon Mar 28 05:02:29 2011
New Revision: 1086118

URL: http://svn.apache.org/viewvc?rev=1086118&view=rev
Log:
LUCENE-2392: add avgdl, mockbm25

Added:
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java   (with props)
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java   (with props)
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java   (with props)
Modified:
    lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java
    lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java
    lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java
    lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java

Modified: lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Mon Mar 28 05:02:29 2011
@@ -336,6 +336,12 @@ public class InstantiatedIndexReader ext
   }
 
   @Override
+  public long getSumOfNorms(String field) throws IOException {
+    // nocommit: norm sums are not supported by this reader yet; add support?
+    return 1; // placeholder: always 1 regardless of field, not a real sum
+  }
+
+  @Override
   protected void doSetNorm(int doc, String field, byte value) throws IOException {
     if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
       uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());

Modified: lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Mar 28 05:02:29 2011
@@ -1204,6 +1204,11 @@ public class MemoryIndex {
     }
   
     @Override
+    public long getSumOfNorms(String field) throws IOException {
+      return norms(field)[0] & 0xff; // entry 0 only, widened as an unsigned byte; presumably this index holds a single document -- TODO confirm
+    }
+
+    @Override
     protected void doSetNorm(int doc, String fieldName, byte value) {
       throw new UnsupportedOperationException();
     }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Mon Mar 28 05:02:29 2011
@@ -629,6 +629,15 @@ class DirectoryReader extends IndexReade
   }
 
   @Override
+  public long getSumOfNorms(String field) throws IOException {
+    ensureOpen();
+    long total = 0; // accumulated over all sub-readers
+    for (int i = 0; i < subReaders.length; i++)
+      total += subReaders[i].getSumOfNorms(field); // each sub-reader reports its own sum for the field
+    return total;
+  }
+  
+  @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
     int i = readerIndex(n);                           // find segment num

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Mon Mar 28 05:02:29 2011
@@ -368,6 +368,12 @@ public class FilterIndexReader extends I
   }
 
   @Override
+  public long getSumOfNorms(String field) throws IOException {
+    ensureOpen();
+    return in.getSumOfNorms(field); // pure delegation to the wrapped reader
+  }
+
+  @Override
   protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException {
     in.setNorm(d, f, b);
   }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java Mon Mar 28 05:02:29 2011
@@ -1034,6 +1034,13 @@ public abstract class IndexReader implem
   protected abstract void doSetNorm(int doc, String field, byte value)
           throws CorruptIndexException, IOException;
 
+  /** Returns the sum of all byte-encoded normalization factors (each byte
+   * treated as unsigned) for the named field of every document.
+   * @param field name of the field whose norm bytes are summed
+   * @lucene.experimental
+   */
+  public abstract long getSumOfNorms(String field) throws IOException;
+  
   /** Flex API: returns {@link Fields} for this reader.
    *  This method may return null if the reader has no
    *  postings.

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java Mon Mar 28 05:02:29 2011
@@ -307,6 +307,15 @@ public class MultiReader extends IndexRe
   }
 
   @Override
+  public long getSumOfNorms(String field) throws IOException {
+    ensureOpen();
+    long total = 0; // accumulated over all sub-readers
+    for (int i = 0; i < subReaders.length; i++)
+      total += subReaders[i].getSumOfNorms(field); // each sub-reader reports its own sum for the field
+    return total;
+  }
+  
+  @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
     int i = readerIndex(n);                           // find segment num

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java Mon Mar 28 05:02:29 2011
@@ -447,6 +447,18 @@ public class ParallelReader extends Inde
   }
 
   @Override
+  public long getSumOfNorms(String field) throws IOException {
+    ensureOpen();
+    IndexReader reader = fieldToReader.get(field); // the reader mapped to this field, if any
+
+    if (reader == null) {
+      return 0; // no reader is mapped to this field
+    } else {
+      return reader.getSumOfNorms(field); // only the mapped reader is consulted
+    }
+  }
+  
+  @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
     IndexReader reader = fieldToReader.get(field);

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java Mon Mar 28 05:02:29 2011
@@ -271,6 +271,7 @@ public class SegmentReader extends Index
     private boolean dirty;
     private int number;
     private boolean rollbackDirty;
+    private long sum;
     
     public Norm(IndexInput in, int number, long normSeek) {
       this.in = in;
@@ -335,6 +336,7 @@ public class SegmentReader extends Index
           bytes = origNorm.bytes();
           bytesRef = origNorm.bytesRef;
           bytesRef.incrementAndGet();
+          sum = origNorm.sum;
 
           // Once we've loaded the bytes we no longer need
           // origNorm:
@@ -354,6 +356,11 @@ public class SegmentReader extends Index
           synchronized(in) {
             in.seek(normSeek);
             in.readBytes(bytes, 0, count, false);
+            // nocommit: version the file, and add this sum.
+            sum = 0;
+            for (int i = 0; i < count; i++) {
+              sum += (bytes[i] & 0xff);
+            }
           }
 
           bytesRef = new AtomicInteger(1);
@@ -938,6 +945,15 @@ public class SegmentReader extends Index
   }
 
   @Override
+  public synchronized long getSumOfNorms(String field) throws IOException {
+    ensureOpen();
+    Norm norm = norms.get(field);
+    if (norm == null) return 0; // not indexed, or norms not stored
+    norm.bytes(); // return value ignored; called to load the norms if not loaded
+    return norm.sum; // sum is assigned where the norm bytes are read, or copied from origNorm
+  }
+
+  @Override
   protected void doSetNorm(int doc, String field, byte value)
           throws IOException {
     Norm norm = norms.get(field);
@@ -946,6 +962,9 @@ public class SegmentReader extends Index
 
     normsDirty = true;
     norm.copyOnWrite()[doc] = value;                    // set the value
+    // TODO: maybe we should update the norm sum here,
+    // but it's probably ok not to: in general, reader changes
+    // like deleting docs don't update docfreq, etc.
   }
 
   private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {

Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,124 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.DefaultSimilarityProvider;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests the getSumOfNorms statistic in IndexReader.
+ */
+public class TestNormsSum extends LuceneTestCase { 
+  Directory dir;
+  IndexReader reader;
+  /* expected norm value of each added document, appended by addValue() */
+  ArrayList<Integer> expected = new ArrayList<Integer>();
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, 
+        new MockAnalyzer(MockTokenizer.SIMPLE, true));
+    config.setSimilarityProvider(new DefaultSimilarityProvider() { // every field gets a TestSimilarity
+      public Similarity get(String field) {
+        return new TestSimilarity();
+      }
+    });
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+    Document doc = new Document();
+    Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(foo);
+    for (int i = 0; i < 100; i++) { // 100 documents, reusing the same Document and Field instances
+      foo.setValue(addValue()); // addValue() also records this document's expected norm
+      writer.addDocument(doc);
+    }
+    reader = writer.getReader();
+    writer.close();
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    dir.close();
+    super.tearDown();
+  }
+  
+  public void test() throws Exception {
+    long sum = reader.getSumOfNorms("foo"); // value reported by the reader under test
+    long expectedSum = 0;
+    for (int i = 0; i < expected.size(); i++)
+      expectedSum += expected.get(i); // independent total of the recorded per-document values
+    assertEquals(expectedSum, sum);
+  }
+
+  /**
+   * Builds a field value from single-letter tokens 'a'..'z', each repeated a random number
+   * of times (at most 255), shuffles them, and renders the list with Arrays.toString();
+   * the brackets and commas are expected to be dropped because we use a letter tokenizer.
+   * Records the highest per-letter count in expected, to be checked against the norm.
+   */
+  private String addValue() {
+    List<String> terms = new ArrayList<String>();
+    int maxCeiling = _TestUtil.nextInt(random, 0, 255); // per-document upper bound on any letter's count
+    int max = 0;
+    for (char ch = 'a'; ch <= 'z'; ch++) {
+      int num = _TestUtil.nextInt(random, 0, maxCeiling);
+      for (int i = 0; i < num; i++)
+        terms.add(Character.toString(ch));
+      max = Math.max(max, num); // track the largest count seen for this document
+    }
+    expected.add(max);
+    Collections.shuffle(terms, random);
+    return Arrays.toString(terms.toArray(new String[terms.size()]));
+  }
+  
+  /**
+   * Simple similarity whose norm is the max term frequency, stored directly as a byte.
+   */
+  class TestSimilarity extends DefaultSimilarity {
+
+    @Override
+    public byte encodeNormValue(float f) {
+      return (byte) f; // plain narrowing cast; values above 127 wrap to negative bytes
+    }
+    
+    @Override
+    public float decodeNormValue(byte b) {
+      return (float) b; // signed widening, so not the inverse of encodeNormValue above 127
+    }
+
+    @Override
+    public float computeNorm(FieldInvertState state) {
+      return (float) state.getMaxTermFrequency();
+    }
+  }
+}

Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,167 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Explanation.IDFExplanation;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
+import org.apache.lucene.util.SmallFloat;
+
+/**
+ * Mock BM25 Similarity with fixed k1 and b parameters.
+ */
+public class MockBM25Similarity extends Similarity {
+  // TODO: the norm table can probably be per-sim so you can configure these
+  // it's also pretty nice that we don't bake the parameters into the index... you can tune them at runtime.
+  private static final float k1 = 2f;
+  private static final float b = 0.75f;
+  
+  /**
+   * Our normalization is k1 * ((1 - b) + b * numTerms / avgNumTerms).
+   * Currently we put the document length (divided by the boost) into the norm byte for simple quantization;
+   * the decoder only maps the byte back to a float, and the scorers evaluate the formula at search time.
+   * 
+   * This is a poor fit for doc/field boosting, but with a static schema you can boost per-field
+   * in your sim anyway (rather than baking the boost into the index).
+   */
+  @Override
+  public float computeNorm(FieldInvertState state) {
+    final int numTerms = state.getLength() - state.getNumOverlap();
+    return numTerms / state.getBoost();
+  }
+  
+  /** Cache of decoded bytes, indexed by the unsigned byte value. */
+  private static final float[] NORM_TABLE = new float[256];
+
+  static {
+    for (int i = 0; i < 256; i++) {
+      NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
+    }
+  }
+  
+  @Override
+  public float decodeNormValue(byte b) {
+    return NORM_TABLE[b & 0xFF]; // mask so negative bytes index the upper half of the table
+  }
+
+  @Override
+  public byte encodeNormValue(float f) {
+    return SmallFloat.floatToByte315(f);
+  }
+
+  @Override
+  public float sloppyFreq(int distance) {
+    return 1.0f / (distance + 1); // 1 at distance 0, shrinking as distance grows
+  }
+
+  // weight is the sum over all terms of log(1 + ((n - dfj + 0.5F)/(dfj + 0.5F))), n = searcher.maxDoc(), dfj = docFreq
+  // nocommit: remove IDFExplanation!
+  // nocommit: are we summing this in the right place for phrase estimation?
+  @Override
+  public IDFExplanation computeWeight(IndexSearcher searcher, String fieldName, TermContext... termStats) throws IOException {
+    float value = 0.0f;
+    final StringBuilder exp = new StringBuilder();
+
+    final int max = searcher.maxDoc();
+    
+    for (final TermContext stat : termStats ) {
+      final int dfj = stat.docFreq();
+      value += Math.log(1 + ((max - dfj + 0.5F)/(dfj + 0.5F)));
+      exp.append(" "); // the explanation text is just the space-separated docFreq values
+      exp.append(dfj);
+    }
+    
+    final float idfValue = value;
+    return new IDFExplanation() {
+      @Override
+      public float getIdf() {
+        return idfValue;
+      }
+      @Override
+      public String explain() {
+        return exp.toString();
+      }
+    };
+  }
+
+  @Override
+  public ExactDocScorer exactDocScorer(Weight weight, String fieldName, AtomicReaderContext context) throws IOException {
+    byte[] norms = context.reader.norms(fieldName);
+    float avgdl = norms == null ? 0f : avgDocumentLength(fieldName, context); // avgdl is unused by the scorer when norms are absent
+    return new ExactBM25DocScorer((float) Math.sqrt(weight.getValue()), norms, avgdl);
+  }
+
+  @Override
+  public SloppyDocScorer sloppyDocScorer(Weight weight, String fieldName, AtomicReaderContext context) throws IOException {
+    byte[] norms = context.reader.norms(fieldName);
+    float avgdl = norms == null ? 0f : avgDocumentLength(fieldName, context); // avgdl is unused by the scorer when norms are absent
+    return new SloppyBM25DocScorer((float) Math.sqrt(weight.getValue()), norms, avgdl);
+  }
+  
+  private float avgDocumentLength(String field, ReaderContext context) throws IOException {
+    // nocommit: this is recomputed redundantly for each segment (we should just do it once in the weight, once it's generalized)
+    context = ReaderUtil.getTopLevelContext(context);
+    long normsum = context.reader.getSumOfNorms(field);
+    long maxdoc = context.reader.maxDoc();
+    int avgnorm = (int) (normsum / (double) maxdoc); // mean of the encoded norm bytes, truncated toward zero
+    return decodeNormValue((byte)avgnorm); // NOTE(review): decodes the mean byte; differs from the mean decoded length unless the encoding is linear
+  }
+
+  private class ExactBM25DocScorer extends ExactDocScorer {
+    private final float weightValue;
+    private final byte[] norms;
+    private final float avgdl;
+    
+    ExactBM25DocScorer(float weightValue, byte norms[], float avgdl) {
+      this.weightValue = weightValue;
+      this.norms = norms;
+      this.avgdl = avgdl;
+    }
+    
+    // todo: optimize. NOTE(review): if avgdl is 0 while norms is non-null, the division below yields Infinity or NaN
+    @Override
+    public float score(int doc, int freq) {
+      float norm = norms == null ? 0 : k1 * ((1 - b) + b * (decodeNormValue(norms[doc])) / (avgdl));
+      return weightValue * (freq * (k1 + 1)) / (freq + norm);
+    }
+  }
+  
+  private class SloppyBM25DocScorer extends SloppyDocScorer {
+    private final float weightValue;
+    private final byte[] norms;
+    private final float avgdl;
+    
+    SloppyBM25DocScorer(float weightValue, byte norms[], float avgdl) {
+      this.weightValue = weightValue;
+      this.norms = norms;
+      this.avgdl = avgdl;
+    }
+    
+    // todo: optimize. NOTE(review): if avgdl is 0 while norms is non-null, the division below yields Infinity or NaN
+    @Override
+    public float score(int doc, float freq) {
+      float norm = norms == null ? 0 : k1 * ((1 - b) + b * (decodeNormValue(norms[doc])) / (avgdl));
+      return weightValue * (freq * (k1 + 1)) / (freq + norm);
+    }
+  }
+}

Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,32 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class MockBM25SimilarityProvider extends DefaultSimilarityProvider {
+
+  private final Similarity impl = new MockBM25Similarity(); // one instance shared by every field
+
+  public Similarity get(String field) {
+    return impl; // the field name is ignored
+  }
+
+  // nocommit: query normalization is disabled (constant 1); maybe ok given how bm25 works?
+  public float queryNorm(float sumOfSquaredWeights) {
+    return 1f;
+  }
+}

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java Mon Mar 28 05:02:29 2011
@@ -44,7 +44,8 @@ import org.apache.lucene.util.SmallFloat
  * </code>
  * <p>
  * NOTE: to use this Similarity, use MockLMSimilarityProvider (as this formula already incorporates coord()
- * and currently depends upon a disabled queryNorm) 
+ * and currently depends upon a disabled queryNorm)
+ * WARNING: doesn't work with preflex codec
  */
 public class MockLMSimilarity extends Similarity {
   // TODO: the norm table can probably be per-sim so you can configure this.

Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java Mon Mar 28 05:02:29 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.search;
  *   <li> disables coord, because its already factored into the formula
  *   <li> disables queryNorm, because we (currently) shove part of the formula in there as "idf"
  * </ul>
+ * WARNING: doesn't work with preflex codec
  */
 public class MockLMSimilarityProvider implements SimilarityProvider {
   private static final Similarity impl = new MockLMSimilarity();

Modified: lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (original)
+++ lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java Mon Mar 28 05:02:29 2011
@@ -21,7 +21,6 @@ import org.apache.lucene.benchmark.quali
 import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
 import org.apache.lucene.benchmark.quality.*;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MockLMSimilarityProvider;
 import org.apache.lucene.store.FSDirectory;
 
 import java.io.BufferedReader;
@@ -56,7 +55,8 @@ public class QueryDriver {
     String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
     IndexSearcher searcher = new IndexSearcher(dir, true);
     // nocommit
-    // searcher.setSimilarityProvider(new MockLMSimilarityProvider());
+    //searcher.setSimilarityProvider(new MockLMSimilarityProvider());
+    //searcher.setSimilarityProvider(new MockBM25SimilarityProvider());
     int maxResults = 1000;
     String docNameField = "docname";