You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/03/28 07:02:29 UTC
svn commit: r1086118 - in /lucene/dev/branches/flexscoring:
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/src/java/org/apache/lucene/index/ lucene/src/te...
Author: rmuir
Date: Mon Mar 28 05:02:29 2011
New Revision: 1086118
URL: http://svn.apache.org/viewvc?rev=1086118&view=rev
Log:
LUCENE-2392: add avgdl, mockbm25
Added:
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java (with props)
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java (with props)
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java (with props)
Modified:
lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java
lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java
lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java
lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
Modified: lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Mon Mar 28 05:02:29 2011
@@ -336,6 +336,12 @@ public class InstantiatedIndexReader ext
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ // nocommit: add support?
+ return 1; // lie for now
+ }
+
+ @Override
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
Modified: lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/flexscoring/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Mar 28 05:02:29 2011
@@ -1204,6 +1204,11 @@ public class MemoryIndex {
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ return norms(field)[0] & 0xff;
+ }
+
+ @Override
protected void doSetNorm(int doc, String fieldName, byte value) {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Mon Mar 28 05:02:29 2011
@@ -629,6 +629,15 @@ class DirectoryReader extends IndexReade
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ ensureOpen();
+ long total = 0;
+ for (int i = 0; i < subReaders.length; i++)
+ total += subReaders[i].getSumOfNorms(field);
+ return total;
+ }
+
+ @Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
int i = readerIndex(n); // find segment num
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Mon Mar 28 05:02:29 2011
@@ -368,6 +368,12 @@ public class FilterIndexReader extends I
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ ensureOpen();
+ return in.getSumOfNorms(field);
+ }
+
+ @Override
protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException {
in.setNorm(d, f, b);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/IndexReader.java Mon Mar 28 05:02:29 2011
@@ -1034,6 +1034,13 @@ public abstract class IndexReader implem
protected abstract void doSetNorm(int doc, String field, byte value)
throws CorruptIndexException, IOException;
+ /** Returns the sum of all byte-encoded normalization factors (treated
+ * as unsigned) for the named field of every document.
+ *
+ * @lucene.experimental
+ */
+ public abstract long getSumOfNorms(String field) throws IOException;
+
/** Flex API: returns {@link Fields} for this reader.
* This method may return null if the reader has no
* postings.
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/MultiReader.java Mon Mar 28 05:02:29 2011
@@ -307,6 +307,15 @@ public class MultiReader extends IndexRe
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ ensureOpen();
+ long total = 0;
+ for (int i = 0; i < subReaders.length; i++)
+ total += subReaders[i].getSumOfNorms(field);
+ return total;
+ }
+
+ @Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
int i = readerIndex(n); // find segment num
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/ParallelReader.java Mon Mar 28 05:02:29 2011
@@ -447,6 +447,18 @@ public class ParallelReader extends Inde
}
@Override
+ public long getSumOfNorms(String field) throws IOException {
+ ensureOpen();
+ IndexReader reader = fieldToReader.get(field);
+
+ if (reader == null) {
+ return 0;
+ } else {
+ return reader.getSumOfNorms(field);
+ }
+ }
+
+ @Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
IndexReader reader = fieldToReader.get(field);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/index/SegmentReader.java Mon Mar 28 05:02:29 2011
@@ -271,6 +271,7 @@ public class SegmentReader extends Index
private boolean dirty;
private int number;
private boolean rollbackDirty;
+ private long sum;
public Norm(IndexInput in, int number, long normSeek) {
this.in = in;
@@ -335,6 +336,7 @@ public class SegmentReader extends Index
bytes = origNorm.bytes();
bytesRef = origNorm.bytesRef;
bytesRef.incrementAndGet();
+ sum = origNorm.sum;
// Once we've loaded the bytes we no longer need
// origNorm:
@@ -354,6 +356,11 @@ public class SegmentReader extends Index
synchronized(in) {
in.seek(normSeek);
in.readBytes(bytes, 0, count, false);
+ // nocommit: version the file, and add this sum.
+ sum = 0;
+ for (int i = 0; i < count; i++) {
+ sum += (bytes[i] & 0xff);
+ }
}
bytesRef = new AtomicInteger(1);
@@ -938,6 +945,15 @@ public class SegmentReader extends Index
}
@Override
+ public synchronized long getSumOfNorms(String field) throws IOException {
+ ensureOpen();
+ Norm norm = norms.get(field);
+ if (norm == null) return 0; // not indexed, or norms not stored
+ norm.bytes(); // load norms if not loaded
+ return norm.sum;
+ }
+
+ @Override
protected void doSetNorm(int doc, String field, byte value)
throws IOException {
Norm norm = norms.get(field);
@@ -946,6 +962,9 @@ public class SegmentReader extends Index
normsDirty = true;
norm.copyOnWrite()[doc] = value; // set the value
+ // TODO: maybe we should update the norm sum here,
+ // but it's probably ok not to: in general reader changes
+ // like deleting docs don't update docfreq, etc.
}
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestNormsSum.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,124 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.DefaultSimilarityProvider;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests the getSumOfNorms statistic in IndexReader
+ */
+public class TestNormsSum extends LuceneTestCase {
+ Directory dir;
+ IndexReader reader;
+ /* expected norm values for our documents */
+ ArrayList<Integer> expected = new ArrayList<Integer>();
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(MockTokenizer.SIMPLE, true));
+ config.setSimilarityProvider(new DefaultSimilarityProvider() {
+ public Similarity get(String field) {
+ return new TestSimilarity();
+ }
+ });
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+ Document doc = new Document();
+ Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+ doc.add(foo);
+ for (int i = 0; i < 100; i++) {
+ foo.setValue(addValue());
+ writer.addDocument(doc);
+ }
+ reader = writer.getReader();
+ writer.close();
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ reader.close();
+ dir.close();
+ super.tearDown();
+ }
+
+ public void test() throws Exception {
+ long sum = reader.getSumOfNorms("foo");
+ long expectedSum = 0;
+ for (int i = 0; i < expected.size(); i++)
+ expectedSum += expected.get(i);
+ assertEquals(expectedSum, sum);
+ }
+
+ /**
+ * Makes a bunch of single-char tokens (the max freq will be at most 255),
+ * shuffles them around, and returns the whole list with Arrays.toString().
+ * This works fine because we use a letter tokenizer, which drops the brackets and commas.
+ * Puts the maximum term frequency into expected, to be checked against the norm.
+ */
+ private String addValue() {
+ List<String> terms = new ArrayList<String>();
+ int maxCeiling = _TestUtil.nextInt(random, 0, 255);
+ int max = 0;
+ for (char ch = 'a'; ch <= 'z'; ch++) {
+ int num = _TestUtil.nextInt(random, 0, maxCeiling);
+ for (int i = 0; i < num; i++)
+ terms.add(Character.toString(ch));
+ max = Math.max(max, num);
+ }
+ expected.add(max);
+ Collections.shuffle(terms, random);
+ return Arrays.toString(terms.toArray(new String[terms.size()]));
+ }
+
+ /**
+ * Simple similarity that encodes maxTermFrequency directly as a byte
+ */
+ class TestSimilarity extends DefaultSimilarity {
+
+ @Override
+ public byte encodeNormValue(float f) {
+ return (byte) f;
+ }
+
+ @Override
+ public float decodeNormValue(byte b) {
+ return (float) b;
+ }
+
+ @Override
+ public float computeNorm(FieldInvertState state) {
+ return (float) state.getMaxTermFrequency();
+ }
+ }
+}
Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25Similarity.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,167 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Explanation.IDFExplanation;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
+import org.apache.lucene.util.SmallFloat;
+
+/**
+ * BM25 Similarity.
+ */
+public class MockBM25Similarity extends Similarity {
+ // TODO: the norm table can probably be per-sim so you can configure these
+ // it's also pretty nice that we don't bake the parameter into the index... you can tune it at runtime.
+ private static final float k1 = 2f;
+ private static final float b = 0.75f;
+
+ /**
+ * Our normalization is k1 * ((1 - b) + b * numTerms / avgNumTerms)
+ * currently we put doclen into the boost byte (divided by boost) for simple quantization
+ * our decoder precomputes the full formula into the norm table
+ *
+ * this is pretty crappy for doc/field boosting, but with a static schema you can boost per-field
+ * in your sim anyway (sorta dumb to bake into the index)
+ */
+ @Override
+ public float computeNorm(FieldInvertState state) {
+ final int numTerms = state.getLength() - state.getNumOverlap();
+ return numTerms / state.getBoost();
+ }
+
+ /** Cache of decoded bytes. */
+ private static final float[] NORM_TABLE = new float[256];
+
+ static {
+ for (int i = 0; i < 256; i++) {
+ NORM_TABLE[i] = SmallFloat.byte315ToFloat((byte)i);
+ }
+ }
+
+ @Override
+ public float decodeNormValue(byte b) {
+ return NORM_TABLE[b & 0xFF];
+ }
+
+ @Override
+ public byte encodeNormValue(float f) {
+ return SmallFloat.floatToByte315(f);
+ }
+
+ @Override
+ public float sloppyFreq(int distance) {
+ return 1.0f / (distance + 1);
+ }
+
+ // weight for a term as log(1 + ((n - dfj + 0.5F)/(dfj + 0.5F)))
+ // nocommit: nuke IDFExplanation!
+ // nocommit: are we summing this in the right place for phrase estimation????
+ @Override
+ public IDFExplanation computeWeight(IndexSearcher searcher, String fieldName, TermContext... termStats) throws IOException {
+ float value = 0.0f;
+ final StringBuilder exp = new StringBuilder();
+
+ final int max = searcher.maxDoc();
+
+ for (final TermContext stat : termStats ) {
+ final int dfj = stat.docFreq();
+ value += Math.log(1 + ((max - dfj + 0.5F)/(dfj + 0.5F)));
+ exp.append(" ");
+ exp.append(dfj);
+ }
+
+ final float idfValue = value;
+ return new IDFExplanation() {
+ @Override
+ public float getIdf() {
+ return idfValue;
+ }
+ @Override
+ public String explain() {
+ return exp.toString();
+ }
+ };
+ }
+
+ @Override
+ public ExactDocScorer exactDocScorer(Weight weight, String fieldName, AtomicReaderContext context) throws IOException {
+ byte[] norms = context.reader.norms(fieldName);
+ float avgdl = norms == null ? 0f : avgDocumentLength(fieldName, context);
+ return new ExactBM25DocScorer((float) Math.sqrt(weight.getValue()), norms, avgdl);
+ }
+
+ @Override
+ public SloppyDocScorer sloppyDocScorer(Weight weight, String fieldName, AtomicReaderContext context) throws IOException {
+ byte[] norms = context.reader.norms(fieldName);
+ float avgdl = norms == null ? 0f : avgDocumentLength(fieldName, context);
+ return new SloppyBM25DocScorer((float) Math.sqrt(weight.getValue()), norms, avgdl);
+ }
+
+ private float avgDocumentLength(String field, ReaderContext context) throws IOException {
+ // nocommit: crap that we calc this over and over redundantly for each segment (we should just do it once in the weight, once it's generalized)
+ context = ReaderUtil.getTopLevelContext(context);
+ long normsum = context.reader.getSumOfNorms(field);
+ long maxdoc = context.reader.maxDoc();
+ int avgnorm = (int) (normsum / (double) maxdoc);
+ return decodeNormValue((byte)avgnorm);
+ }
+
+ private class ExactBM25DocScorer extends ExactDocScorer {
+ private final float weightValue;
+ private final byte[] norms;
+ private final float avgdl;
+
+ ExactBM25DocScorer(float weightValue, byte norms[], float avgdl) {
+ this.weightValue = weightValue;
+ this.norms = norms;
+ this.avgdl = avgdl;
+ }
+
+ // todo: optimize
+ @Override
+ public float score(int doc, int freq) {
+ float norm = norms == null ? 0 : k1 * ((1 - b) + b * (decodeNormValue(norms[doc])) / (avgdl));
+ return weightValue * (freq * (k1 + 1)) / (freq + norm);
+ }
+ }
+
+ private class SloppyBM25DocScorer extends SloppyDocScorer {
+ private final float weightValue;
+ private final byte[] norms;
+ private final float avgdl;
+
+ SloppyBM25DocScorer(float weightValue, byte norms[], float avgdl) {
+ this.weightValue = weightValue;
+ this.norms = norms;
+ this.avgdl = avgdl;
+ }
+
+ // todo: optimize
+ @Override
+ public float score(int doc, float freq) {
+ float norm = norms == null ? 0 : k1 * ((1 - b) + b * (decodeNormValue(norms[doc])) / (avgdl));
+ return weightValue * (freq * (k1 + 1)) / (freq + norm);
+ }
+ }
+}
Added: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java?rev=1086118&view=auto
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java (added)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockBM25SimilarityProvider.java Mon Mar 28 05:02:29 2011
@@ -0,0 +1,32 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class MockBM25SimilarityProvider extends DefaultSimilarityProvider {
+
+ private final Similarity impl = new MockBM25Similarity();
+
+ public Similarity get(String field) {
+ return impl;
+ }
+
+ //nocommit: maybe ok given how bm25 works?
+ public float queryNorm(float sumOfSquaredWeights) {
+ return 1f;
+ }
+}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarity.java Mon Mar 28 05:02:29 2011
@@ -44,7 +44,8 @@ import org.apache.lucene.util.SmallFloat
* </code>
* <p>
* NOTE: to use this Similarity, use MockLMSimilarityProvider (as this formula already incorporates coord()
- * and currently depends upon a disabled queryNorm)
+ * and currently depends upon a disabled queryNorm)
+ * WARNING: doesn't work with preflex codec
*/
public class MockLMSimilarity extends Similarity {
// TODO: the norm table can probably be per-sim so you can configure this.
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/MockLMSimilarityProvider.java Mon Mar 28 05:02:29 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.search;
* <li> disables coord, because its already factored into the formula
* <li> disables queryNorm, because we (currently) shove part of the formula in there as "idf"
* </ul>
+ * WARNING: doesn't work with preflex codec
*/
public class MockLMSimilarityProvider implements SimilarityProvider {
private static final Similarity impl = new MockLMSimilarity();
Modified: lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java?rev=1086118&r1=1086117&r2=1086118&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (original)
+++ lucene/dev/branches/flexscoring/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java Mon Mar 28 05:02:29 2011
@@ -21,7 +21,6 @@ import org.apache.lucene.benchmark.quali
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.benchmark.quality.*;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MockLMSimilarityProvider;
import org.apache.lucene.store.FSDirectory;
import java.io.BufferedReader;
@@ -56,7 +55,8 @@ public class QueryDriver {
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexSearcher searcher = new IndexSearcher(dir, true);
// nocommit
- // searcher.setSimilarityProvider(new MockLMSimilarityProvider());
+ //searcher.setSimilarityProvider(new MockLMSimilarityProvider());
+ //searcher.setSimilarityProvider(new MockBM25SimilarityProvider());
int maxResults = 1000;
String docNameField = "docname";