You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/24 20:13:33 UTC
svn commit: r1062927 [1/2] - in /lucene/dev/trunk: lucene/
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/contrib/misc/src/java/org/apache/lucene/index/ l...
Author: rmuir
Date: Mon Jan 24 19:13:31 2011
New Revision: 1062927
URL: http://svn.apache.org/viewvc?rev=1062927&view=rev
Log:
LUCENE-2236: per-field similarity
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java
lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Query.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Weight.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/DocHelper.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestParallelReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBoolean2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestComplexExplanations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSetNorm.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSimilarity.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
lucene/dev/trunk/solr/src/java/org/apache/solr/schema/IndexSchema.java
lucene/dev/trunk/solr/src/java/org/apache/solr/schema/SimilarityFactory.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/IDFValueSource.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/NormValueSource.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/TFValueSource.java
lucene/dev/trunk/solr/src/java/org/apache/solr/update/SolrIndexConfig.java
lucene/dev/trunk/solr/src/test/org/apache/solr/schema/CustomSimilarityFactory.java
lucene/dev/trunk/solr/src/test/org/apache/solr/schema/IndexSchemaTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jan 24 19:13:31 2011
@@ -131,6 +131,9 @@ Changes in backwards compatibility polic
* LUCENE-2882: Cut over SpanQuery#getSpans to AtomicReaderContext to enforce
per segment semantics on SpanQuery & Spans. (Simon Willnauer)
+* LUCENE-2236: Similarity can now be configured on a per-field basis. See the
+ migration notes in MIGRATE.txt for more details. (Robert Muir, Doron Cohen)
+
Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Mon Jan 24 19:13:31 2011
@@ -331,3 +331,9 @@ LUCENE-1458, LUCENE-2111: Flexible Index
toString() is no longer implemented by AttributeImpl, so if you have overridden
toString(), port your customization over to reflectWith(). reflectAsString() would
then return what toString() did before.
+
+* LUCENE-2236: The default Similarity can no longer be set statically (and dangerously) for the entire JVM via Similarity.setDefault().
+ Instead, IndexWriterConfig and IndexSearcher now take a SimilarityProvider.
+ Similarity can now be configured on a per-field basis.
+ Similarity retains only the field-specific relevance methods such as tf() and idf().
+ Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Mon Jan 24 19:13:31 2011
@@ -42,7 +42,8 @@ import org.apache.lucene.index.FieldInve
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CollectionUtil;
@@ -67,7 +68,7 @@ public class InstantiatedIndexWriter imp
private final InstantiatedIndex index;
private final Analyzer analyzer;
- private Similarity similarity = Similarity.getDefault(); // how to normalize;
+ private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize;
private transient Set<String> fieldNameBuffer;
/**
@@ -236,11 +237,12 @@ public class InstantiatedIndexWriter imp
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
+ final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
final FieldInvertState invertState = new FieldInvertState();
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
- final float norm = similarity.computeNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, invertState);
- normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
+ final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState);
+ normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm);
} else {
System.currentTimeMillis();
}
@@ -659,12 +661,12 @@ public class InstantiatedIndexWriter imp
addDocument(doc, analyzer);
}
- public Similarity getSimilarity() {
- return similarity;
+ public SimilarityProvider getSimilarityProvider() {
+ return similarityProvider;
}
- public void setSimilarity(Similarity similarity) {
- this.similarity = similarity;
+ public void setSimilarityProvider(SimilarityProvider similarityProvider) {
+ this.similarityProvider = similarityProvider;
}
public Analyzer getAnalyzer() {
Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Jan 24 19:13:31 2011
@@ -57,6 +57,7 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -1169,9 +1170,9 @@ public class MemoryIndex implements Seri
};
}
- private Similarity getSimilarity() {
- if (searcher != null) return searcher.getSimilarity();
- return Similarity.getDefault();
+ private SimilarityProvider getSimilarityProvider() {
+ if (searcher != null) return searcher.getSimilarityProvider();
+ return IndexSearcher.getDefaultSimilarityProvider();
}
private void setSearcher(IndexSearcher searcher) {
@@ -1181,20 +1182,21 @@ public class MemoryIndex implements Seri
/** performance hack: cache norms to avoid repeated expensive calculations */
private byte[] cachedNorms;
private String cachedFieldName;
- private Similarity cachedSimilarity;
+ private SimilarityProvider cachedSimilarity;
@Override
public byte[] norms(String fieldName) {
byte[] norms = cachedNorms;
- Similarity sim = getSimilarity();
+ SimilarityProvider sim = getSimilarityProvider();
if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
Info info = getInfo(fieldName);
+ Similarity fieldSim = sim.get(fieldName);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
- float n = sim.computeNorm(fieldName, invertState);
- byte norm = sim.encodeNormValue(n);
+ float n = fieldSim.computeNorm(fieldName, invertState);
+ byte norm = fieldSim.encodeNormValue(n);
norms = new byte[] {norm};
// cache it for future reuse
Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java Mon Jan 24 19:13:31 2011
@@ -24,6 +24,7 @@ import java.util.ArrayList;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
@@ -57,13 +58,13 @@ public class FieldNormModifier {
System.exit(1);
}
- Similarity s = null;
+ SimilarityProvider s = null;
if (args[1].equals("-d"))
args[1] = DefaultSimilarity.class.getName();
try {
- s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
+ s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance();
} catch (Exception e) {
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
e.printStackTrace(System.err);
@@ -84,7 +85,7 @@ public class FieldNormModifier {
private Directory dir;
- private Similarity sim;
+ private SimilarityProvider sim;
/**
* Constructor for code that wishes to use this class programmatically
@@ -93,7 +94,7 @@ public class FieldNormModifier {
* @param d the Directory to modify
* @param s the Similarity to use (can be null)
*/
- public FieldNormModifier(Directory d, Similarity s) {
+ public FieldNormModifier(Directory d, SimilarityProvider s) {
dir = d;
sim = s;
}
@@ -111,7 +112,7 @@ public class FieldNormModifier {
*/
public void reSetNorms(String field) throws IOException {
String fieldName = StringHelper.intern(field);
-
+ Similarity fieldSim = sim.get(field);
IndexReader reader = null;
try {
reader = IndexReader.open(dir, false);
@@ -148,7 +149,7 @@ public class FieldNormModifier {
for (int d = 0; d < termCounts.length; d++) {
if (delDocs == null || !delDocs.get(d)) {
invertState.setLength(termCounts[d]);
- subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
+ subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState)));
}
}
}
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Mon Jan 24 19:13:31 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.search.Collecto
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -42,7 +42,7 @@ public class TestFieldNormModifier exten
public Directory store;
/** inverts the normal notion of lengthNorm */
- public static Similarity s = new DefaultSimilarity() {
+ public static SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Mon Jan 24 19:13:31 2011
@@ -33,7 +33,7 @@ import org.apache.lucene.search.Collecto
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -47,7 +47,7 @@ public class TestLengthNormModifier exte
public Directory store;
/** inverts the normal notion of lengthNorm */
- public static Similarity s = new DefaultSimilarity() {
+ public static SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
@@ -163,7 +163,7 @@ public class TestLengthNormModifier exte
}
// override the norms to be inverted
- Similarity s = new DefaultSimilarity() {
+ SimilarityProvider s = new DefaultSimilarity() {
@Override
public float computeNorm(String fieldName, FieldInvertState state) {
return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardBooleanQueryNodeBuilder.java Mon Jan 24 19:13:31 2011
@@ -31,7 +31,7 @@ import org.apache.lucene.queryParser.sta
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
/**
@@ -41,7 +41,7 @@ import org.apache.lucene.search.BooleanQ
*
* @see BooleanQueryNodeBuilder
* @see BooleanQuery
- * @see Similarity#coord(int, int)
+ * @see SimilarityProvider#coord(int, int)
*/
public class StandardBooleanQueryNodeBuilder implements StandardQueryBuilder {
Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/StandardBooleanQueryNode.java Mon Jan 24 19:13:31 2011
@@ -22,14 +22,14 @@ import java.util.List;
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
/**
* A {@link StandardBooleanQueryNode} has the same behavior as
* {@link BooleanQueryNode}. It only indicates if the coord should be enabled or
* not for this boolean query. <br/>
*
- * @see Similarity#coord(int, int)
+ * @see SimilarityProvider#coord(int, int)
* @see BooleanQuery
*/
public class StandardBooleanQueryNode extends BooleanQueryNode {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Jan 24 19:13:31 2011
@@ -30,7 +30,7 @@ import java.util.concurrent.atomic.Atomi
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
@@ -127,7 +127,7 @@ final class DocumentsWriter {
private boolean aborting; // True if an abort is pending
PrintStream infoStream;
- Similarity similarity;
+ SimilarityProvider similarityProvider;
// max # simultaneous threads; if there are more than
// this, they wait for others to finish first
@@ -140,7 +140,7 @@ final class DocumentsWriter {
DocumentsWriter docWriter;
Analyzer analyzer;
PrintStream infoStream;
- Similarity similarity;
+ SimilarityProvider similarityProvider;
int docID;
Document doc;
String maxTermPrefix;
@@ -284,7 +284,7 @@ final class DocumentsWriter {
DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
this.directory = directory;
this.writer = writer;
- this.similarity = writer.getConfig().getSimilarity();
+ this.similarityProvider = writer.getConfig().getSimilarityProvider();
this.maxThreadStates = maxThreadStates;
this.fieldInfos = fieldInfos;
this.bufferedDeletes = bufferedDeletes;
@@ -357,10 +357,10 @@ final class DocumentsWriter {
}
}
- synchronized void setSimilarity(Similarity similarity) {
- this.similarity = similarity;
+ synchronized void setSimilarityProvider(SimilarityProvider similarity) {
+ this.similarityProvider = similarity;
for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.similarity = similarity;
+ threadStates[i].docState.similarityProvider = similarity;
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java Mon Jan 24 19:13:31 2011
@@ -36,7 +36,7 @@ final class DocumentsWriterThreadState {
this.docWriter = docWriter;
docState = new DocumentsWriter.DocState();
docState.infoStream = docWriter.infoStream;
- docState.similarity = docWriter.similarity;
+ docState.similarityProvider = docWriter.similarityProvider;
docState.docWriter = docWriter;
consumer = docWriter.consumer.addThread(this);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Mon Jan 24 19:13:31 2011
@@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.Version;
/**
@@ -111,7 +112,7 @@ public final class IndexWriterConfig imp
private IndexDeletionPolicy delPolicy;
private IndexCommit commit;
private OpenMode openMode;
- private Similarity similarity;
+ private SimilarityProvider similarityProvider;
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
private MergeScheduler mergeScheduler;
private long writeLockTimeout;
@@ -142,7 +143,7 @@ public final class IndexWriterConfig imp
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
openMode = OpenMode.CREATE_OR_APPEND;
- similarity = Similarity.getDefault();
+ similarityProvider = IndexSearcher.getDefaultSimilarityProvider();
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
writeLockTimeout = WRITE_LOCK_TIMEOUT;
@@ -234,25 +235,22 @@ public final class IndexWriterConfig imp
}
/**
- * Expert: set the {@link Similarity} implementation used by this IndexWriter.
+ * Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter.
* <p>
- * <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
- * the similarity will be set to the default.
- *
- * @see Similarity#setDefault(Similarity)
+ * <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
+ * the similarity provider will be set to the default implementation (unspecified).
*/
- public IndexWriterConfig setSimilarity(Similarity similarity) {
- this.similarity = similarity == null ? Similarity.getDefault() : similarity;
+ public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
+ this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
return this;
}
/**
- * Expert: returns the {@link Similarity} implementation used by this
- * IndexWriter. This defaults to the current value of
- * {@link Similarity#getDefault()}.
+ * Expert: returns the {@link SimilarityProvider} implementation used by this
+ * IndexWriter.
*/
- public Similarity getSimilarity() {
- return similarity;
+ public SimilarityProvider getSimilarityProvider() {
+ return similarityProvider;
}
/**
@@ -576,7 +574,7 @@ public final class IndexWriterConfig imp
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
sb.append("openMode=").append(openMode).append("\n");
- sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
+ sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n");
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Mon Jan 24 19:13:31 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.ArrayUtil;
/** Taps into DocInverter, as an InvertedDocEndConsumer,
@@ -29,7 +30,8 @@ final class NormsWriterPerField extends
final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
final DocumentsWriter.DocState docState;
-
+ final Similarity similarity;
+
// Holds all docID/norm pairs we've seen
int[] docIDs = new int[1];
byte[] norms = new byte[1];
@@ -49,6 +51,7 @@ final class NormsWriterPerField extends
this.fieldInfo = fieldInfo;
docState = perThread.docState;
fieldState = docInverterPerField.fieldState;
+ similarity = docState.similarityProvider.get(fieldInfo.name);
}
@Override
@@ -71,8 +74,8 @@ final class NormsWriterPerField extends
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1+upto);
}
- final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
- norms[upto] = docState.similarity.encodeNormValue(norm);
+ final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
+ norms[upto] = similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Mon Jan 24 19:13:31 2011
@@ -72,18 +72,18 @@ public class BooleanQuery extends Query
/** Constructs an empty boolean query.
*
- * {@link Similarity#coord(int,int)} may be disabled in scoring, as
+ * {@link SimilarityProvider#coord(int,int)} may be disabled in scoring, as
* appropriate. For example, this score factor does not make sense for most
* automatically generated queries, like {@link WildcardQuery} and {@link
* FuzzyQuery}.
*
- * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
+ * @param disableCoord disables {@link SimilarityProvider#coord(int,int)} in scoring.
*/
public BooleanQuery(boolean disableCoord) {
this.disableCoord = disableCoord;
}
- /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
+ /** Returns true iff {@link SimilarityProvider#coord(int,int)} is disabled in
* scoring for this query instance.
* @see #BooleanQuery(boolean)
*/
@@ -162,14 +162,14 @@ public class BooleanQuery extends Query
*/
protected class BooleanWeight extends Weight {
/** The Similarity implementation. */
- protected Similarity similarity;
+ protected SimilarityProvider similarityProvider;
protected ArrayList<Weight> weights;
protected int maxCoord; // num optional + num required
private final boolean disableCoord;
public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
throws IOException {
- this.similarity = searcher.getSimilarity();
+ this.similarityProvider = searcher.getSimilarityProvider();
this.disableCoord = disableCoord;
weights = new ArrayList<Weight>(clauses.size());
for (int i = 0 ; i < clauses.size(); i++) {
@@ -202,7 +202,7 @@ public class BooleanQuery extends Query
}
public float coord(int overlap, int maxOverlap) {
- return similarity.coord(overlap, maxOverlap);
+ return similarityProvider.coord(overlap, maxOverlap);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/DefaultSimilarity.java Mon Jan 24 19:13:31 2011
@@ -20,7 +20,7 @@ import org.apache.lucene.index.FieldInve
*/
/** Expert: Default scoring implementation. */
-public class DefaultSimilarity extends Similarity {
+public class DefaultSimilarity extends Similarity implements SimilarityProvider {
/** Implemented as
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
@@ -41,7 +41,6 @@ public class DefaultSimilarity extends S
}
/** Implemented as <code>1/sqrt(sumOfSquaredWeights)</code>. */
- @Override
public float queryNorm(float sumOfSquaredWeights) {
return (float)(1.0 / Math.sqrt(sumOfSquaredWeights));
}
@@ -65,7 +64,6 @@ public class DefaultSimilarity extends S
}
/** Implemented as <code>overlap / maxOverlap</code>. */
- @Override
public float coord(int overlap, int maxOverlap) {
return overlap / (float)maxOverlap;
}
@@ -90,4 +88,12 @@ public class DefaultSimilarity extends S
public boolean getDiscountOverlaps() {
return discountOverlaps;
}
+
+ /**
+ * Returns this default implementation for all fields.
+ * Override this method to customize scoring on a per-field basis.
+ */
+ public Similarity get(String field) {
+ return this;
+ }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Mon Jan 24 19:13:31 2011
@@ -70,8 +70,22 @@ public class IndexSearcher {
private final ExecutorService executor;
protected final IndexSearcher[] subSearchers;
- /** The Similarity implementation used by this searcher. */
- private Similarity similarity = Similarity.getDefault();
+ // the default SimilarityProvider
+ private static final SimilarityProvider defaultProvider = new DefaultSimilarity();
+
+ /**
+ * Expert: returns a default SimilarityProvider instance.
+ * In general, this method is only called to initialize searchers and writers.
+ * User code and query implementations should respect
+ * {@link IndexSearcher#getSimilarityProvider()}.
+ * @lucene.internal
+ */
+ public static SimilarityProvider getDefaultSimilarityProvider() {
+ return defaultProvider;
+ }
+
+ /** The SimilarityProvider implementation used by this searcher. */
+ private SimilarityProvider similarityProvider = defaultProvider;
/** Creates a searcher searching the index in the named
* directory, with readOnly=true
@@ -248,16 +262,15 @@ public class IndexSearcher {
return reader.document(docID, fieldSelector);
}
- /** Expert: Set the Similarity implementation used by this Searcher.
+ /** Expert: Set the SimilarityProvider implementation used by this Searcher.
*
- * @see Similarity#setDefault(Similarity)
*/
- public void setSimilarity(Similarity similarity) {
- this.similarity = similarity;
+ public void setSimilarityProvider(SimilarityProvider similarityProvider) {
+ this.similarityProvider = similarityProvider;
}
- public Similarity getSimilarity() {
- return similarity;
+ public SimilarityProvider getSimilarityProvider() {
+ return similarityProvider;
}
/**
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java Mon Jan 24 19:13:31 2011
@@ -98,7 +98,7 @@ public class MatchAllDocsQuery extends Q
private float queryNorm;
public MatchAllDocsWeight(IndexSearcher searcher) {
- this.similarity = searcher.getSimilarity();
+ this.similarity = normsField == null ? null : searcher.getSimilarityProvider().get(normsField);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Jan 24 19:13:31 2011
@@ -139,7 +139,7 @@ public class MultiPhraseQuery extends Qu
public MultiPhraseWeight(IndexSearcher searcher)
throws IOException {
- this.similarity = searcher.getSimilarity();
+ this.similarity = searcher.getSimilarityProvider().get(field);
// compute idf
ArrayList<Term> allTerms = new ArrayList<Term>();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon Jan 24 19:13:31 2011
@@ -146,7 +146,7 @@ public class PhraseQuery extends Query {
public PhraseWeight(IndexSearcher searcher)
throws IOException {
- this.similarity = searcher.getSimilarity();
+ this.similarity = searcher.getSimilarityProvider().get(field);
idfExp = similarity.idfExplain(terms, searcher);
idf = idfExp.getIdf();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Query.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Query.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Query.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Query.java Mon Jan 24 19:13:31 2011
@@ -98,7 +98,7 @@ public abstract class Query implements j
Query query = searcher.rewrite(this);
Weight weight = query.createWeight(searcher);
float sum = weight.sumOfSquaredWeights();
- float norm = searcher.getSimilarity().queryNorm(sum);
+ float norm = searcher.getSimilarityProvider().queryNorm(sum);
if (Float.isInfinite(norm) || Float.isNaN(norm))
norm = 1.0f;
weight.normalize(norm);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Similarity.java Mon Jan 24 19:13:31 2011
@@ -362,7 +362,7 @@ import org.apache.lucene.util.SmallFloat
* Typically, a document that contains more of the query's terms will receive a higher score
* than another document with fewer query terms.
* This is a search time factor computed in
- * {@link #coord(int, int) coord(q,d)}
+ * {@link SimilarityProvider#coord(int, int) coord(q,d)}
* by the Similarity in effect at search time.
* <br> <br>
* </li>
@@ -522,40 +522,13 @@ import org.apache.lucene.util.SmallFloat
* </li>
* </ol>
*
- * @see #setDefault(Similarity)
- * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
- * @see IndexSearcher#setSimilarity(Similarity)
+ * @see org.apache.lucene.index.IndexWriterConfig#setSimilarityProvider(SimilarityProvider)
+ * @see IndexSearcher#setSimilarityProvider(SimilarityProvider)
*/
public abstract class Similarity implements Serializable {
- /**
- * The Similarity implementation used by default.
- **/
- private static Similarity defaultImpl = new DefaultSimilarity();
public static final int NO_DOC_ID_PROVIDED = -1;
- /** Set the default Similarity implementation used by indexing and search
- * code.
- *
- * @see IndexSearcher#setSimilarity(Similarity)
- * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
- */
- public static void setDefault(Similarity similarity) {
- Similarity.defaultImpl = similarity;
- }
-
- /** Return the default Similarity implementation used by indexing and search
- * code.
- *
- * <p>This is initially an instance of {@link DefaultSimilarity}.
- *
- * @see IndexSearcher#setSimilarity(Similarity)
- * @see org.apache.lucene.index.IndexWriterConfig#setSimilarity(Similarity)
- */
- public static Similarity getDefault() {
- return Similarity.defaultImpl;
- }
-
/** Cache of decoded bytes. */
private static final float[] NORM_TABLE = new float[256];
@@ -632,21 +605,6 @@ public abstract class Similarity impleme
throw new UnsupportedOperationException("please use computeNorm instead");
}
- /** Computes the normalization value for a query given the sum of the squared
- * weights of each of the query terms. This value is multiplied into the
- * weight of each query term. While the classic query normalization factor is
- * computed as 1/sqrt(sumOfSquaredWeights), other implementations might
- * completely ignore sumOfSquaredWeights (ie return 1).
- *
- * <p>This does not affect ranking, but the default implementation does make scores
- * from different queries more comparable than they would be by eliminating the
- * magnitude of the Query vector as a factor in the score.
- *
- * @param sumOfSquaredWeights the sum of the squares of query term weights
- * @return a normalization factor for query weights
- */
- public abstract float queryNorm(float sumOfSquaredWeights);
-
/** Encodes a normalization factor for storage in an index.
*
* <p>The encoding uses a three-bit mantissa, a five-bit exponent, and
@@ -816,20 +774,6 @@ public abstract class Similarity impleme
*/
public abstract float idf(int docFreq, int numDocs);
- /** Computes a score factor based on the fraction of all query terms that a
- * document contains. This value is multiplied into scores.
- *
- * <p>The presence of a large portion of the query terms indicates a better
- * match with the query, so implementations of this method usually return
- * larger values when the ratio between these parameters is large and smaller
- * values when the ratio between them is small.
- *
- * @param overlap the number of query terms matched in the document
- * @param maxOverlap the total number of terms in the query
- * @return a score factor based on term overlap with the query
- */
- public abstract float coord(int overlap, int maxOverlap);
-
/**
* Calculate a scoring factor based on the data in the payload. Overriding implementations
* are responsible for interpreting what is in the payload. Lucene makes no assumptions about
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java?rev=1062927&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SimilarityProvider.java Mon Jan 24 19:13:31 2011
@@ -0,0 +1,66 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Expert: Scoring API.
+ *
+ * Provides top-level scoring functions that aren't specific to a field
+ * and that work across multi-field queries (such as {@link BooleanQuery}).
+ *
+ * Field-specific scoring is accomplished through {@link Similarity}.
+ *
+ * @lucene.experimental
+ */
+public interface SimilarityProvider {
+
+ /** Computes a score factor based on the fraction of all query terms that a
+ * document contains. This value is multiplied into scores.
+ *
+ * <p>The presence of a large portion of the query terms indicates a better
+ * match with the query, so implementations of this method usually return
+ * larger values when the ratio between these parameters is large and smaller
+ * values when the ratio between them is small.
+ *
+ * @param overlap the number of query terms matched in the document
+ * @param maxOverlap the total number of terms in the query
+ * @return a score factor based on term overlap with the query
+ */
+ public abstract float coord(int overlap, int maxOverlap);
+
+ /** Computes the normalization value for a query given the sum of the squared
+ * weights of each of the query terms. This value is multiplied into the
+ * weight of each query term. While the classic query normalization factor is
+ * computed as 1/sqrt(sumOfSquaredWeights), other implementations might
+ * completely ignore sumOfSquaredWeights (i.e. return 1).
+ *
+ * <p>This does not affect ranking, but the default implementation makes scores
+ * from different queries more comparable than they would otherwise be, by eliminating
+ * the magnitude of the Query vector as a factor in the score.
+ *
+ * @param sumOfSquaredWeights the sum of the squares of query term weights
+ * @return a normalization factor for query weights
+ */
+ public abstract float queryNorm(float sumOfSquaredWeights);
+
+ /** Returns a {@link Similarity} for scoring a field.
+ * @param field field name.
+ * @return a field-specific Similarity.
+ */
+ public abstract Similarity get(String field);
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Mon Jan 24 19:13:31 2011
@@ -54,7 +54,7 @@ public class TermQuery extends Query {
throws IOException {
assert termStates != null : "PerReaderTermState must not be null";
this.termStates = termStates;
- this.similarity = searcher.getSimilarity();
+ this.similarity = searcher.getSimilarityProvider().get(term.field());
if (docFreq != -1) {
idfExp = similarity.idfExplain(term, searcher, docFreq);
} else {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Weight.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Weight.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Weight.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/Weight.java Mon Jan 24 19:13:31 2011
@@ -44,7 +44,7 @@ import org.apache.lucene.index.IndexRead
* <code>IndexSearcher</code> ({@link Query#createWeight(IndexSearcher)}).
* <li>The {@link #sumOfSquaredWeights()} method is called on the
* <code>Weight</code> to compute the query normalization factor
- * {@link Similarity#queryNorm(float)} of the query clauses contained in the
+ * {@link SimilarityProvider#queryNorm(float)} of the query clauses contained in the
* query.
* <li>The query normalization factor is passed to {@link #normalize(float)}. At
* this point the weighting is complete.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java Mon Jan 24 19:13:31 2011
@@ -42,7 +42,7 @@ public class SpanWeight extends Weight {
public SpanWeight(SpanQuery query, IndexSearcher searcher)
throws IOException {
- this.similarity = searcher.getSimilarity();
+ this.similarity = searcher.getSimilarityProvider().get(query.getField());
this.query = query;
terms=new HashSet<Term>();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/DocHelper.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/DocHelper.java Mon Jan 24 19:13:31 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT;
@@ -220,7 +220,7 @@ class DocHelper {
*/
public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException
{
- return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc);
+ return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), null, doc);
}
/**
@@ -233,9 +233,9 @@ class DocHelper {
* @param doc
* @throws IOException
*/
- public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
+ public static SegmentInfo writeDoc(Directory dir, Analyzer analyzer, SimilarityProvider similarity, Document doc) throws IOException {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
- TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
+ TEST_VERSION_CURRENT, analyzer).setSimilarityProvider(similarity));
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.commit();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Mon Jan 24 19:13:31 2011
@@ -38,12 +38,13 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
@@ -412,7 +413,7 @@ public class TestBackwardsCompatibility
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
- reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
reader.close();
searcher.close();
@@ -460,7 +461,7 @@ public class TestBackwardsCompatibility
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
- reader.setNorm(22, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(22, "content", searcher.getSimilarityProvider().get("content").encodeNormValue(2.0f));
reader.close();
// make sure they "took":
@@ -519,7 +520,7 @@ public class TestBackwardsCompatibility
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
- reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
+ reader.setNorm(21, "content", conf.getSimilarityProvider().get("content").encodeNormValue(1.5f));
reader.close();
}
@@ -556,7 +557,8 @@ public class TestBackwardsCompatibility
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
- reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
+ SimilarityProvider sim = new DefaultSimilarity();
+ reader.setNorm(21, "content", sim.get("content").encodeNormValue(1.5f));
reader.close();
// The numbering of fields can vary depending on which
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Mon Jan 24 19:13:31 2011
@@ -30,7 +30,6 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -608,7 +607,7 @@ public class TestDeletionPolicy extends
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3*i+1);
- reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F));
+ reader.setNorm(4*i+1, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16*(1+i), hits.length);
@@ -716,7 +715,7 @@ public class TestDeletionPolicy extends
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3);
- reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F));
+ reader.setNorm(5, "content", conf.getSimilarityProvider().get("content").encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16, hits.length);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java Mon Jan 24 19:13:31 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@@ -67,9 +68,9 @@ public class TestIndexFileDeleter extend
Term searchTerm = new Term("id", "7");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("didn't delete the right number of documents", 1, delCount);
-
+ Similarity sim = new DefaultSimilarity().get("content");
// Set one norm so we get a .s0 file:
- reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
+ reader.setNorm(21, "content", sim.encodeNormValue(1.5f));
reader.close();
// Now, artificially create an extra .del file & extra
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Mon Jan 24 19:13:31 2011
@@ -39,11 +39,12 @@ import org.apache.lucene.document.SetBas
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -464,8 +465,9 @@ public class TestIndexReader extends Luc
// expected
}
+ Similarity sim = new DefaultSimilarity().get("aaa");
try {
- reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
fail("setNorm after close failed to throw IOException");
} catch (AlreadyClosedException e) {
// expected
@@ -504,8 +506,9 @@ public class TestIndexReader extends Luc
// expected
}
+ Similarity sim = new DefaultSimilarity().get("aaa");
try {
- reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(5, "aaa", sim.encodeNormValue(2.0f));
fail("setNorm should have hit LockObtainFailedException");
} catch (LockObtainFailedException e) {
// expected
@@ -535,7 +538,8 @@ public class TestIndexReader extends Luc
// now open reader & set norm for doc 0
IndexReader reader = IndexReader.open(dir, false);
- reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ Similarity sim = new DefaultSimilarity().get("content");
+ reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
// we should be holding the write lock now:
assertTrue("locked", IndexWriter.isLocked(dir));
@@ -549,7 +553,7 @@ public class TestIndexReader extends Luc
IndexReader reader2 = IndexReader.open(dir, false);
// set norm again for doc 0
- reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f));
+ reader.setNorm(0, "content", sim.encodeNormValue(3.0f));
assertTrue("locked", IndexWriter.isLocked(dir));
reader.close();
@@ -579,15 +583,16 @@ public class TestIndexReader extends Luc
addDoc(writer, searchTerm.text());
writer.close();
+ Similarity sim = new DefaultSimilarity().get("content");
// now open reader & set norm for doc 0 (writes to
// _0_1.s0)
reader = IndexReader.open(dir, false);
- reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
// now open reader again & set norm for doc 0 (writes to _0_2.s0)
reader = IndexReader.open(dir, false);
- reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
assertFalse("failed to remove first generation norms file on writing second generation",
dir.fileExists("_0_1.s0"));
@@ -966,13 +971,13 @@ public class TestIndexReader extends Luc
dir.setMaxSizeInBytes(thisDiskFree);
dir.setRandomIOExceptionRate(rate);
-
+ Similarity sim = new DefaultSimilarity().get("content");
try {
if (0 == x) {
int docId = 12;
for(int i=0;i<13;i++) {
reader.deleteDocument(docId);
- reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(docId, "content", sim.encodeNormValue(2.0f));
docId += 12;
}
}
@@ -1130,8 +1135,9 @@ public class TestIndexReader extends Luc
}
reader = IndexReader.open(dir, false);
+ Similarity sim = new DefaultSimilarity().get("content");
try {
- reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f));
+ reader.setNorm(1, "content", sim.encodeNormValue(2.0f));
fail("did not hit exception when calling setNorm on an invalid doc number");
} catch (ArrayIndexOutOfBoundsException e) {
// expected
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java Mon Jan 24 19:13:31 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.index.SegmentReader.Norm;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@@ -272,13 +273,14 @@ public class TestIndexReaderClone extend
* @throws Exception
*/
private void performDefaultTests(IndexReader r1) throws Exception {
- float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
+ Similarity sim = new DefaultSimilarity().get("field1");
+ float norm1 = sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
IndexReader pr1Clone = (IndexReader) r1.clone();
pr1Clone.deleteDocument(10);
- pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
- assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
- assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
+ pr1Clone.setNorm(4, "field1", sim.encodeNormValue(0.5f));
+ assertTrue(sim.decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
+ assertTrue(sim.decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
final Bits delDocs = MultiFields.getDeletedDocs(r1);
assertTrue(delDocs == null || !delDocs.get(10));
@@ -327,7 +329,8 @@ public class TestIndexReaderClone extend
TestIndexReaderReopen.createIndex(random, dir1, false);
SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false));
origSegmentReader.deleteDocument(1);
- origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
+ Similarity sim = new DefaultSimilarity().get("field1");
+ origSegmentReader.setNorm(4, "field1", sim.encodeNormValue(0.5f));
SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader
.clone();
@@ -426,8 +429,9 @@ public class TestIndexReaderClone extend
final Directory dir1 = newDirectory();
TestIndexReaderReopen.createIndex(random, dir1, false);
IndexReader orig = IndexReader.open(dir1, false);
- orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f));
- final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
+ Similarity sim = new DefaultSimilarity().get("field1");
+ orig.setNorm(1, "field1", sim.encodeNormValue(17.0f));
+ final byte encoded = sim.encodeNormValue(17.0f);
assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]);
// the cloned segmentreader should have 2 references, 1 to itself, and 1 to
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java Mon Jan 24 19:13:31 2011
@@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.SegmentReader.Norm;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -50,7 +51,7 @@ public class TestIndexReaderCloneNorms e
private static final int NUM_FIELDS = 10;
- private Similarity similarityOne;
+ private SimilarityProvider similarityOne;
private Analyzer anlzr;
@@ -203,19 +204,20 @@ public class TestIndexReaderCloneNorms e
IndexReader reader4C = (IndexReader) reader3C.clone();
SegmentReader segmentReader4C = getOnlySegmentReader(reader4C);
assertEquals(4, reader3CCNorm.bytesRef().get());
- reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f));
+ Similarity sim = new DefaultSimilarity().get("field1");
+ reader4C.setNorm(5, "field1", sim.encodeNormValue(0.33f));
// generate a cannot update exception in reader1
try {
- reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f));
+ reader3C.setNorm(1, "field1", sim.encodeNormValue(0.99f));
fail("did not hit expected exception");
} catch (Exception ex) {
// expected
}
// norm values should be different
- assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
- != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
+ assertTrue(sim.decodeNormValue(segmentReader3C.norms("field1")[5])
+ != sim.decodeNormValue(segmentReader4C.norms("field1")[5]));
Norm reader4CCNorm = segmentReader4C.norms.get("field1");
assertEquals(3, reader3CCNorm.bytesRef().get());
assertEquals(1, reader4CCNorm.bytesRef().get());
@@ -223,7 +225,7 @@ public class TestIndexReaderCloneNorms e
IndexReader reader5C = (IndexReader) reader4C.clone();
SegmentReader segmentReader5C = getOnlySegmentReader(reader5C);
Norm reader5CCNorm = segmentReader5C.norms.get("field1");
- reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f));
+ reader5C.setNorm(5, "field1", sim.encodeNormValue(0.7f));
assertEquals(1, reader5CCNorm.bytesRef().get());
reader5C.close();
@@ -237,7 +239,7 @@ public class TestIndexReaderCloneNorms e
private void createIndex(Random random, Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne));
+ .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(true);
@@ -256,8 +258,9 @@ public class TestIndexReaderCloneNorms e
// System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
- ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm));
- ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm));
+ Similarity sim = new DefaultSimilarity().get("f" + 1);
+ ir.setNorm(i, "f" + 1, sim.encodeNormValue(newNorm));
+ ir.setNorm(k, "f" + 1, sim.encodeNormValue(origNorm));
// System.out.println("setNorm i: "+i);
// break;
}
@@ -277,7 +280,8 @@ public class TestIndexReaderCloneNorms e
assertEquals("number of norms mismatches", numDocNorms, b.length);
ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
for (int j = 0; j < b.length; j++) {
- float norm = Similarity.getDefault().decodeNormValue(b[j]);
+ Similarity sim = new DefaultSimilarity().get(field);
+ float norm = sim.decodeNormValue(b[j]);
float norm1 = storedNorms.get(j).floatValue();
assertEquals("stored norm value of " + field + " for doc " + j + " is "
+ norm + " - a mismatch!", norm, norm1, 0.000001);
@@ -289,7 +293,7 @@ public class TestIndexReaderCloneNorms e
throws IOException {
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne);
+ .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne);
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(compound);
@@ -303,7 +307,7 @@ public class TestIndexReaderCloneNorms e
// create the next document
private Document newDoc() {
Document d = new Document();
- float boost = nextNorm();
+ float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed
for (int i = 0; i < 10; i++) {
Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
f.setBoost(boost);
@@ -313,11 +317,12 @@ public class TestIndexReaderCloneNorms e
}
// return unique norm values that are unchanged by encoding/decoding
- private float nextNorm() {
+ private float nextNorm(String fname) {
float norm = lastNorm + normDelta;
+ Similarity sim = new DefaultSimilarity().get(fname);
do {
- float norm1 = Similarity.getDefault().decodeNormValue(
- Similarity.getDefault().encodeNormValue(norm));
+ float norm1 = sim.decodeNormValue(
+ sim.encodeNormValue(norm));
if (norm1 > lastNorm) {
// System.out.println(norm1+" > "+lastNorm);
norm = norm1;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Mon Jan 24 19:13:31 2011
@@ -35,9 +35,11 @@ import org.apache.lucene.document.Field.
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -615,8 +617,9 @@ public class TestIndexReaderReopen exten
IndexReader reader2 = reader1.reopen();
modifier = IndexReader.open(dir1, false);
- modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f));
- modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f));
+ SimilarityProvider sim = new DefaultSimilarity();
+ modifier.setNorm(1, "field1", sim.get("field1").encodeNormValue(50f));
+ modifier.setNorm(1, "field2", sim.get("field2").encodeNormValue(50f));
modifier.close();
IndexReader reader3 = reader2.reopen();
@@ -709,7 +712,8 @@ public class TestIndexReaderReopen exten
protected void modifyIndex(int i) throws IOException {
if (i % 3 == 0) {
IndexReader modifier = IndexReader.open(dir, false);
- modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f));
+ Similarity sim = new DefaultSimilarity().get("field1");
+ modifier.setNorm(i, "field1", sim.encodeNormValue(50f));
modifier.close();
} else if (i % 3 == 1) {
IndexReader modifier = IndexReader.open(dir, false);
@@ -989,9 +993,10 @@ public class TestIndexReaderReopen exten
}
case 1: {
IndexReader reader = IndexReader.open(dir, false);
- reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f));
- reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f));
- reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f));
+ SimilarityProvider sim = new DefaultSimilarity();
+ reader.setNorm(4, "field1", sim.get("field1").encodeNormValue(123f));
+ reader.setNorm(44, "field2", sim.get("field2").encodeNormValue(222f));
+ reader.setNorm(44, "field4", sim.get("field4").encodeNormValue(22f));
reader.close();
break;
}
@@ -1012,8 +1017,9 @@ public class TestIndexReaderReopen exten
}
case 4: {
IndexReader reader = IndexReader.open(dir, false);
- reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f));
- reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f));
+ SimilarityProvider sim = new DefaultSimilarity();
+ reader.setNorm(5, "field1", sim.get("field1").encodeNormValue(123f));
+ reader.setNorm(55, "field2", sim.get("field2").encodeNormValue(222f));
reader.close();
break;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Mon Jan 24 19:13:31 2011
@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@@ -55,7 +55,8 @@ public class TestIndexWriterConfig exten
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
- assertTrue(Similarity.getDefault() == conf.getSimilarity());
+ // we don't need to assert this, it should be unspecified
+ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
@@ -77,7 +78,7 @@ public class TestIndexWriterConfig exten
getters.add("getMaxFieldLength");
getters.add("getMergeScheduler");
getters.add("getOpenMode");
- getters.add("getSimilarity");
+ getters.add("getSimilarityProvider");
getters.add("getTermIndexInterval");
getters.add("getWriteLockTimeout");
getters.add("getDefaultWriteLockTimeout");
@@ -173,12 +174,13 @@ public class TestIndexWriterConfig exten
conf.setMergeScheduler(null);
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
- // Test Similarity
- assertTrue(Similarity.getDefault() == conf.getSimilarity());
- conf.setSimilarity(new MySimilarity());
- assertEquals(MySimilarity.class, conf.getSimilarity().getClass());
- conf.setSimilarity(null);
- assertTrue(Similarity.getDefault() == conf.getSimilarity());
+ // Test Similarity:
+ // we shouldn't assert what the default is, just that it's not null.
+ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
+ conf.setSimilarityProvider(new MySimilarity());
+ assertEquals(MySimilarity.class, conf.getSimilarityProvider().getClass());
+ conf.setSimilarityProvider(null);
+ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
// Test IndexingChain
assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Mon Jan 24 19:13:31 2011
@@ -46,7 +46,7 @@ public class TestMaxTermFrequency extend
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(MockTokenizer.SIMPLE, true));
- config.setSimilarity(new TestSimilarity());
+ config.setSimilarityProvider(new TestSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java?rev=1062927&r1=1062926&r2=1062927&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java Mon Jan 24 19:13:31 2011
@@ -30,6 +30,7 @@ import org.apache.lucene.document.Field.
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -49,7 +50,7 @@ public class TestNorms extends LuceneTes
private static final int NUM_FIELDS = 10;
- private Similarity similarityOne;
+ private SimilarityProvider similarityOne;
private Analyzer anlzr;
private int numDocNorms;
private ArrayList<Float> norms;
@@ -151,7 +152,7 @@ public class TestNorms extends LuceneTes
private void createIndex(Random random, Directory dir) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne));
+ .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(true);
@@ -169,8 +170,9 @@ public class TestNorms extends LuceneTes
//System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
- ir.setNorm(i, "f"+1, Similarity.getDefault().encodeNormValue(newNorm));
- ir.setNorm(k, "f"+1, Similarity.getDefault().encodeNormValue(origNorm));
+ Similarity sim = new DefaultSimilarity().get("f"+1);
+ ir.setNorm(i, "f"+1, sim.encodeNormValue(newNorm));
+ ir.setNorm(k, "f"+1, sim.encodeNormValue(origNorm));
}
ir.close();
}
@@ -184,7 +186,7 @@ public class TestNorms extends LuceneTes
assertEquals("number of norms mismatches",numDocNorms,b.length);
ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
for (int j = 0; j < b.length; j++) {
- float norm = similarityOne.decodeNormValue(b[j]);
+ float norm = similarityOne.get(field).decodeNormValue(b[j]);
float norm1 = storedNorms.get(j).floatValue();
assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
}
@@ -195,7 +197,7 @@ public class TestNorms extends LuceneTes
private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne));
+ .setMaxBufferedDocs(5).setSimilarityProvider(similarityOne));
LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
lmp.setMergeFactor(3);
lmp.setUseCompoundFile(compound);
@@ -208,7 +210,7 @@ public class TestNorms extends LuceneTes
// create the next document
private Document newDoc() {
Document d = new Document();
- float boost = nextNorm();
+ float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed
for (int i = 0; i < 10; i++) {
Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
f.setBoost(boost);
@@ -218,10 +220,11 @@ public class TestNorms extends LuceneTes
}
// return unique norm values that are unchanged by encoding/decoding
- private float nextNorm() {
+ private float nextNorm(String fname) {
float norm = lastNorm + normDelta;
+ Similarity similarity = similarityOne.get(fname);
do {
- float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
+ float norm1 = similarity.decodeNormValue(similarity.encodeNormValue(norm));
if (norm1 > lastNorm) {
//System.out.println(norm1+" > "+lastNorm);
norm = norm1;
@@ -258,7 +261,7 @@ public class TestNorms extends LuceneTes
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
- config.setSimilarity(new CustomNormEncodingSimilarity());
+ config.setSimilarityProvider(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);