You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/24 04:33:45 UTC
svn commit: r1062633 - in /lucene/dev/trunk/lucene: CHANGES.txt
src/java/org/apache/lucene/search/MultiPhraseQuery.java
src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
Author: rmuir
Date: Mon Jan 24 03:33:45 2011
New Revision: 1062633
URL: http://svn.apache.org/viewvc?rev=1062633&view=rev
Log:
LUCENE-2879: MultiPhraseQuery summed its own idf instead of delegating to Similarity.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1062633&r1=1062632&r2=1062633&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jan 24 03:33:45 2011
@@ -682,6 +682,10 @@ Bug fixes
* LUCENE-2809: Fixed IndexWriter.numDocs to take into account
applied but not yet flushed deletes. (Mike McCandless)
+* LUCENE-2879: MultiPhraseQuery previously calculated its phrase IDF by summing
+ internally; it now calls Similarity.idfExplain(Collection, IndexSearcher).
+ (Robert Muir)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1062633&r1=1062632&r2=1062633&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Jan 24 03:33:45 2011
@@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
@@ -131,6 +132,7 @@ public class MultiPhraseQuery extends Qu
private class MultiPhraseWeight extends Weight {
private Similarity similarity;
private float value;
+ private final IDFExplanation idfExp;
private float idf;
private float queryNorm;
private float queryWeight;
@@ -140,12 +142,14 @@ public class MultiPhraseQuery extends Qu
this.similarity = searcher.getSimilarity();
// compute idf
- final int maxDoc = searcher.maxDoc();
+ ArrayList<Term> allTerms = new ArrayList<Term>();
for(final Term[] terms: termArrays) {
for (Term term: terms) {
- idf += this.similarity.idf(searcher.docFreq(term), maxDoc);
+ allTerms.add(term);
}
}
+ idfExp = similarity.idfExplain(allTerms, searcher);
+ idf = idfExp.getIdf();
}
@Override
@@ -238,7 +242,7 @@ public class MultiPhraseQuery extends Qu
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
- Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");
+ Explanation idfExpl = new Explanation(idf, "idf(" + field + ":" + idfExp.explain() +")");
// explain query weight
Explanation queryExpl = new Explanation();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java?rev=1062633&r1=1062632&r2=1062633&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java Mon Jan 24 03:33:45 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.document.Document;
@@ -30,6 +31,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
+import java.util.Collection;
import java.util.LinkedList;
/**
@@ -285,4 +287,44 @@ public class TestMultiPhraseQuery extend
new MultiPhraseQuery().toString();
}
+ public void testCustomIDF() throws Exception {
+ Directory indexStore = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
+ add("This is a test", "object", writer);
+ add("a note", "note", writer);
+
+ IndexReader reader = writer.getReader();
+ IndexSearcher searcher = new IndexSearcher(reader);
+ searcher.setSimilarity(new DefaultSimilarity() {
+
+ @Override
+ public IDFExplanation idfExplain(Collection<Term> terms,
+ IndexSearcher searcher) throws IOException {
+ return new IDFExplanation() {
+
+ @Override
+ public float getIdf() {
+ return 10f;
+ }
+
+ @Override
+ public String explain() {
+ return "just a test";
+ }
+
+ };
+ }
+ });
+
+ MultiPhraseQuery query = new MultiPhraseQuery();
+ query.add(new Term[] { new Term("body", "this"), new Term("body", "that") });
+ query.add(new Term("body", "is"));
+ Weight weight = query.createWeight(searcher);
+ assertEquals(10f * 10f, weight.sumOfSquaredWeights(), 0.001f);
+
+ writer.close();
+ searcher.close();
+ reader.close();
+ indexStore.close();
+ }
}