You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/06 17:33:30 UTC
svn commit: r1369865 - in /lucene/dev/branches/lucene_solr_3_6/lucene/contrib: ./ highlighter/src/java/org/apache/lucene/search/highlight/
Author: rmuir
Date: Mon Aug 6 15:33:30 2012
New Revision: 1369865
URL: http://svn.apache.org/viewvc?rev=1369865&view=rev
Log:
LUCENE-4289: fix highlighter idf inconsistencies/inefficiencies
Modified:
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt Mon Aug 6 15:33:30 2012
@@ -12,6 +12,9 @@ Bug Fixes
* LUCENE-4269: Deprecate BalancedSegmentMergePolicy (Mike McCandless)
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+ (Robert Muir)
+
======================= Lucene 3.6.1 ================
Bug Fixes
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Mon Aug 6 15:33:30 2012
@@ -61,18 +61,14 @@ public final class QueryTermExtractor
public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName)
{
WeightedTerm[] terms=getTerms(query,false, fieldName);
- int totalNumDocs=reader.numDocs();
+ int totalNumDocs=reader.maxDoc();
for (int i = 0; i < terms.length; i++)
{
try
{
int docFreq=reader.docFreq(new Term(fieldName,terms[i].term));
- // docFreq counts deletes
- if(totalNumDocs < docFreq) {
- docFreq = totalNumDocs;
- }
//IDF algorithm taken from DefaultSimilarity class
- float idf=(float)(Math.log((float)totalNumDocs/(double)(docFreq+1)) + 1.0);
+ float idf=(float)(Math.log(totalNumDocs/(double)(docFreq+1)) + 1.0);
terms[i].weight*=idf;
}
catch (IOException e)
Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Aug 6 15:33:30 2012
@@ -425,7 +425,7 @@ public class WeightedSpanTermExtractor {
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
extract(query, terms);
- int totalNumDocs = reader.numDocs();
+ int totalNumDocs = reader.maxDoc();
Set<String> weightedTerms = terms.keySet();
Iterator<String> it = weightedTerms.iterator();
@@ -433,12 +433,8 @@ public class WeightedSpanTermExtractor {
while (it.hasNext()) {
WeightedSpanTerm weightedSpanTerm = terms.get(it.next());
int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
- // docFreq counts deletes
- if(totalNumDocs < docFreq) {
- docFreq = totalNumDocs;
- }
// IDF algorithm taken from DefaultSimilarity class
- float idf = (float) (Math.log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
+ float idf = (float) (Math.log(totalNumDocs / (double) (docFreq + 1)) + 1.0);
weightedSpanTerm.weight *= idf;
}
} finally {