You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/06 17:33:30 UTC

svn commit: r1369865 - in /lucene/dev/branches/lucene_solr_3_6/lucene/contrib: ./ highlighter/src/java/org/apache/lucene/search/highlight/

Author: rmuir
Date: Mon Aug  6 15:33:30 2012
New Revision: 1369865

URL: http://svn.apache.org/viewvc?rev=1369865&view=rev
Log:
LUCENE-4289: fix highlighter idf inconsistencies/inefficiencies

Modified:
    lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt
    lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
    lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt Mon Aug  6 15:33:30 2012
@@ -12,6 +12,9 @@ Bug Fixes
 
 * LUCENE-4269: Deprecate BalancedSegmentMergePolicy (Mike McCandless)
 
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+  (Robert Muir)
+
 ======================= Lucene 3.6.1 ================
 
 Bug Fixes

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Mon Aug  6 15:33:30 2012
@@ -61,18 +61,14 @@ public final class QueryTermExtractor
 	public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName) 
 	{
 	    WeightedTerm[] terms=getTerms(query,false, fieldName);
-	    int totalNumDocs=reader.numDocs();
+	    int totalNumDocs=reader.maxDoc();
 	    for (int i = 0; i < terms.length; i++)
         {
 	        try
             {
                 int docFreq=reader.docFreq(new Term(fieldName,terms[i].term));
-                // docFreq counts deletes
-                if(totalNumDocs < docFreq) {
-                  docFreq = totalNumDocs;
-                }
                 //IDF algorithm taken from DefaultSimilarity class
-                float idf=(float)(Math.log((float)totalNumDocs/(double)(docFreq+1)) + 1.0);
+                float idf=(float)(Math.log(totalNumDocs/(double)(docFreq+1)) + 1.0);
                 terms[i].weight*=idf;
             } 
 	        catch (IOException e)

Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1369865&r1=1369864&r2=1369865&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Aug  6 15:33:30 2012
@@ -425,7 +425,7 @@ public class WeightedSpanTermExtractor {
     Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
     extract(query, terms);
 
-    int totalNumDocs = reader.numDocs();
+    int totalNumDocs = reader.maxDoc();
     Set<String> weightedTerms = terms.keySet();
     Iterator<String> it = weightedTerms.iterator();
 
@@ -433,12 +433,8 @@ public class WeightedSpanTermExtractor {
       while (it.hasNext()) {
         WeightedSpanTerm weightedSpanTerm = terms.get(it.next());
         int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
-        // docFreq counts deletes
-        if(totalNumDocs < docFreq) {
-          docFreq = totalNumDocs;
-        }
         // IDF algorithm taken from DefaultSimilarity class
-        float idf = (float) (Math.log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
+        float idf = (float) (Math.log(totalNumDocs / (double) (docFreq + 1)) + 1.0);
         weightedSpanTerm.weight *= idf;
       }
     } finally {