You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/14 12:11:18 UTC

svn commit: r1058944 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/src/java/org/apache/lucene/index/ lucene/src/test/org/apache/lucene/index/ solr/

Author: rmuir
Date: Fri Jan 14 11:11:17 2011
New Revision: 1058944

URL: http://svn.apache.org/viewvc?rev=1058944&view=rev
Log:
LUCENE-2864: add FieldInvertState.getMaxTermFrequency

Added:
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
      - copied, changed from r1058939, lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Fri Jan 14 11:11:17 2011
@@ -427,6 +427,10 @@ New features
   is no longer needed and discouraged for that use case. Directly wrapping
   Query improves performance, as out-of-order collection is now supported.
   (Uwe Schindler)
+
+* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to 
+  FieldInvertState so that it can be used in Similarity.computeNorm.
+  (Robert Muir)
   
 Optimizations
 

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java Fri Jan 14 11:11:17 2011
@@ -30,6 +30,7 @@ public final class FieldInvertState {
   int length;
   int numOverlap;
   int offset;
+  int maxTermFrequency;
   float boost;
   AttributeSource attributeSource;
 
@@ -53,6 +54,7 @@ public final class FieldInvertState {
     length = 0;
     numOverlap = 0;
     offset = 0;
+    maxTermFrequency = 0;
     boost = docBoost;
     attributeSource = null;
   }
@@ -110,6 +112,15 @@ public final class FieldInvertState {
   public void setBoost(float boost) {
     this.boost = boost;
   }
+
+  /**
+   * Get the maximum term-frequency encountered for any term in the field.  A
+   * field containing "the quick brown fox jumps over the lazy dog" would have
+   * a value of 2, because "the" appears twice.
+   */
+  public int getMaxTermFrequency() {
+    return maxTermFrequency;
+  }
   
   public AttributeSource getAttributeSource() {
     return attributeSource;

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Fri Jan 14 11:11:17 2011
@@ -125,6 +125,7 @@ final class FreqProxTermsWriterPerField 
       postings.docFreqs[termID] = 1;
       writeProx(termID, fieldState.position);
     }
+    fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
   }
 
   @Override
@@ -158,11 +159,12 @@ final class FreqProxTermsWriterPerField 
           termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
         }
         postings.docFreqs[termID] = 1;
+        fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
         postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
         postings.lastDocIDs[termID] = docState.docID;
         writeProx(termID, fieldState.position);
       } else {
-        postings.docFreqs[termID]++;
+        fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
         writeProx(termID, fieldState.position-postings.lastPositions[termID]);
       }
     }

Copied: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (from r1058939, lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java?p2=lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java&r1=1058939&r2=1058944&rev=1058944&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Fri Jan 14 11:11:17 2011
@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.DefaultSimilarity;
@@ -45,7 +44,7 @@ public class TestMaxTermFrequency extend
     super.setUp();
     dir = newDirectory();
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, 
-        new MockAnalyzer(MockTokenizer.SIMPLE, true));
+        new MockAnalyzer(MockAnalyzer.SIMPLE, true));
     config.setSimilarity(new TestSimilarity());
     RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
     Document doc = new Document();
@@ -67,7 +66,7 @@ public class TestMaxTermFrequency extend
   }
   
   public void test() throws Exception {
-    byte fooNorms[] = MultiNorms.norms(reader, "foo");
+    byte fooNorms[] = reader.norms("foo");
     for (int i = 0; i < reader.maxDoc(); i++)
       assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
   }