You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/14 12:11:18 UTC
svn commit: r1058944 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/src/java/org/apache/lucene/index/
lucene/src/test/org/apache/lucene/index/ solr/
Author: rmuir
Date: Fri Jan 14 11:11:17 2011
New Revision: 1058944
URL: http://svn.apache.org/viewvc?rev=1058944&view=rev
Log:
LUCENE-2864: add FieldInvertState.getMaxTermFrequency
Added:
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
- copied, changed from r1058939, lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Fri Jan 14 11:11:17 2011
@@ -427,6 +427,10 @@ New features
is no longer needed and discouraged for that use case. Directly wrapping
Query improves performance, as out-of-order collection is now supported.
(Uwe Schindler)
+
+* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to
+ FieldInvertState so that it can be used in Similarity.computeNorm.
+ (Robert Muir)
Optimizations
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldInvertState.java Fri Jan 14 11:11:17 2011
@@ -30,6 +30,7 @@ public final class FieldInvertState {
int length;
int numOverlap;
int offset;
+ int maxTermFrequency;
float boost;
AttributeSource attributeSource;
@@ -53,6 +54,7 @@ public final class FieldInvertState {
length = 0;
numOverlap = 0;
offset = 0;
+ maxTermFrequency = 0;
boost = docBoost;
attributeSource = null;
}
@@ -110,6 +112,15 @@ public final class FieldInvertState {
public void setBoost(float boost) {
this.boost = boost;
}
+
+ /**
+ * Get the maximum term-frequency encountered for any term in the field. A
+ * field containing "the quick brown fox jumps over the lazy dog" would have
+ * a value of 2, because "the" appears twice.
+ */
+ public int getMaxTermFrequency() {
+ return maxTermFrequency;
+ }
public AttributeSource getAttributeSource() {
return attributeSource;
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1058944&r1=1058943&r2=1058944&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Fri Jan 14 11:11:17 2011
@@ -125,6 +125,7 @@ final class FreqProxTermsWriterPerField
postings.docFreqs[termID] = 1;
writeProx(termID, fieldState.position);
}
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
}
@Override
@@ -158,11 +159,12 @@ final class FreqProxTermsWriterPerField
termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
}
postings.docFreqs[termID] = 1;
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
writeProx(termID, fieldState.position);
} else {
- postings.docFreqs[termID]++;
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
}
Copied: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (from r1058939, lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java?p2=lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java&r1=1058939&r2=1058944&rev=1058944&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Fri Jan 14 11:11:17 2011
@@ -23,7 +23,6 @@ import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DefaultSimilarity;
@@ -45,7 +44,7 @@ public class TestMaxTermFrequency extend
super.setUp();
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(MockTokenizer.SIMPLE, true));
+ new MockAnalyzer(MockAnalyzer.SIMPLE, true));
config.setSimilarity(new TestSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
Document doc = new Document();
@@ -67,7 +66,7 @@ public class TestMaxTermFrequency extend
}
public void test() throws Exception {
- byte fooNorms[] = MultiNorms.norms(reader, "foo");
+ byte fooNorms[] = reader.norms("foo");
for (int i = 0; i < reader.maxDoc(); i++)
assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
}