You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC
svn commit: r1068809 [3/36] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/queryparser/ dev-tools/...
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsAndPositionsEnum.java Wed Feb 9 09:35:27 2011
@@ -82,14 +82,17 @@ public class InstantiatedDocsAndPosition
return currentDoc.getTermPositions().length;
}
+ @Override
public int nextPosition() {
return currentDoc.getTermPositions()[++posUpto];
}
+ @Override
public boolean hasPayload() {
return currentDoc.getPayloads()[posUpto] != null;
}
+ @Override
public BytesRef getPayload() {
payload.bytes = currentDoc.getPayloads()[posUpto];
payload.length = payload.bytes.length;
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java Wed Feb 9 09:35:27 2011
@@ -238,6 +238,10 @@ public class InstantiatedIndex
while((text = termsEnum.next()) != null) {
String termText = text.utf8ToString();
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+ final long totalTermFreq = termsEnum.totalTermFreq();
+ if (totalTermFreq != -1) {
+ instantiatedTerm.addPositionsCount(totalTermFreq);
+ }
getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
instantiatedTerm.setTermIndex(terms.size());
terms.add(instantiatedTerm);
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Wed Feb 9 09:35:27 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.store.instanti
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -32,6 +33,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
@@ -46,10 +48,13 @@ import org.apache.lucene.util.Bits;
public class InstantiatedIndexReader extends IndexReader {
private final InstantiatedIndex index;
+ private ReaderContext context = new AtomicReaderContext(this);
+
public InstantiatedIndexReader(InstantiatedIndex index) {
super();
this.index = index;
+ readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
}
/**
@@ -332,15 +337,6 @@ public class InstantiatedIndexReader ext
}
@Override
- public void norms(String field, byte[] bytes, int offset) throws IOException {
- byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
- if (norms == null) {
- return;
- }
- System.arraycopy(norms, 0, bytes, offset, norms.length);
- }
-
- @Override
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
@@ -410,12 +406,22 @@ public class InstantiatedIndexReader ext
if (i < 0) {
i = -i - 1;
}
- if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) {
+ if (i >= orderedTerms.length || orderedTerms[i].field() != field) {
// field does not exist
return null;
}
final int startLoc = i;
+ // TODO: heavy to do this here; would be better to
+ // do it up front & cache
+ long sum = 0;
+ int upto = i;
+ while(upto < orderedTerms.length && orderedTerms[upto].field() == field) { // stop at the first term of another field
+ sum += orderedTerms[upto].getTotalTermFreq(); // index by upto (the cursor), not by the fixed start i
+ upto++;
+ }
+ final long sumTotalTermFreq = sum;
+
return new Terms() {
@Override
public TermsEnum iterator() {
@@ -423,6 +429,11 @@ public class InstantiatedIndexReader ext
}
@Override
+ public long getSumTotalTermFreq() {
+ return sumTotalTermFreq;
+ }
+
+ @Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@@ -435,6 +446,11 @@ public class InstantiatedIndexReader ext
}
};
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return context;
+ }
@Override
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Wed Feb 9 09:35:27 2011
@@ -38,10 +38,12 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.CollectionUtil;
@@ -66,7 +68,7 @@ public class InstantiatedIndexWriter imp
private final InstantiatedIndex index;
private final Analyzer analyzer;
- private Similarity similarity = Similarity.getDefault(); // how to normalize;
+ private SimilarityProvider similarityProvider = IndexSearcher.getDefaultSimilarityProvider(); // how to normalize;
private transient Set<String> fieldNameBuffer;
/**
@@ -112,14 +114,14 @@ public class InstantiatedIndexWriter imp
* MAddDocs_20000 - 7 4000 100 false - - 1 - - 20000 - - 535,8 - - 37,33 - 309 680 640 - 501 968 896
* </pre>
*
- * @see org.apache.lucene.index.IndexWriter#setMergeFactor(int)
+ * @see org.apache.lucene.index.LogMergePolicy#setMergeFactor(int)
*/
public void setMergeFactor(int mergeFactor) {
this.mergeFactor = mergeFactor;
}
/**
- * @see org.apache.lucene.index.IndexWriter#getMergeFactor()
+ * @see org.apache.lucene.index.LogMergePolicy#getMergeFactor()
*/
public int getMergeFactor() {
return mergeFactor;
@@ -200,9 +202,9 @@ public class InstantiatedIndexWriter imp
byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field);
if (oldNorms != null) {
System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length);
- Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f));
+ Arrays.fill(norms, oldNorms.length, norms.length, (byte) 0);
} else {
- Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
+ Arrays.fill(norms, 0, norms.length, (byte) 0);
}
normsByFieldNameAndDocumentNumber.put(field, norms);
fieldNames.remove(field);
@@ -210,7 +212,7 @@ public class InstantiatedIndexWriter imp
for (String field : fieldNames) {
//System.out.println(field);
byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
- Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
+ Arrays.fill(norms, 0, norms.length, (byte) 0);
normsByFieldNameAndDocumentNumber.put(field, norms);
}
fieldNames.clear();
@@ -235,10 +237,12 @@ public class InstantiatedIndexWriter imp
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
- float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
- norm *= document.getDocument().getBoost();
- norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
- normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
+ final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
+ final FieldInvertState invertState = new FieldInvertState();
+ invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
+ invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
+ final float norm = similarityProvider.get(fieldName).computeNorm(fieldName, invertState);
+ normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).encodeNormValue(norm);
} else {
System.currentTimeMillis();
}
@@ -313,6 +317,7 @@ public class InstantiatedIndexWriter imp
}
associatedDocuments[associatedDocuments.length - 1] = info;
term.setAssociatedDocuments(associatedDocuments);
+ term.addPositionsCount(positions.length);
// todo optimize, only if term vector?
informationByTermOfCurrentDocument.put(term, info);
@@ -656,12 +661,12 @@ public class InstantiatedIndexWriter imp
addDocument(doc, analyzer);
}
- public Similarity getSimilarity() {
- return similarity;
+ public SimilarityProvider getSimilarityProvider() {
+ return similarityProvider;
}
- public void setSimilarity(Similarity similarity) {
- this.similarity = similarity;
+ public void setSimilarityProvider(SimilarityProvider similarityProvider) {
+ this.similarityProvider = similarityProvider;
}
public Analyzer getAnalyzer() {
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java Wed Feb 9 09:35:27 2011
@@ -45,6 +45,8 @@ public class InstantiatedTerm
private Term term;
+ private long totalTermFreq;
+
/**
* index of term in InstantiatedIndex
* @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
@@ -92,6 +94,14 @@ public class InstantiatedTerm
this.associatedDocuments = associatedDocuments;
}
+ void addPositionsCount(long count) {
+ totalTermFreq += count;
+ }
+
+ public long getTotalTermFreq() {
+ return totalTermFreq;
+ }
+
/**
* Finds index to the first beyond the current whose document number is
* greater than or equal to <i>target</i>, -1 if there is no such element.
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Wed Feb 9 09:35:27 2011
@@ -18,10 +18,14 @@ package org.apache.lucene.store.instanti
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
+
+import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
@@ -91,10 +95,6 @@ public class InstantiatedTermsEnum exten
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public BytesRef term() {
return br;
}
@@ -110,6 +110,12 @@ public class InstantiatedTermsEnum exten
}
@Override
+ public long totalTermFreq() {
+ final long v = terms[upto].getTotalTermFreq();
+ return v == 0 ? -1 : v;
+ }
+
+ @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
if (reuse == null || !(reuse instanceof InstantiatedDocsEnum)) {
reuse = new InstantiatedDocsEnum();
@@ -129,5 +135,18 @@ public class InstantiatedTermsEnum exten
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public TermState termState() throws IOException {
+ final OrdTermState state = new OrdTermState();
+ state.ord = upto - start;
+ return state;
+ }
+
+ @Override
+ public void seek(BytesRef term, TermState state) throws IOException {
+ assert state != null && state instanceof OrdTermState;
+ seek(((OrdTermState)state).ord); // just use the ord for simplicity
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java Wed Feb 9 09:35:27 2011
@@ -37,7 +37,7 @@ public class TestEmptyIndex extends Luce
InstantiatedIndex ii = new InstantiatedIndex();
IndexReader r = new InstantiatedIndexReader(ii);
- IndexSearcher s = new IndexSearcher(r);
+ IndexSearcher s = newSearcher(r);
TopDocs td = s.search(new TermQuery(new Term("foo", "bar")), 1);
@@ -71,12 +71,6 @@ public class TestEmptyIndex extends Luce
byte[] norms = MultiNorms.norms(r, "foo");
if (norms != null) {
assertEquals(0, norms.length);
- norms = new byte[10];
- Arrays.fill(norms, (byte)10);
- r.norms("foo", norms, 10);
- for (byte b : norms) {
- assertEquals((byte)10, b);
- }
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Wed Feb 9 09:35:27 2011
@@ -65,7 +65,8 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+
for (int i = 0; i < 20; i++) {
Document document = new Document();
assembleDocument(document, i);
@@ -90,7 +91,11 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ indexWriter.setInfoStream(VERBOSE ? System.out : null);
+ if (VERBOSE) {
+ System.out.println("TEST: make test index");
+ }
for (int i = 0; i < 500; i++) {
Document document = new Document();
assembleDocument(document, i);
@@ -320,6 +325,9 @@ public class TestIndicesEquals extends L
protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
+ if (VERBOSE) {
+ System.out.println("TEST: testEquals");
+ }
testTermDocsSomeMore(aprioriIndex, testIndex);
IndexReader aprioriReader = IndexReader.open(aprioriIndex, false);
@@ -351,35 +359,6 @@ public class TestIndicesEquals extends L
for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}
-
- // test norms as used by multireader
-
- aprioriNorms = new byte[aprioriReader.maxDoc()];
- MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 0);
-
- testNorms = new byte[testReader.maxDoc()];
- MultiNorms.norms(testReader, (String) field, testNorms, 0);
-
- assertEquals(aprioriNorms.length, testNorms.length);
-
- for (int i = 0; i < aprioriNorms.length; i++) {
- assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
- }
-
-
- // test norms as used by multireader
-
- aprioriNorms = new byte[aprioriReader.maxDoc() + 10];
- MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 10);
-
- testNorms = new byte[testReader.maxDoc() + 10];
- MultiNorms.norms(testReader, (String) field, testNorms, 10);
-
- assertEquals(aprioriNorms.length, testNorms.length);
-
- for (int i = 0; i < aprioriNorms.length; i++) {
- assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
- }
}
}
@@ -401,6 +380,9 @@ public class TestIndicesEquals extends L
String aprioriField;
while((aprioriField = aprioriFieldsEnum.next()) != null) {
String testField = testFieldsEnum.next();
+ if (VERBOSE) {
+ System.out.println("TEST: verify field=" + testField);
+ }
assertEquals(aprioriField, testField);
TermsEnum aprioriTermEnum = aprioriFieldsEnum.terms();
@@ -409,8 +391,15 @@ public class TestIndicesEquals extends L
BytesRef aprioriText;
while((aprioriText = aprioriTermEnum.next()) != null) {
assertEquals(aprioriText, testTermEnum.next());
+ if (VERBOSE) {
+ System.out.println("TEST: verify term=" + aprioriText.utf8ToString());
+ }
assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
+ final long totalTermFreq = aprioriTermEnum.totalTermFreq();
+ if (totalTermFreq != -1) {
+ assertEquals(totalTermFreq, testTermEnum.totalTermFreq());
+ }
// compare termDocs seeking
@@ -434,6 +423,10 @@ public class TestIndicesEquals extends L
assertEquals(DocsEnum.NO_MORE_DOCS, testTermDocs.nextDoc());
break;
}
+ if (VERBOSE) {
+ System.out.println("TEST: verify doc=" + aprioriTermDocs.docID());
+ }
+
assertTrue(testTermDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(aprioriTermDocs.docID(), testTermDocs.docID());
@@ -445,12 +438,19 @@ public class TestIndicesEquals extends L
DocsAndPositionsEnum aprioriTermPositions = aprioriTermEnum.docsAndPositions(MultiFields.getDeletedDocs(aprioriReader), null);
DocsAndPositionsEnum testTermPositions = testTermEnum.docsAndPositions(MultiFields.getDeletedDocs(testReader), null);
+ if (VERBOSE) {
+ System.out.println("TEST: enum1=" + aprioriTermPositions + " enum2=" + testTermPositions);
+ }
if (aprioriTermPositions != null) {
for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
boolean hasNext = aprioriTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS;
if (hasNext) {
assertTrue(testTermPositions.nextDoc() != DocsEnum.NO_MORE_DOCS);
+
+ if (VERBOSE) {
+ System.out.println("TEST: verify doc=" + aprioriTermPositions.docID());
+ }
assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
@@ -458,6 +458,10 @@ public class TestIndicesEquals extends L
int aprioriPos = aprioriTermPositions.nextPosition();
int testPos = testTermPositions.nextPosition();
+ if (VERBOSE) {
+ System.out.println("TEST: verify pos=" + aprioriPos);
+ }
+
assertEquals(aprioriPos, testPos);
assertEquals(aprioriTermPositions.hasPayload(), testTermPositions.hasPayload());
Modified: lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java Wed Feb 9 09:35:27 2011
@@ -20,8 +20,8 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.Scorer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.LuceneTestCase;
/**
@@ -36,7 +36,7 @@ public class TestRealTime extends Lucene
InstantiatedIndex index = new InstantiatedIndex();
InstantiatedIndexReader reader = new InstantiatedIndexReader(index);
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
InstantiatedIndexWriter writer = new InstantiatedIndexWriter(index);
Document doc;
@@ -67,7 +67,7 @@ public class TestRealTime extends Lucene
@Override
public void setScorer(Scorer scorer) {}
@Override
- public void setNextReader(IndexReader reader, int docBase) {}
+ public void setNextReader(AtomicReaderContext context) {}
@Override
public boolean acceptsDocsOutOfOrder() { return true; }
@Override
Modified: lucene/dev/branches/docvalues/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java Wed Feb 9 09:35:27 2011
@@ -41,6 +41,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Fields;
@@ -232,7 +233,7 @@ class LuceneMethods {
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {}
+ public void setNextReader(AtomicReaderContext context) {}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
Modified: lucene/dev/branches/docvalues/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Feb 9 09:35:27 2011
@@ -25,6 +25,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
@@ -38,6 +39,9 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.FieldsEnum;
@@ -48,12 +52,14 @@ import org.apache.lucene.index.TermFreqV
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -443,7 +449,7 @@ public class MemoryIndex implements Seri
}
@Override
- public void setNextReader(IndexReader reader, int docBase) { }
+ public void setNextReader(AtomicReaderContext context) { }
});
float score = scores[0];
return score;
@@ -607,6 +613,8 @@ public class MemoryIndex implements Seri
/** Term for this field's fieldName, lazily computed on demand */
public transient Term template;
+ private final long sumTotalTermFreq;
+
private static final long serialVersionUID = 2882195016849084649L;
public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
@@ -614,6 +622,15 @@ public class MemoryIndex implements Seri
this.numTokens = numTokens;
this.numOverlapTokens = numOverlapTokens;
this.boost = boost;
+ long sum = 0;
+ for(Map.Entry<BytesRef,ArrayIntList> ent : terms.entrySet()) {
+ sum += ent.getValue().size();
+ }
+ sumTotalTermFreq = sum;
+ }
+
+ public long getSumTotalTermFreq() {
+ return sumTotalTermFreq;
}
/**
@@ -739,9 +756,11 @@ public class MemoryIndex implements Seri
private final class MemoryIndexReader extends IndexReader {
private IndexSearcher searcher; // needed to find searcher.getSimilarity()
+ private final ReaderContext readerInfos = new AtomicReaderContext(this);
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
+ readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
}
private Info getInfo(String fieldName) {
@@ -765,6 +784,11 @@ public class MemoryIndex implements Seri
if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq);
return freq;
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return readerInfos;
+ }
@Override
public Fields fields() {
@@ -823,6 +847,11 @@ public class MemoryIndex implements Seri
public long getUniqueTermCount() {
return info.sortedTerms.length;
}
+
+ @Override
+ public long getSumTotalTermFreq() {
+ return info.getSumTotalTermFreq();
+ }
};
}
}
@@ -889,10 +918,6 @@ public class MemoryIndex implements Seri
}
@Override
- public void cacheCurrentTerm() {
- }
-
- @Override
public long ord() {
return termUpto;
}
@@ -903,6 +928,11 @@ public class MemoryIndex implements Seri
}
@Override
+ public long totalTermFreq() {
+ return info.sortedTerms[termUpto].getValue().size();
+ }
+
+ @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
reuse = new MemoryDocsEnum();
@@ -922,8 +952,21 @@ public class MemoryIndex implements Seri
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
- }
+ @Override
+ public void seek(BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ this.seek(((OrdTermState)state).ord);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ OrdTermState ts = new OrdTermState();
+ ts.ord = termUpto;
+ return ts;
+ }
+ }
+
private class MemoryDocsEnum extends DocsEnum {
private ArrayIntList positions;
private boolean hasNext;
@@ -1142,9 +1185,9 @@ public class MemoryIndex implements Seri
};
}
- private Similarity getSimilarity() {
- if (searcher != null) return searcher.getSimilarity();
- return Similarity.getDefault();
+ private SimilarityProvider getSimilarityProvider() {
+ if (searcher != null) return searcher.getSimilarityProvider();
+ return IndexSearcher.getDefaultSimilarityProvider();
}
private void setSearcher(IndexSearcher searcher) {
@@ -1154,20 +1197,21 @@ public class MemoryIndex implements Seri
/** performance hack: cache norms to avoid repeated expensive calculations */
private byte[] cachedNorms;
private String cachedFieldName;
- private Similarity cachedSimilarity;
+ private SimilarityProvider cachedSimilarity;
@Override
public byte[] norms(String fieldName) {
byte[] norms = cachedNorms;
- Similarity sim = getSimilarity();
+ SimilarityProvider sim = getSimilarityProvider();
if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
Info info = getInfo(fieldName);
+ Similarity fieldSim = sim.get(fieldName);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
- float n = sim.computeNorm(fieldName, invertState);
- byte norm = sim.encodeNormValue(n);
+ float n = fieldSim.computeNorm(fieldName, invertState);
+ byte norm = fieldSim.encodeNormValue(n);
norms = new byte[] {norm};
// cache it for future reuse
@@ -1180,13 +1224,6 @@ public class MemoryIndex implements Seri
}
@Override
- public void norms(String fieldName, byte[] bytes, int offset) {
- if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName);
- byte[] norms = norms(fieldName);
- System.arraycopy(norms, 0, bytes, offset, norms.length);
- }
-
- @Override
protected void doSetNorm(int doc, String fieldName, byte value) {
throw new UnsupportedOperationException();
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java Wed Feb 9 09:35:27 2011
@@ -24,6 +24,7 @@ import java.util.ArrayList;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;
@@ -33,7 +34,7 @@ import org.apache.lucene.util.ReaderUtil
/**
* Given a directory and a list of fields, updates the fieldNorms in place for every document.
*
- * If Similarity class is specified, uses its lengthNorm method to set norms.
+ * If Similarity class is specified, uses its computeNorm method to set norms.
* If -n command line argument is used, removed field norms, as if
* {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
*
@@ -57,13 +58,13 @@ public class FieldNormModifier {
System.exit(1);
}
- Similarity s = null;
+ SimilarityProvider s = null;
if (args[1].equals("-d"))
args[1] = DefaultSimilarity.class.getName();
try {
- s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
+ s = Class.forName(args[1]).asSubclass(SimilarityProvider.class).newInstance();
} catch (Exception e) {
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
e.printStackTrace(System.err);
@@ -84,7 +85,7 @@ public class FieldNormModifier {
private Directory dir;
- private Similarity sim;
+ private SimilarityProvider sim;
/**
* Constructor for code that wishes to use this class programmatically
@@ -93,7 +94,7 @@ public class FieldNormModifier {
* @param d the Directory to modify
* @param s the Similarity to use (can be null)
*/
- public FieldNormModifier(Directory d, Similarity s) {
+ public FieldNormModifier(Directory d, SimilarityProvider s) {
dir = d;
sim = s;
}
@@ -111,7 +112,7 @@ public class FieldNormModifier {
*/
public void reSetNorms(String field) throws IOException {
String fieldName = StringHelper.intern(field);
-
+ Similarity fieldSim = sim.get(field);
IndexReader reader = null;
try {
reader = IndexReader.open(dir, false);
@@ -119,6 +120,7 @@ public class FieldNormModifier {
final List<IndexReader> subReaders = new ArrayList<IndexReader>();
ReaderUtil.gatherSubReaders(subReaders, reader);
+ final FieldInvertState invertState = new FieldInvertState();
for(IndexReader subReader : subReaders) {
final Bits delDocs = subReader.getDeletedDocs();
@@ -143,9 +145,11 @@ public class FieldNormModifier {
}
}
+ invertState.setBoost(1.0f);
for (int d = 0; d < termCounts.length; d++) {
if (delDocs == null || !delDocs.get(d)) {
- subReader.setNorm(d, fieldName, sim.encodeNormValue(sim.lengthNorm(fieldName, termCounts[d])));
+ invertState.setLength(termCounts[d]);
+ subReader.setNorm(d, fieldName, fieldSim.encodeNormValue(fieldSim.computeNorm(fieldName, invertState)));
}
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java Wed Feb 9 09:35:27 2011
@@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.Po
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -131,7 +131,7 @@ public class AppendingCodec extends Code
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files)
throws IOException {
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java Wed Feb 9 09:35:27 2011
@@ -22,15 +22,15 @@ import java.util.Comparator;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-public class AppendingTermsDictReader extends PrefixCodedTermsReader {
+public class AppendingTermsDictReader extends BlockTermsReader {
public AppendingTermsDictReader(TermsIndexReaderBase indexReader,
Directory dir, FieldInfos fieldInfos, String segment,
@@ -43,7 +43,7 @@ public class AppendingTermsDictReader ex
@Override
protected void readHeader(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME,
- PrefixCodedTermsWriter.VERSION_START, PrefixCodedTermsWriter.VERSION_CURRENT);
+ BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT);
}
@Override
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java Wed Feb 9 09:35:27 2011
@@ -22,13 +22,13 @@ import java.util.Comparator;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-public class AppendingTermsDictWriter extends PrefixCodedTermsWriter {
+public class AppendingTermsDictWriter extends BlockTermsWriter {
final static String CODEC_NAME = "APPENDING_TERMS_DICT";
public AppendingTermsDictWriter(TermsIndexWriterBase indexWriter,
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Wed Feb 9 09:35:27 2011
@@ -176,15 +176,34 @@ public class HighFreqTerms {
return ts;
}
- public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termtext) throws Exception {
- BytesRef br = termtext;
+ public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termText) throws Exception {
+
long totalTF = 0;
- Bits skipDocs = MultiFields.getDeletedDocs(reader);
- DocsEnum de = MultiFields.getTermDocsEnum(reader, skipDocs, field, br);
- // if term is not in index return totalTF of 0
- if (de == null) {
+
+ Terms terms = MultiFields.getTerms(reader, field);
+ if (terms == null) {
+ return 0;
+ }
+
+ TermsEnum termsEnum = terms.iterator();
+ if (termsEnum.seek(termText) != TermsEnum.SeekStatus.FOUND) {
return 0;
}
+
+ Bits skipDocs = MultiFields.getDeletedDocs(reader);
+ if (skipDocs == null) {
+ // TODO: we could do this up front, during the scan
+ // (next()), instead of after-the-fact here w/ seek,
+ // if the codec supports it and there are no del
+ // docs...
+ final long totTF = termsEnum.totalTermFreq();
+ if (totTF != -1) {
+ return totTF;
+ }
+ }
+
+ DocsEnum de = termsEnum.docs(skipDocs, null);
+
// use DocsEnum.read() and BulkResult api
final DocsEnum.BulkReadResult bulkresult = de.getBulkResult();
int count;
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java Wed Feb 9 09:35:27 2011
@@ -146,7 +146,7 @@ public class SweetSpotSimilarity extends
else
numTokens = state.getLength();
- return state.getBoost() * lengthNorm(fieldName, numTokens);
+ return state.getBoost() * computeLengthNorm(fieldName, numTokens);
}
/**
@@ -167,8 +167,7 @@ public class SweetSpotSimilarity extends
*
* @see #setLengthNormFactors
*/
- @Override
- public float lengthNorm(String fieldName, int numTerms) {
+ public float computeLengthNorm(String fieldName, int numTerms) {
int l = ln_min;
int h = ln_max;
float s = ln_steep;
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java Wed Feb 9 09:35:27 2011
@@ -41,4 +41,9 @@ public final class TermStats {
String getTermText() {
return termtext.utf8ToString();
}
+
+ @Override
+ public String toString() {
+ return("TermStats: term=" + termtext.utf8ToString() + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp Wed Feb 9 09:35:27 2011
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
#include <jni.h>
#include <fcntl.h> // posix_fadvise, constants for open
#include <string.h> // strerror
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/java/org/apache/lucene/store/WindowsDirectory.java Wed Feb 9 09:35:27 2011
@@ -64,6 +64,7 @@ public class WindowsDirectory extends FS
super(path, null);
}
+ @Override
public IndexInput openInput(String name, int bufferSize) throws IOException {
ensureOpen();
return new WindowsIndexInput(new File(getDirectory(), name), Math.max(bufferSize, DEFAULT_BUFFERSIZE));
@@ -82,14 +83,17 @@ public class WindowsDirectory extends FS
isOpen = true;
}
+ @Override
protected void readInternal(byte[] b, int offset, int length) throws IOException {
if (WindowsDirectory.read(fd, b, offset, length, getFilePointer()) != length)
throw new IOException("Read past EOF");
}
+ @Override
protected void seekInternal(long pos) throws IOException {
}
+ @Override
public synchronized void close() throws IOException {
// NOTE: we synchronize and track "isOpen" because Lucene sometimes closes IIs twice!
if (!isClone && isOpen) {
@@ -98,6 +102,7 @@ public class WindowsDirectory extends FS
}
}
+ @Override
public long length() {
return length;
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Wed Feb 9 09:35:27 2011
@@ -23,11 +23,12 @@ import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -41,10 +42,10 @@ public class TestFieldNormModifier exten
public Directory store;
/** inverts the normal notion of lengthNorm */
- public static Similarity s = new DefaultSimilarity() {
+ public static SimilarityProvider s = new DefaultSimilarity() {
@Override
- public float lengthNorm(String fieldName, int numTokens) {
- return numTokens;
+ public float computeNorm(String fieldName, FieldInvertState state) {
+ return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
}
};
@@ -53,7 +54,7 @@ public class TestFieldNormModifier exten
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
@@ -122,8 +123,8 @@ public class TestFieldNormModifier exten
scores[doc + docBase] = scorer.score();
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
@@ -157,8 +158,8 @@ public class TestFieldNormModifier exten
scores[doc + docBase] = scorer.score();
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
@@ -209,8 +210,8 @@ public class TestFieldNormModifier exten
scores[doc + docBase] = scorer.score();
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Wed Feb 9 09:35:27 2011
@@ -32,7 +32,7 @@ public class TestMultiPassIndexSplitter
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java Wed Feb 9 09:35:27 2011
@@ -21,13 +21,14 @@ package org.apache.lucene.misc;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.index.FieldInvertState;
/**
* Test of the SweetSpotSimilarity
*/
public class SweetSpotSimilarityTest extends LuceneTestCase {
- public void testSweetSpotLengthNorm() {
+ public void testSweetSpotComputeNorm() {
SweetSpotSimilarity ss = new SweetSpotSimilarity();
ss.setLengthNormFactors(1,1,0.5f);
@@ -37,10 +38,13 @@ public class SweetSpotSimilarityTest ext
// base case, should degrade
-
+ final FieldInvertState invertState = new FieldInvertState();
+ invertState.setBoost(1.0f);
for (int i = 1; i < 1000; i++) {
+ invertState.setLength(i);
assertEquals("base case: i="+i,
- d.lengthNorm("foo",i), s.lengthNorm("foo",i),
+ d.computeNorm("foo", invertState),
+ s.computeNorm("foo", invertState),
0.0f);
}
@@ -49,14 +53,21 @@ public class SweetSpotSimilarityTest ext
ss.setLengthNormFactors(3,10,0.5f);
for (int i = 3; i <=10; i++) {
+ invertState.setLength(i);
assertEquals("3,10: spot i="+i,
- 1.0f, s.lengthNorm("foo",i),
+ 1.0f,
+ s.computeNorm("foo", invertState),
0.0f);
}
for (int i = 10; i < 1000; i++) {
+ invertState.setLength(i-9);
+ final float normD = d.computeNorm("foo", invertState);
+ invertState.setLength(i);
+ final float normS = s.computeNorm("foo", invertState);
assertEquals("3,10: 10<x : i="+i,
- d.lengthNorm("foo",i-9), s.lengthNorm("foo",i),
+ normD,
+ normS,
0.0f);
}
@@ -68,33 +79,54 @@ public class SweetSpotSimilarityTest ext
for (int i = 3; i <=10; i++) {
+ invertState.setLength(i);
assertEquals("f: 3,10: spot i="+i,
- 1.0f, s.lengthNorm("foo",i),
+ 1.0f,
+ s.computeNorm("foo", invertState),
0.0f);
}
for (int i = 10; i < 1000; i++) {
+ invertState.setLength(i-9);
+ final float normD = d.computeNorm("foo", invertState);
+ invertState.setLength(i);
+ final float normS = s.computeNorm("foo", invertState);
assertEquals("f: 3,10: 10<x : i="+i,
- d.lengthNorm("foo",i-9), s.lengthNorm("foo",i),
+ normD,
+ normS,
0.0f);
}
for (int i = 8; i <=13; i++) {
+ invertState.setLength(i);
assertEquals("f: 8,13: spot i="+i,
- 1.0f, s.lengthNorm("bar",i),
+ 1.0f,
+ s.computeNorm("bar", invertState),
0.0f);
}
for (int i = 6; i <=9; i++) {
+ invertState.setLength(i);
assertEquals("f: 6,9: spot i="+i,
- 1.0f, s.lengthNorm("yak",i),
+ 1.0f,
+ s.computeNorm("yak", invertState),
0.0f);
}
for (int i = 13; i < 1000; i++) {
+ invertState.setLength(i-12);
+ final float normD = d.computeNorm("foo", invertState);
+ invertState.setLength(i);
+ final float normS = s.computeNorm("bar", invertState);
assertEquals("f: 8,13: 13<x : i="+i,
- d.lengthNorm("foo",i-12), s.lengthNorm("bar",i),
+ normD,
+ normS,
0.0f);
}
for (int i = 9; i < 1000; i++) {
+ invertState.setLength(i-8);
+ final float normD = d.computeNorm("foo", invertState);
+ invertState.setLength(i);
+ final float normS = s.computeNorm("yak", invertState);
assertEquals("f: 6,9: 9<x : i="+i,
- d.lengthNorm("foo",i-8), s.lengthNorm("yak",i),
+ normD,
+ normS,
0.0f);
}
@@ -105,9 +137,12 @@ public class SweetSpotSimilarityTest ext
ss.setLengthNormFactors("b",5,8,0.1f, false);
for (int i = 9; i < 1000; i++) {
- assertTrue("s: i="+i+" : a="+ss.lengthNorm("a",i)+
- " < b="+ss.lengthNorm("b",i),
- ss.lengthNorm("a",i) < s.lengthNorm("b",i));
+ invertState.setLength(i);
+ final float normSS = ss.computeNorm("a", invertState);
+ final float normS = s.computeNorm("b", invertState);
+ assertTrue("s: i="+i+" : a="+normSS+
+ " < b="+normS,
+ normSS < normS);
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java Wed Feb 9 09:35:27 2011
@@ -17,15 +17,16 @@ package org.apache.lucene.misc;
* limitations under the License.
*/
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -41,8 +42,10 @@ public class TestHighFreqTerms extends L
writer = new IndexWriter(dir, newIndexWriterConfig(random,
TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))
.setMaxBufferedDocs(2));
+ writer.setInfoStream(VERBOSE ? System.out : null);
indexDocs(writer);
reader = IndexReader.open(dir, true);
+ _TestUtil.checkIndex(dir);
}
@AfterClass
@@ -75,8 +78,8 @@ public class TestHighFreqTerms extends L
String field="FIELD_1";
TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
for (int i = 0; i < terms.length; i++) {
- if (i >0){
- assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
+ if (i > 0) {
+ assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
}
}
}
@@ -134,11 +137,12 @@ public class TestHighFreqTerms extends L
TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms);
- for (int i = 0; i < termsWithTF.length; i++) {
- // check that they are sorted by descending termfreq order
- if (i >0){
- assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq > termsWithTF[i].totalTermFreq);
- }
+ for (int i = 0; i < termsWithTF.length; i++) {
+ // check that they are sorted by descending termfreq
+ // order
+ if (i > 0) {
+ assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq >= termsWithTF[i].totalTermFreq);
+ }
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Wed Feb 9 09:35:27 2011
@@ -22,8 +22,10 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.FieldNormModifier;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.Term;
@@ -31,7 +33,7 @@ import org.apache.lucene.search.Collecto
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -45,19 +47,19 @@ public class TestLengthNormModifier exte
public Directory store;
/** inverts the normal notion of lengthNorm */
- public static Similarity s = new DefaultSimilarity() {
- @Override
- public float lengthNorm(String fieldName, int numTokens) {
- return numTokens;
- }
- };
+ public static SimilarityProvider s = new DefaultSimilarity() {
+ @Override
+ public float computeNorm(String fieldName, FieldInvertState state) {
+ return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
+ }
+ };
@Override
public void setUp() throws Exception {
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
@@ -138,8 +140,8 @@ public class TestLengthNormModifier exte
scores[doc + docBase] = scorer.score();
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
@@ -161,12 +163,12 @@ public class TestLengthNormModifier exte
}
// override the norms to be inverted
- Similarity s = new DefaultSimilarity() {
- @Override
- public float lengthNorm(String fieldName, int numTokens) {
- return numTokens;
- }
- };
+ SimilarityProvider s = new DefaultSimilarity() {
+ @Override
+ public float computeNorm(String fieldName, FieldInvertState state) {
+ return state.getBoost() * (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
+ }
+ };
FieldNormModifier fnm = new FieldNormModifier(store, s);
fnm.reSetNorms("field");
@@ -180,8 +182,8 @@ public class TestLengthNormModifier exte
scores[doc + docBase] = scorer.score();
}
@Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java Wed Feb 9 09:35:27 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
@@ -41,10 +42,10 @@ public class BooleanFilter extends Filte
ArrayList<Filter> notFilters = null;
ArrayList<Filter> mustFilters = null;
- private DocIdSetIterator getDISI(ArrayList<Filter> filters, int index, IndexReader reader)
+ private DocIdSetIterator getDISI(ArrayList<Filter> filters, int index, AtomicReaderContext context)
throws IOException
{
- return filters.get(index).getDocIdSet(reader).iterator();
+ return filters.get(index).getDocIdSet(context).iterator();
}
/**
@@ -52,21 +53,21 @@ public class BooleanFilter extends Filte
* of the filters that have been added.
*/
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException
{
OpenBitSetDISI res = null;
-
+ final IndexReader reader = context.reader;
if (shouldFilters != null) {
for (int i = 0; i < shouldFilters.size(); i++) {
if (res == null) {
- res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
+ res = new OpenBitSetDISI(getDISI(shouldFilters, i, context), reader.maxDoc());
} else {
- DocIdSet dis = shouldFilters.get(i).getDocIdSet(reader);
+ DocIdSet dis = shouldFilters.get(i).getDocIdSet(context);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.or((OpenBitSet) dis);
} else {
- res.inPlaceOr(getDISI(shouldFilters, i, reader));
+ res.inPlaceOr(getDISI(shouldFilters, i, context));
}
}
}
@@ -75,15 +76,15 @@ public class BooleanFilter extends Filte
if (notFilters!=null) {
for (int i = 0; i < notFilters.size(); i++) {
if (res == null) {
- res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
+ res = new OpenBitSetDISI(getDISI(notFilters, i, context), reader.maxDoc());
res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
} else {
- DocIdSet dis = notFilters.get(i).getDocIdSet(reader);
+ DocIdSet dis = notFilters.get(i).getDocIdSet(context);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.andNot((OpenBitSet) dis);
} else {
- res.inPlaceNot(getDISI(notFilters, i, reader));
+ res.inPlaceNot(getDISI(notFilters, i, context));
}
}
}
@@ -92,14 +93,14 @@ public class BooleanFilter extends Filte
if (mustFilters!=null) {
for (int i = 0; i < mustFilters.size(); i++) {
if (res == null) {
- res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
+ res = new OpenBitSetDISI(getDISI(mustFilters, i, context), reader.maxDoc());
} else {
- DocIdSet dis = mustFilters.get(i).getDocIdSet(reader);
+ DocIdSet dis = mustFilters.get(i).getDocIdSet(context);
if(dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
res.and((OpenBitSet) dis);
} else {
- res.inPlaceAnd(getDISI(mustFilters, i, reader));
+ res.inPlaceAnd(getDISI(mustFilters, i, context));
}
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java Wed Feb 9 09:35:27 2011
@@ -21,10 +21,9 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Similarity;
+
/**
* The BoostingQuery class can be used to effectively demote results that match a given query.
* Unlike the "NOT" clause, this still selects documents that contain undesirable terms,
@@ -56,10 +55,9 @@ public class BoostingQuery extends Query
@Override
public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery result = new BooleanQuery() {
-
@Override
- public Similarity getSimilarity(IndexSearcher searcher) {
- return new DefaultSimilarity() {
+ public Weight createWeight(IndexSearcher searcher) throws IOException {
+ return new BooleanWeight(searcher, false) {
@Override
public float coord(int overlap, int max) {
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/ChainedFilter.java Wed Feb 9 09:35:27 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
@@ -96,21 +97,21 @@ public class ChainedFilter extends Filte
* {@link Filter#getDocIdSet}.
*/
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException
{
int[] index = new int[1]; // use array as reference to modifiable int;
index[0] = 0; // an object attribute would not be thread safe.
if (logic != -1)
- return getDocIdSet(reader, logic, index);
+ return getDocIdSet(context, logic, index);
else if (logicArray != null)
- return getDocIdSet(reader, logicArray, index);
+ return getDocIdSet(context, logicArray, index);
else
- return getDocIdSet(reader, DEFAULT, index);
+ return getDocIdSet(context, DEFAULT, index);
}
- private DocIdSetIterator getDISI(Filter filter, IndexReader reader)
+ private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
throws IOException {
- DocIdSet docIdSet = filter.getDocIdSet(reader);
+ DocIdSet docIdSet = filter.getDocIdSet(context);
if (docIdSet == null) {
return DocIdSet.EMPTY_DOCIDSET.iterator();
} else {
@@ -123,9 +124,10 @@ public class ChainedFilter extends Filte
}
}
- private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
+ private OpenBitSetDISI initialResult(AtomicReaderContext context, int logic, int[] index)
throws IOException
{
+ IndexReader reader = context.reader;
OpenBitSetDISI result;
/**
* First AND operation takes place against a completely false
@@ -133,12 +135,12 @@ public class ChainedFilter extends Filte
*/
if (logic == AND)
{
- result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
+ result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
++index[0];
}
else if (logic == ANDNOT)
{
- result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
+ result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
result.flip(0,reader.maxDoc()); // NOTE: may set bits for deleted docs.
++index[0];
}
@@ -155,13 +157,13 @@ public class ChainedFilter extends Filte
* @param logic Logical operation
* @return DocIdSet
*/
- private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index)
+ private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index)
throws IOException
{
- OpenBitSetDISI result = initialResult(reader, logic, index);
+ OpenBitSetDISI result = initialResult(context, logic, index);
for (; index[0] < chain.length; index[0]++)
{
- doChain(result, logic, chain[index[0]].getDocIdSet(reader));
+ doChain(result, logic, chain[index[0]].getDocIdSet(context));
}
return result;
}
@@ -172,16 +174,16 @@ public class ChainedFilter extends Filte
* @param logic Logical operation
* @return DocIdSet
*/
- private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index)
+ private DocIdSet getDocIdSet(AtomicReaderContext info, int[] logic, int[] index)
throws IOException
{
if (logic.length != chain.length)
throw new IllegalArgumentException("Invalid number of elements in logic array");
- OpenBitSetDISI result = initialResult(reader, logic[0], index);
+ OpenBitSetDISI result = initialResult(info, logic[0], index);
for (; index[0] < chain.length; index[0]++)
{
- doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader));
+ doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(info));
}
return result;
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java Wed Feb 9 09:35:27 2011
@@ -19,6 +19,7 @@ import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.TermsEnum;
@@ -27,7 +28,8 @@ import org.apache.lucene.util.OpenBitSet
import org.apache.lucene.util.Bits;
public class DuplicateFilter extends Filter
-{
+{ // TODO: make duplicate filter aware of ReaderContext such that we can
+ // filter duplicates across segments
String fieldName;
@@ -70,15 +72,15 @@ public class DuplicateFilter extends Fil
}
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException
{
if(processingMode==PM_FAST_INVALIDATION)
{
- return fastBits(reader);
+ return fastBits(context.reader);
}
else
{
- return correctBits(reader);
+ return correctBits(context.reader);
}
}
@@ -96,7 +98,7 @@ public class DuplicateFilter extends Fil
} else {
docs = termsEnum.docs(delDocs, docs);
int doc = docs.nextDoc();
- if (doc != docs.NO_MORE_DOCS) {
+ if (doc != DocsEnum.NO_MORE_DOCS) {
if (keepMode == KM_USE_FIRST_OCCURRENCE) {
bits.set(doc);
} else {
@@ -104,7 +106,7 @@ public class DuplicateFilter extends Fil
while (true) {
lastDoc = doc;
doc = docs.nextDoc();
- if (doc == docs.NO_MORE_DOCS) {
+ if (doc == DocsEnum.NO_MORE_DOCS) {
break;
}
}
@@ -136,7 +138,7 @@ public class DuplicateFilter extends Fil
// unset potential duplicates
docs = termsEnum.docs(delDocs, docs);
int doc = docs.nextDoc();
- if (doc != docs.NO_MORE_DOCS) {
+ if (doc != DocsEnum.NO_MORE_DOCS) {
if (keepMode == KM_USE_FIRST_OCCURRENCE) {
doc = docs.nextDoc();
}
@@ -147,7 +149,7 @@ public class DuplicateFilter extends Fil
lastDoc = doc;
bits.clear(lastDoc);
doc = docs.nextDoc();
- if (doc == docs.NO_MORE_DOCS) {
+ if (doc == DocsEnum.NO_MORE_DOCS) {
break;
}
}
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Wed Feb 9 09:35:27 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
@@ -108,8 +109,8 @@ public final class FieldCacheRewriteMeth
* results.
*/
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(reader, query.field);
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, query.field);
final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd());
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@@ -122,7 +123,11 @@ public final class FieldCacheRewriteMeth
public TermsEnum iterator() throws IOException {
return fcsi.getTermsEnum();
}
-
+
+ @Override
+ public long getSumTotalTermFreq() {
+ return -1;
+ }
});
assert termsEnum != null;
@@ -142,7 +147,7 @@ public final class FieldCacheRewriteMeth
return DocIdSet.EMPTY_DOCIDSET;
}
- return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) {
+ return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) {
@Override
boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
return termSet.fastGet(fcsi.getOrd(doc));
Modified: lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/branches/docvalues/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Wed Feb 9 09:35:27 2011
@@ -292,7 +292,7 @@ public class FuzzyLikeThisQuery extends
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
- TermQuery tq = new FuzzyTermQuery(st.term,ignoreTF);
+ Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
tq.setBoost(st.score); // set the boost to a mix of IDF and score
bq.add(tq, BooleanClause.Occur.SHOULD);
}
@@ -303,7 +303,8 @@ public class FuzzyLikeThisQuery extends
.hasNext();)
{
ScoreTerm st = iterator2.next();
- TermQuery tq = new FuzzyTermQuery(st.term,ignoreTF); // found a match
+ // found a match
+ Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
tq.setBoost(st.score); // set the boost using the ScoreTerm's score
termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
@@ -348,45 +349,8 @@ public class FuzzyLikeThisQuery extends
return termA.score < termB.score;
}
- }
-
- //overrides basic TermQuery to negate effects of IDF (idf is factored into boost of containing BooleanQuery)
- private static class FuzzyTermQuery extends TermQuery
- {
- boolean ignoreTF;
- public FuzzyTermQuery(Term t, boolean ignoreTF)
- {
- super(t);
- this.ignoreTF=ignoreTF;
- }
- @Override
- public Similarity getSimilarity(IndexSearcher searcher)
- {
- Similarity result = super.getSimilarity(searcher);
- result = new SimilarityDelegator(result) {
-
- @Override
- public float tf(float freq)
- {
- if(ignoreTF)
- {
- return 1; //ignore tf
- }
- return super.tf(freq);
- }
- @Override
- public float idf(int docFreq, int numDocs)
- {
- //IDF is already factored into individual term boosts
- return 1;
- }
- };
- return result;
- }
- }
+ }
-
-
/* (non-Javadoc)
* @see org.apache.lucene.search.Query#toString(java.lang.String)
*/