You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/10 15:58:49 UTC
lucene-solr:master: LUCENE-7326: don't use postings format by name in
this test
Repository: lucene-solr
Updated Branches:
refs/heads/master 0eacfe87c -> 816fae962
LUCENE-7326: don't use postings format by name in this test
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/816fae96
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/816fae96
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/816fae96
Branch: refs/heads/master
Commit: 816fae9622d9719fd38a5381a7029383e54d2e77
Parents: 0eacfe8
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Jun 10 11:57:14 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Jun 10 11:57:41 2016 -0400
----------------------------------------------------------------------
.../index/BasePostingsFormatTestCase.java | 283 +++++++++----------
1 file changed, 138 insertions(+), 145 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/816fae96/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
index 10ed5b1..d6cf1e5 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@@ -16,13 +16,6 @@
*/
package org.apache.lucene.index;
-import static org.apache.lucene.index.PostingsEnum.ALL;
-import static org.apache.lucene.index.PostingsEnum.FREQS;
-import static org.apache.lucene.index.PostingsEnum.NONE;
-import static org.apache.lucene.index.PostingsEnum.OFFSETS;
-import static org.apache.lucene.index.PostingsEnum.PAYLOADS;
-import static org.apache.lucene.index.PostingsEnum.POSITIONS;
-
import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
@@ -38,9 +31,8 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -51,13 +43,19 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
+import static org.apache.lucene.index.PostingsEnum.ALL;
+import static org.apache.lucene.index.PostingsEnum.FREQS;
+import static org.apache.lucene.index.PostingsEnum.NONE;
+import static org.apache.lucene.index.PostingsEnum.OFFSETS;
+import static org.apache.lucene.index.PostingsEnum.PAYLOADS;
+import static org.apache.lucene.index.PostingsEnum.POSITIONS;
+
/**
* Abstract class to do basic tests for a postings format.
* NOTE: This test focuses on the postings
@@ -165,8 +163,9 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
te.seekExact(fieldAndTerm.term);
checkReuse(te, PostingsEnum.FREQS, PostingsEnum.ALL, false);
- if (isPostingsEnumReuseImplemented())
+ if (isPostingsEnumReuseImplemented()) {
checkReuse(te, PostingsEnum.ALL, PostingsEnum.ALL, true);
+ }
fieldsProducer.close();
dir.close();
@@ -175,10 +174,11 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
protected static void checkReuse(TermsEnum termsEnum, int firstFlags, int secondFlags, boolean shouldReuse) throws IOException {
PostingsEnum postings1 = termsEnum.postings(null, firstFlags);
PostingsEnum postings2 = termsEnum.postings(postings1, secondFlags);
- if (shouldReuse)
+ if (shouldReuse) {
assertSame("Expected PostingsEnum " + postings1.getClass().getName() + " to be reused", postings1, postings2);
- else
+ } else {
assertNotSame("Expected PostingsEnum " + postings1.getClass().getName() + " to not be reused", postings1, postings2);
+ }
}
public void testJustEmptyField() throws Exception {
@@ -337,61 +337,104 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
// TODO: would be better to use / delegate to the current
// Codec returned by getCodec()
- iwc.setCodec(new AssertingCodec() {
+ iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {
@Override
- public PostingsFormat getPostingsFormatForField(String field) {
+ public PostingsFormat postingsFormat() {
- PostingsFormat p = getCodec().postingsFormat();
- if (p instanceof PerFieldPostingsFormat) {
- p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
- }
- final PostingsFormat defaultPostingsFormat = p;
+ final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
final Thread mainThread = Thread.currentThread();
- if (field.equals("body")) {
-
- // A PF that counts up some stats and then in
- // the end we verify the stats match what the
- // final IndexReader says, just to exercise the
- // new freedom of iterating the postings more
- // than once at flush/merge:
-
- return new PostingsFormat(defaultPostingsFormat.getName()) {
-
- @Override
- public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
-
- final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
-
- return new FieldsConsumer() {
- @Override
- public void write(Fields fields) throws IOException {
- fieldsConsumer.write(fields);
-
- boolean isMerge = state.context.context == IOContext.Context.MERGE;
-
- // We only use one thread for flushing
- // in this test:
- assert isMerge || Thread.currentThread() == mainThread;
+ // A PF that counts up some stats and then in
+ // the end we verify the stats match what the
+ // final IndexReader says, just to exercise the
+ // new freedom of iterating the postings more
+ // than once at flush/merge:
+
+ return new PostingsFormat(defaultPostingsFormat.getName()) {
+
+ @Override
+ public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
+
+ final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
+
+ return new FieldsConsumer() {
+ @Override
+ public void write(Fields fields) throws IOException {
+ fieldsConsumer.write(fields);
+
+ boolean isMerge = state.context.context == IOContext.Context.MERGE;
+
+ // We only use one thread for flushing
+ // in this test:
+ assert isMerge || Thread.currentThread() == mainThread;
+
+ // We iterate the provided TermsEnum
+ // twice, so we excercise this new freedom
+ // with the inverted API; if
+ // addOnSecondPass is true, we add up
+ // term stats on the 2nd iteration:
+ boolean addOnSecondPass = random().nextBoolean();
+
+ //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
+
+ // Gather our own stats:
+ Terms terms = fields.terms("body");
+ assert terms != null;
+
+ TermsEnum termsEnum = terms.iterator();
+ PostingsEnum docs = null;
+ while(termsEnum.next() != null) {
+ BytesRef term = termsEnum.term();
+ // TODO: also sometimes ask for payloads/offsets?
+ boolean noPositions = random().nextBoolean();
+ if (noPositions) {
+ docs = termsEnum.postings(docs, PostingsEnum.FREQS);
+ } else {
+ docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
+ }
+ int docFreq = 0;
+ long totalTermFreq = 0;
+ while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
+ docFreq++;
+ totalTermFreq += docs.freq();
+ int limit = TestUtil.nextInt(random(), 1, docs.freq());
+ if (!noPositions) {
+ for (int i = 0; i < limit; i++) {
+ docs.nextPosition();
+ }
+ }
+ }
- // We iterate the provided TermsEnum
- // twice, so we excercise this new freedom
- // with the inverted API; if
- // addOnSecondPass is true, we add up
- // term stats on the 2nd iteration:
- boolean addOnSecondPass = random().nextBoolean();
+ String termString = term.utf8ToString();
- //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
+ // During merge we should only see terms
+ // we had already seen during a
+ // previous flush:
+ assertTrue(isMerge==false || termFreqs.containsKey(termString));
- // Gather our own stats:
- Terms terms = fields.terms("body");
- assert terms != null;
+ if (isMerge == false) {
+ if (addOnSecondPass == false) {
+ TermFreqs tf = termFreqs.get(termString);
+ if (tf == null) {
+ tf = new TermFreqs();
+ termFreqs.put(termString, tf);
+ }
+ tf.docFreq += docFreq;
+ tf.totalTermFreq += totalTermFreq;
+ sumDocFreq.addAndGet(docFreq);
+ sumTotalTermFreq.addAndGet(totalTermFreq);
+ } else if (termFreqs.containsKey(termString) == false) {
+ // Add placeholder (2nd pass will
+ // set its counts):
+ termFreqs.put(termString, new TermFreqs());
+ }
+ }
+ }
- TermsEnum termsEnum = terms.iterator();
- PostingsEnum docs = null;
- while(termsEnum.next() != null) {
- BytesRef term = termsEnum.term();
+ // Also test seeking the TermsEnum:
+ for(String term : termFreqs.keySet()) {
+ if (termsEnum.seekExact(new BytesRef(term))) {
// TODO: also sometimes ask for payloads/offsets?
boolean noPositions = random().nextBoolean();
if (noPositions) {
@@ -399,6 +442,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
} else {
docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
}
+
int docFreq = 0;
long totalTermFreq = 0;
while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
@@ -412,96 +456,43 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
}
}
- String termString = term.utf8ToString();
-
- // During merge we should only see terms
- // we had already seen during a
- // previous flush:
- assertTrue(isMerge==false || termFreqs.containsKey(termString));
-
- if (isMerge == false) {
- if (addOnSecondPass == false) {
- TermFreqs tf = termFreqs.get(termString);
- if (tf == null) {
- tf = new TermFreqs();
- termFreqs.put(termString, tf);
- }
- tf.docFreq += docFreq;
- tf.totalTermFreq += totalTermFreq;
- sumDocFreq.addAndGet(docFreq);
- sumTotalTermFreq.addAndGet(totalTermFreq);
- } else if (termFreqs.containsKey(termString) == false) {
- // Add placeholder (2nd pass will
- // set its counts):
- termFreqs.put(termString, new TermFreqs());
- }
- }
- }
-
- // Also test seeking the TermsEnum:
- for(String term : termFreqs.keySet()) {
- if (termsEnum.seekExact(new BytesRef(term))) {
- // TODO: also sometimes ask for payloads/offsets?
- boolean noPositions = random().nextBoolean();
- if (noPositions) {
- docs = termsEnum.postings(docs, PostingsEnum.FREQS);
- } else {
- docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
- }
-
- int docFreq = 0;
- long totalTermFreq = 0;
- while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
- docFreq++;
- totalTermFreq += docs.freq();
- int limit = TestUtil.nextInt(random(), 1, docs.freq());
- if (!noPositions) {
- for (int i = 0; i < limit; i++) {
- docs.nextPosition();
- }
- }
- }
-
- if (isMerge == false && addOnSecondPass) {
- TermFreqs tf = termFreqs.get(term);
- assert tf != null;
- tf.docFreq += docFreq;
- tf.totalTermFreq += totalTermFreq;
- sumDocFreq.addAndGet(docFreq);
- sumTotalTermFreq.addAndGet(totalTermFreq);
- }
-
- //System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
- assertTrue(docFreq <= termFreqs.get(term).docFreq);
- assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
+ if (isMerge == false && addOnSecondPass) {
+ TermFreqs tf = termFreqs.get(term);
+ assert tf != null;
+ tf.docFreq += docFreq;
+ tf.totalTermFreq += totalTermFreq;
+ sumDocFreq.addAndGet(docFreq);
+ sumTotalTermFreq.addAndGet(totalTermFreq);
}
- }
- // Also test seekCeil
- for(int iter=0;iter<10;iter++) {
- BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
- SeekStatus status = termsEnum.seekCeil(term);
- if (status == SeekStatus.NOT_FOUND) {
- assertTrue(term.compareTo(termsEnum.term()) < 0);
- }
+ //System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
+ assertTrue(docFreq <= termFreqs.get(term).docFreq);
+ assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
}
}
- @Override
- public void close() throws IOException {
- fieldsConsumer.close();
+ // Also test seekCeil
+ for(int iter=0;iter<10;iter++) {
+ BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
+ SeekStatus status = termsEnum.seekCeil(term);
+ if (status == SeekStatus.NOT_FOUND) {
+ assertTrue(term.compareTo(termsEnum.term()) < 0);
+ }
}
- };
- }
-
- @Override
- public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- return defaultPostingsFormat.fieldsProducer(state);
- }
- };
- } else {
- return defaultPostingsFormat;
- }
+ }
+
+ @Override
+ public void close() throws IOException {
+ fieldsConsumer.close();
+ }
+ };
+ }
+
+ @Override
+ public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+ return defaultPostingsFormat.fieldsProducer(state);
+ }
+ };
}
});
@@ -512,8 +503,10 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
int bytesIndexed = 0;
while (bytesIndexed < bytesToIndex) {
Document doc = docs.nextDoc();
- w.addDocument(doc);
- bytesIndexed += RamUsageTester.sizeOf(doc);
+ Document justBodyDoc = new Document();
+ justBodyDoc.add(doc.getField("body"));
+ w.addDocument(justBodyDoc);
+ bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
}
IndexReader r = w.getReader();