You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/10 15:58:49 UTC

lucene-solr:master: LUCENE-7326: don't use postings format by name in this test

Repository: lucene-solr
Updated Branches:
  refs/heads/master 0eacfe87c -> 816fae962


LUCENE-7326: don't use postings format by name in this test


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/816fae96
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/816fae96
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/816fae96

Branch: refs/heads/master
Commit: 816fae9622d9719fd38a5381a7029383e54d2e77
Parents: 0eacfe8
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Jun 10 11:57:14 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Jun 10 11:57:41 2016 -0400

----------------------------------------------------------------------
 .../index/BasePostingsFormatTestCase.java       | 283 +++++++++----------
 1 file changed, 138 insertions(+), 145 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/816fae96/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
index 10ed5b1..d6cf1e5 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@@ -16,13 +16,6 @@
  */
 package org.apache.lucene.index;
 
-import static org.apache.lucene.index.PostingsEnum.ALL;
-import static org.apache.lucene.index.PostingsEnum.FREQS;
-import static org.apache.lucene.index.PostingsEnum.NONE;
-import static org.apache.lucene.index.PostingsEnum.OFFSETS;
-import static org.apache.lucene.index.PostingsEnum.PAYLOADS;
-import static org.apache.lucene.index.PostingsEnum.POSITIONS;
-
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.Collections;
@@ -38,9 +31,8 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.FilterCodec;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.asserting.AssertingCodec;
-import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -51,13 +43,19 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.RamUsageTester;
 import org.apache.lucene.util.TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
+import static org.apache.lucene.index.PostingsEnum.ALL;
+import static org.apache.lucene.index.PostingsEnum.FREQS;
+import static org.apache.lucene.index.PostingsEnum.NONE;
+import static org.apache.lucene.index.PostingsEnum.OFFSETS;
+import static org.apache.lucene.index.PostingsEnum.PAYLOADS;
+import static org.apache.lucene.index.PostingsEnum.POSITIONS;
+
 /**
  * Abstract class to do basic tests for a postings format.
  * NOTE: This test focuses on the postings
@@ -165,8 +163,9 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
 
     te.seekExact(fieldAndTerm.term);
     checkReuse(te, PostingsEnum.FREQS, PostingsEnum.ALL, false);
-    if (isPostingsEnumReuseImplemented())
+    if (isPostingsEnumReuseImplemented()) {
       checkReuse(te, PostingsEnum.ALL, PostingsEnum.ALL, true);
+    }
 
     fieldsProducer.close();
     dir.close();
@@ -175,10 +174,11 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
   protected static void checkReuse(TermsEnum termsEnum, int firstFlags, int secondFlags, boolean shouldReuse) throws IOException {
     PostingsEnum postings1 = termsEnum.postings(null, firstFlags);
     PostingsEnum postings2 = termsEnum.postings(postings1, secondFlags);
-    if (shouldReuse)
+    if (shouldReuse) {
       assertSame("Expected PostingsEnum " + postings1.getClass().getName() + " to be reused", postings1, postings2);
-    else
+    } else {
       assertNotSame("Expected PostingsEnum " + postings1.getClass().getName() + " to not be reused", postings1, postings2);
+    }
   }
   
   public void testJustEmptyField() throws Exception {
@@ -337,61 +337,104 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
     // TODO: would be better to use / delegate to the current
     // Codec returned by getCodec()
 
-    iwc.setCodec(new AssertingCodec() {
+    iwc.setCodec(new FilterCodec(getCodec().getName(), getCodec()) {
         @Override
-        public PostingsFormat getPostingsFormatForField(String field) {
+        public PostingsFormat postingsFormat() {
 
-          PostingsFormat p = getCodec().postingsFormat();
-          if (p instanceof PerFieldPostingsFormat) {
-            p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
-          }
-          final PostingsFormat defaultPostingsFormat = p;
+          final PostingsFormat defaultPostingsFormat = delegate.postingsFormat();
 
           final Thread mainThread = Thread.currentThread();
 
-          if (field.equals("body")) {
-
-            // A PF that counts up some stats and then in
-            // the end we verify the stats match what the
-            // final IndexReader says, just to exercise the
-            // new freedom of iterating the postings more
-            // than once at flush/merge:
-
-            return new PostingsFormat(defaultPostingsFormat.getName()) {
-
-              @Override
-              public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
-
-                final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
-
-                return new FieldsConsumer() {
-                  @Override
-                  public void write(Fields fields) throws IOException {
-                    fieldsConsumer.write(fields);
-
-                    boolean isMerge = state.context.context == IOContext.Context.MERGE;
-
-                    // We only use one thread for flushing
-                    // in this test:
-                    assert isMerge || Thread.currentThread() == mainThread;
+          // A PF that counts up some stats and then in
+          // the end we verify the stats match what the
+          // final IndexReader says, just to exercise the
+          // new freedom of iterating the postings more
+          // than once at flush/merge:
+
+          return new PostingsFormat(defaultPostingsFormat.getName()) {
+
+            @Override
+            public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
+
+              final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
+
+              return new FieldsConsumer() {
+                @Override
+                public void write(Fields fields) throws IOException {
+                  fieldsConsumer.write(fields);
+
+                  boolean isMerge = state.context.context == IOContext.Context.MERGE;
+
+                  // We only use one thread for flushing
+                  // in this test:
+                  assert isMerge || Thread.currentThread() == mainThread;
+
+                  // We iterate the provided TermsEnum
+                  // twice, so we excercise this new freedom
+                  // with the inverted API; if
+                  // addOnSecondPass is true, we add up
+                  // term stats on the 2nd iteration:
+                  boolean addOnSecondPass = random().nextBoolean();
+
+                  //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
+
+                  // Gather our own stats:
+                  Terms terms = fields.terms("body");
+                  assert terms != null;
+
+                  TermsEnum termsEnum = terms.iterator();
+                  PostingsEnum docs = null;
+                  while(termsEnum.next() != null) {
+                    BytesRef term = termsEnum.term();
+                    // TODO: also sometimes ask for payloads/offsets?
+                    boolean noPositions = random().nextBoolean();
+                    if (noPositions) {
+                      docs = termsEnum.postings(docs, PostingsEnum.FREQS);
+                    } else {
+                      docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
+                    }
+                    int docFreq = 0;
+                    long totalTermFreq = 0;
+                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
+                      docFreq++;
+                      totalTermFreq += docs.freq();
+                      int limit = TestUtil.nextInt(random(), 1, docs.freq());
+                      if (!noPositions) {
+                        for (int i = 0; i < limit; i++) {
+                          docs.nextPosition();
+                        }
+                      }
+                    }
 
-                    // We iterate the provided TermsEnum
-                    // twice, so we excercise this new freedom
-                    // with the inverted API; if
-                    // addOnSecondPass is true, we add up
-                    // term stats on the 2nd iteration:
-                    boolean addOnSecondPass = random().nextBoolean();
+                    String termString = term.utf8ToString();
 
-                    //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
+                    // During merge we should only see terms
+                    // we had already seen during a
+                    // previous flush:
+                    assertTrue(isMerge==false || termFreqs.containsKey(termString));
 
-                    // Gather our own stats:
-                    Terms terms = fields.terms("body");
-                    assert terms != null;
+                    if (isMerge == false) {
+                      if (addOnSecondPass == false) {
+                        TermFreqs tf = termFreqs.get(termString);
+                        if (tf == null) {
+                          tf = new TermFreqs();
+                          termFreqs.put(termString, tf);
+                        }
+                        tf.docFreq += docFreq;
+                        tf.totalTermFreq += totalTermFreq;
+                        sumDocFreq.addAndGet(docFreq);
+                        sumTotalTermFreq.addAndGet(totalTermFreq);
+                      } else if (termFreqs.containsKey(termString) == false) {
+                        // Add placeholder (2nd pass will
+                        // set its counts):
+                        termFreqs.put(termString, new TermFreqs());
+                      }
+                    }
+                  }
 
-                    TermsEnum termsEnum = terms.iterator();
-                    PostingsEnum docs = null;
-                    while(termsEnum.next() != null) {
-                      BytesRef term = termsEnum.term();
+                  // Also test seeking the TermsEnum:
+                  for(String term : termFreqs.keySet()) {
+                    if (termsEnum.seekExact(new BytesRef(term))) {
                       // TODO: also sometimes ask for payloads/offsets?
                       boolean noPositions = random().nextBoolean();
                       if (noPositions) {
@@ -399,6 +442,7 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
                       } else {
                         docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                       }
+
                       int docFreq = 0;
                       long totalTermFreq = 0;
                       while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
@@ -412,96 +456,43 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
                         }
                       }
 
-                      String termString = term.utf8ToString();
-
-                      // During merge we should only see terms
-                      // we had already seen during a
-                      // previous flush:
-                      assertTrue(isMerge==false || termFreqs.containsKey(termString));
-
-                      if (isMerge == false) {
-                        if (addOnSecondPass == false) {
-                          TermFreqs tf = termFreqs.get(termString);
-                          if (tf == null) {
-                            tf = new TermFreqs();
-                            termFreqs.put(termString, tf);
-                          }
-                          tf.docFreq += docFreq;
-                          tf.totalTermFreq += totalTermFreq;
-                          sumDocFreq.addAndGet(docFreq);
-                          sumTotalTermFreq.addAndGet(totalTermFreq);
-                        } else if (termFreqs.containsKey(termString) == false) {
-                          // Add placeholder (2nd pass will
-                          // set its counts):
-                          termFreqs.put(termString, new TermFreqs());
-                        }
-                      }
-                    }
-
-                    // Also test seeking the TermsEnum:
-                    for(String term : termFreqs.keySet()) {
-                      if (termsEnum.seekExact(new BytesRef(term))) {
-                        // TODO: also sometimes ask for payloads/offsets?
-                        boolean noPositions = random().nextBoolean();
-                        if (noPositions) {
-                          docs = termsEnum.postings(docs, PostingsEnum.FREQS);
-                        } else {
-                          docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
-                        }
-
-                        int docFreq = 0;
-                        long totalTermFreq = 0;
-                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
-                          docFreq++;
-                          totalTermFreq += docs.freq();
-                          int limit = TestUtil.nextInt(random(), 1, docs.freq());
-                          if (!noPositions) {
-                            for (int i = 0; i < limit; i++) {
-                              docs.nextPosition();
-                            }
-                          }
-                        }
-
-                        if (isMerge == false && addOnSecondPass) {
-                          TermFreqs tf = termFreqs.get(term);
-                          assert tf != null;
-                          tf.docFreq += docFreq;
-                          tf.totalTermFreq += totalTermFreq;
-                          sumDocFreq.addAndGet(docFreq);
-                          sumTotalTermFreq.addAndGet(totalTermFreq);
-                        }
-
-                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
-                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
-                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
+                      if (isMerge == false && addOnSecondPass) {
+                        TermFreqs tf = termFreqs.get(term);
+                        assert tf != null;
+                        tf.docFreq += docFreq;
+                        tf.totalTermFreq += totalTermFreq;
+                        sumDocFreq.addAndGet(docFreq);
+                        sumTotalTermFreq.addAndGet(totalTermFreq);
                       }
-                    }
 
-                    // Also test seekCeil
-                    for(int iter=0;iter<10;iter++) {
-                      BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
-                      SeekStatus status = termsEnum.seekCeil(term);
-                      if (status == SeekStatus.NOT_FOUND) {
-                        assertTrue(term.compareTo(termsEnum.term()) < 0);
-                      }
+                      //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
+                      assertTrue(docFreq <= termFreqs.get(term).docFreq);
+                      assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                     }
                   }
 
-                  @Override
-                  public void close() throws IOException {
-                    fieldsConsumer.close();
+                  // Also test seekCeil
+                  for(int iter=0;iter<10;iter++) {
+                    BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
+                    SeekStatus status = termsEnum.seekCeil(term);
+                    if (status == SeekStatus.NOT_FOUND) {
+                      assertTrue(term.compareTo(termsEnum.term()) < 0);
+                    }
                   }
-                };
-              }
-
-              @Override
-              public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-                return defaultPostingsFormat.fieldsProducer(state);
-              }
-            };
-          } else {
-            return defaultPostingsFormat;
-          }
+                }
+
+                @Override
+                public void close() throws IOException {
+                  fieldsConsumer.close();
+                }
+              };
+            }
+
+            @Override
+            public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+              return defaultPostingsFormat.fieldsProducer(state);
+            }
+          };
         }
       });
 
@@ -512,8 +503,10 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
     int bytesIndexed = 0;
     while (bytesIndexed < bytesToIndex) {
       Document doc = docs.nextDoc();
-      w.addDocument(doc);
-      bytesIndexed += RamUsageTester.sizeOf(doc);
+      Document justBodyDoc = new Document();
+      justBodyDoc.add(doc.getField("body"));
+      w.addDocument(justBodyDoc);
+      bytesIndexed += RamUsageTester.sizeOf(justBodyDoc);
     }
 
     IndexReader r = w.getReader();