You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/04/02 17:05:49 UTC
svn commit: r1670918 [5/5] - in /lucene/dev/trunk/lucene: ./
codecs/src/java/org/apache/lucene/codecs/autoprefix/
codecs/src/resources/META-INF/services/
codecs/src/test/org/apache/lucene/codecs/autoprefix/
core/src/java/org/apache/lucene/codecs/ core/...
Added: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java?rev=1670918&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java (added)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java Thu Apr 2 15:05:48 2015
@@ -0,0 +1,1318 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Random;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FlushInfo;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
+import org.apache.lucene.util.automaton.AutomatonTestUtil;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+
+/** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */
+public class RandomPostingsTester {
+
+  /** Which features to test; callers pass an {@link java.util.EnumSet} of these to testTerms. */
+  public enum Option {
+    // Sometimes use .advance():
+    SKIPPING,
+
+    // Sometimes reuse the PostingsEnum across terms:
+    REUSE_ENUMS,
+
+    // Sometimes pass non-null live docs:
+    LIVE_DOCS,
+
+    // Sometimes seek to term using previously saved TermState:
+    TERM_STATE,
+
+    // Sometimes don't fully consume docs from the enum
+    PARTIAL_DOC_CONSUME,
+
+    // Sometimes don't fully consume positions at each doc
+    PARTIAL_POS_CONSUME,
+
+    // Sometimes check payloads
+    PAYLOADS,
+
+    // Test w/ multiple threads
+    THREADS
+  };
+
+  // Used by buildIndex to estimate flushed segment size (bytes = totalPostings*8 + totalPayloadBytes).
+  // NOTE(review): neither counter is incremented anywhere in this chunk — confirm writers update them.
+  private long totalPostings;
+  private long totalPayloadBytes;
+
+  // Holds all postings: field name -> (term, sorted) -> seed/ord used to regenerate that term's postings:
+  private Map<String,SortedMap<BytesRef,SeedAndOrd>> fields;
+
+  private FieldInfos fieldInfos;
+
+  // Randomly-populated live docs over [0, maxDoc); built at the end of the constructor:
+  private FixedBitSet globalLiveDocs;
+
+  // Flattened (field, term, ord) list across all fields, for random access by tests:
+  List<FieldAndTerm> allTerms;
+  private int maxDoc;  // number of docs (last docID + 1)
+
+  // Top-level test randomness; per-term postings streams are seeded separately:
+  final Random random;
+
+  /** Builds the random postings model: picks fields and terms, assigns each term a
+   *  seed that deterministically regenerates its postings, computes maxDoc, and
+   *  populates {@code globalLiveDocs} and {@code allTerms}. */
+  public RandomPostingsTester(Random random) throws IOException {
+    fields = new TreeMap<>();
+
+    this.random = random;
+
+    final int numFields = TestUtil.nextInt(random, 1, 5);
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("TEST: " + numFields + " fields");
+    }
+    maxDoc = 0;
+
+    FieldInfo[] fieldInfoArray = new FieldInfo[numFields];
+    int fieldUpto = 0;
+    while (fieldUpto < numFields) {
+      String field = TestUtil.randomSimpleString(random);
+      // Retry on duplicate field names:
+      if (fields.containsKey(field)) {
+        continue;
+      }
+
+      // Declare the richest options here; buildIndex may later downgrade per field:
+      fieldInfoArray[fieldUpto] = new FieldInfo(field, fieldUpto, false, false, true,
+                                                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
+                                                DocValuesType.NONE, -1, new HashMap<>());
+      fieldUpto++;
+
+      SortedMap<BytesRef,SeedAndOrd> postings = new TreeMap<>();
+      fields.put(field, postings);
+      Set<String> seenTerms = new HashSet<>();
+
+      int numTerms;
+      if (random.nextInt(10) == 7) {
+        numTerms = LuceneTestCase.atLeast(random, 50);
+      } else {
+        numTerms = TestUtil.nextInt(random, 2, 20);
+      }
+
+      while (postings.size() < numTerms) {
+        int termUpto = postings.size();
+        // Cannot contain surrogates else default Java string sort order (by UTF16 code unit) is different from Lucene:
+        String term = TestUtil.randomSimpleString(random);
+        if (seenTerms.contains(term)) {
+          continue;
+        }
+        seenTerms.add(term);
+
+        // The prefix encodes the docFreq class that getSeedPostings will decode:
+        if (LuceneTestCase.TEST_NIGHTLY && termUpto == 0 && fieldUpto == 1) {
+          // Make 1 big term:
+          term = "big_" + term;
+        } else if (termUpto == 1 && fieldUpto == 1) {
+          // Make 1 medium term:
+          term = "medium_" + term;
+        } else if (random.nextBoolean()) {
+          // Low freq term:
+          term = "low_" + term;
+        } else {
+          // Very low freq term (don't multiply by RANDOM_MULTIPLIER):
+          term = "verylow_" + term;
+        }
+
+        long termSeed = random.nextLong();
+        postings.put(new BytesRef(term), new SeedAndOrd(termSeed));
+
+        // NOTE: sort of silly: we enum all the docs just to
+        // get the maxDoc
+        // (globalLiveDocs is still null here, but withLiveDocs=false so it is never read)
+        PostingsEnum postingsEnum = getSeedPostings(term, termSeed, false, globalLiveDocs, IndexOptions.DOCS, true);
+        int doc;
+        int lastDoc = 0;
+        while((doc = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
+          lastDoc = doc;
+        }
+        maxDoc = Math.max(lastDoc, maxDoc);
+      }
+
+      // assign ords
+      long ord = 0;
+      for(SeedAndOrd ent : postings.values()) {
+        ent.ord = ord++;
+      }
+    }
+
+    fieldInfos = new FieldInfos(fieldInfoArray);
+
+    // It's the count, not the last docID:
+    maxDoc++;
+
+    // Mark a random fraction of all docs live:
+    globalLiveDocs = new FixedBitSet(maxDoc);
+    double liveRatio = random.nextDouble();
+    for(int i=0;i<maxDoc;i++) {
+      if (random.nextDouble() <= liveRatio) {
+        globalLiveDocs.set(i);
+      }
+    }
+
+    // Flatten per-field sorted maps into one (field, term, ord) list:
+    allTerms = new ArrayList<>();
+    for(Map.Entry<String,SortedMap<BytesRef,SeedAndOrd>> fieldEnt : fields.entrySet()) {
+      String field = fieldEnt.getKey();
+      long ord = 0;
+      for(Map.Entry<BytesRef,SeedAndOrd> termEnt : fieldEnt.getValue().entrySet()) {
+        allTerms.add(new FieldAndTerm(field, termEnt.getKey(), ord++));
+      }
+    }
+
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("TEST: done init postings; " + allTerms.size() + " total terms, across " + fieldInfos.size() + " fields");
+    }
+  }
+
+  /** Creates a {@link SeedPostings} for {@code term}; the term's prefix
+   *  ("big_", "medium_", "low_", else very-low) selects the docFreq range. */
+  public static SeedPostings getSeedPostings(String term, long seed, boolean withLiveDocs, Bits globalLiveDocs, IndexOptions options, boolean allowPayloads) {
+    final int minDocFreq;
+    final int maxDocFreq;
+    if (term.startsWith("big_")) {
+      minDocFreq = 50000 * LuceneTestCase.RANDOM_MULTIPLIER;
+      maxDocFreq = 70000 * LuceneTestCase.RANDOM_MULTIPLIER;
+    } else if (term.startsWith("medium_")) {
+      minDocFreq = 3000 * LuceneTestCase.RANDOM_MULTIPLIER;
+      maxDocFreq = 6000 * LuceneTestCase.RANDOM_MULTIPLIER;
+    } else if (term.startsWith("low_")) {
+      minDocFreq = LuceneTestCase.RANDOM_MULTIPLIER;
+      maxDocFreq = 40 * LuceneTestCase.RANDOM_MULTIPLIER;
+    } else {
+      // Very low freq terms: tiny docFreq, independent of RANDOM_MULTIPLIER.
+      minDocFreq = 1;
+      maxDocFreq = 3;
+    }
+
+    final Bits liveDocs = withLiveDocs ? globalLiveDocs : null;
+    return new SeedPostings(seed, minDocFreq, maxDocFreq, liveDocs, options, allowPayloads);
+  }
+
+  /** Given the same random seed this always enumerates the
+   *  same random postings */
+  public static class SeedPostings extends PostingsEnum {
+    // Used only to generate docIDs; this way if you pull w/
+    // or w/o positions you get the same docID sequence:
+    private final Random docRandom;
+    private final Random random;
+    public int docFreq;
+    private final int maxDocSpacing;
+    private final int payloadSize;
+    private final boolean fixedPayloads;
+    private final Bits liveDocs;
+    private final BytesRef payload;
+    private final IndexOptions options;
+    private final boolean doPositions;
+    private final boolean allowPayloads;
+
+    private int docID = -1;
+    private int freq;
+    public int upto;  // how many docs have been produced so far (0..docFreq)
+
+    private int pos;
+    private int offset;
+    private int startOffset;
+    private int endOffset;
+    private int posSpacing;
+    private int posUpto;
+
+    public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, IndexOptions options, boolean allowPayloads) {
+      random = new Random(seed);
+      // Fork a separate stream for docIDs, so consuming (or not) positions
+      // from `random` cannot perturb the doc sequence:
+      docRandom = new Random(random.nextLong());
+      docFreq = TestUtil.nextInt(random, minDocFreq, maxDocFreq);
+      this.liveDocs = liveDocs;
+      this.allowPayloads = allowPayloads;
+
+      // TODO: more realistic to inversely tie this to numDocs:
+      maxDocSpacing = TestUtil.nextInt(random, 1, 100);
+
+      if (random.nextInt(10) == 7) {
+        // 10% of the time create big payloads:
+        payloadSize = 1 + random.nextInt(3);
+      } else {
+        payloadSize = 1 + random.nextInt(1);
+      }
+
+      fixedPayloads = random.nextBoolean();
+      byte[] payloadBytes = new byte[payloadSize];
+      payload = new BytesRef(payloadBytes);
+      this.options = options;
+      // True when the requested options include positions:
+      doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
+    }
+
+    @Override
+    public int nextDoc() {
+      // Loop past deleted docs when liveDocs is set:
+      while(true) {
+        _nextDoc();
+        if (liveDocs == null || docID == NO_MORE_DOCS || liveDocs.get(docID)) {
+          return docID;
+        }
+      }
+    }
+
+    private int _nextDoc() {
+      if (docID == -1) {
+        docID = 0;
+      }
+      // Must consume random:
+      while(posUpto < freq) {
+        nextPosition();
+      }
+
+      if (upto < docFreq) {
+        if (upto == 0 && docRandom.nextBoolean()) {
+          // Sometimes index docID = 0
+        } else if (maxDocSpacing == 1) {
+          docID++;
+        } else {
+          // TODO: sometimes have a biggish gap here!
+          docID += TestUtil.nextInt(docRandom, 1, maxDocSpacing);
+        }
+
+        if (random.nextInt(200) == 17) {
+          freq = TestUtil.nextInt(random, 1, 1000);
+        } else if (random.nextInt(10) == 17) {
+          // NOTE(review): nextInt(10) returns values in [0,10), so this branch
+          // can never be taken (dead code). The call still consumes the random
+          // stream, so "fixing" the comparison would change generated postings.
+          freq = TestUtil.nextInt(random, 1, 20);
+        } else {
+          freq = TestUtil.nextInt(random, 1, 4);
+        }
+
+        // Reset per-doc position/offset state:
+        pos = 0;
+        offset = 0;
+        posUpto = 0;
+        posSpacing = TestUtil.nextInt(random, 1, 100);
+
+        upto++;
+        return docID;
+      } else {
+        return docID = NO_MORE_DOCS;
+      }
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq() {
+      return freq;
+    }
+
+    @Override
+    public int nextPosition() {
+      if (!doPositions) {
+        // Positions not indexed: mark them consumed and return a sentinel.
+        posUpto = freq;
+        return -1;
+      }
+      assert posUpto < freq;
+
+      if (posUpto == 0 && random.nextBoolean()) {
+        // Sometimes index pos = 0
+      } else if (posSpacing == 1) {
+        pos++;
+      } else {
+        pos += TestUtil.nextInt(random, 1, posSpacing);
+      }
+
+      if (payloadSize != 0) {
+        if (fixedPayloads) {
+          payload.length = payloadSize;
+          random.nextBytes(payload.bytes);
+        } else {
+          int thisPayloadSize = random.nextInt(payloadSize);
+          if (thisPayloadSize != 0) {
+            // NOTE(review): length is set to payloadSize, not thisPayloadSize —
+            // confirm whether variable-length payloads were intended here.
+            payload.length = payloadSize;
+            random.nextBytes(payload.bytes);
+          } else {
+            payload.length = 0;
+          }
+        }
+      } else {
+        payload.length = 0;
+      }
+      if (!allowPayloads) {
+        // Caller disallowed payloads entirely:
+        payload.length = 0;
+      }
+
+      // Offsets advance monotonically within the doc:
+      startOffset = offset + random.nextInt(5);
+      endOffset = startOffset + random.nextInt(10);
+      offset = endOffset;
+
+      posUpto++;
+      return pos;
+    }
+
+    @Override
+    public int startOffset() {
+      return startOffset;
+    }
+
+    @Override
+    public int endOffset() {
+      return endOffset;
+    }
+
+    @Override
+    public BytesRef getPayload() {
+      // Zero length means "no payload at this position":
+      return payload.length == 0 ? null : payload;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      // Linear scan via nextDoc(); skipping never changes the generated sequence.
+      return slowAdvance(target);
+    }
+
+    @Override
+    public long cost() {
+      return docFreq;
+    }
+  }
+
+  /** Holds one field, term and ord. */
+  public static class FieldAndTerm {
+    final String field;
+    final BytesRef term;
+    final long ord;
+
+    /** Creates an entry; {@code term} is deep-copied so callers may reuse the incoming ref. */
+    public FieldAndTerm(String field, BytesRef term, long ord) {
+      this.field = field;
+      this.ord = ord;
+      // Defensive copy: the caller's BytesRef may be mutated later.
+      this.term = BytesRef.deepCopyOf(term);
+    }
+  }
+
+  /** Per-term state: the random seed that deterministically regenerates the
+   *  term's postings, plus the term's ord within its field. */
+  private static class SeedAndOrd {
+    final long seed;
+    long ord;  // assigned after all terms for the field are collected
+
+    public SeedAndOrd(long seed) {
+      this.seed = seed;
+    }
+  }
+
+  /** Synthetic {@link Fields} view over the seeded per-field term maps. */
+  private static class SeedFields extends Fields {
+    final Map<String,SortedMap<BytesRef,SeedAndOrd>> fields;
+    final FieldInfos fieldInfos;
+    final IndexOptions maxAllowed;
+    final boolean allowPayloads;
+
+    public SeedFields(Map<String,SortedMap<BytesRef,SeedAndOrd>> fields, FieldInfos fieldInfos, IndexOptions maxAllowed, boolean allowPayloads) {
+      this.allowPayloads = allowPayloads;
+      this.maxAllowed = maxAllowed;
+      this.fieldInfos = fieldInfos;
+      this.fields = fields;
+    }
+
+    @Override
+    public Iterator<String> iterator() {
+      return fields.keySet().iterator();
+    }
+
+    @Override
+    public Terms terms(String field) {
+      SortedMap<BytesRef,SeedAndOrd> termsForField = fields.get(field);
+      if (termsForField == null) {
+        // Field was never indexed:
+        return null;
+      }
+      return new SeedTerms(termsForField, fieldInfos.fieldInfo(field), maxAllowed, allowPayloads);
+    }
+
+    @Override
+    public int size() {
+      return fields.size();
+    }
+  }
+
+  /** Synthetic {@link Terms} over one field's seeded term map. */
+  private static class SeedTerms extends Terms {
+    final SortedMap<BytesRef,SeedAndOrd> terms;
+    final FieldInfo fieldInfo;
+    final IndexOptions maxAllowed;
+    final boolean allowPayloads;
+
+    public SeedTerms(SortedMap<BytesRef,SeedAndOrd> terms, FieldInfo fieldInfo, IndexOptions maxAllowed, boolean allowPayloads) {
+      this.terms = terms;
+      this.fieldInfo = fieldInfo;
+      this.maxAllowed = maxAllowed;
+      this.allowPayloads = allowPayloads;
+    }
+
+    /** Returns a (possibly reused) enum, reset to before the first term. */
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) {
+      SeedTermsEnum termsEnum = null;
+      // Only reuse when the previous enum iterates this exact terms map:
+      if (reuse instanceof SeedTermsEnum) {
+        SeedTermsEnum candidate = (SeedTermsEnum) reuse;
+        if (candidate.terms == terms) {
+          termsEnum = candidate;
+        }
+      }
+      if (termsEnum == null) {
+        termsEnum = new SeedTermsEnum(terms, maxAllowed, allowPayloads);
+      }
+      termsEnum.reset();
+      return termsEnum;
+    }
+
+    @Override
+    public long size() {
+      return terms.size();
+    }
+
+    // Aggregate statistics are not tracked by this synthetic impl:
+
+    @Override
+    public long getSumTotalTermFreq() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumDocFreq() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int getDocCount() {
+      throw new UnsupportedOperationException();
+    }
+
+    // Capability flags derive from the field's IndexOptions:
+
+    @Override
+    public boolean hasFreqs() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return fieldInfo.hasPayloads() && allowPayloads;
+    }
+  }
+
+  /** TermsEnum over the seeded sorted term map; each term's postings are
+   *  regenerated on demand from its stored seed. */
+  private static class SeedTermsEnum extends TermsEnum {
+    final SortedMap<BytesRef,SeedAndOrd> terms;
+    final IndexOptions maxAllowed;
+    final boolean allowPayloads;
+
+    private Iterator<Map.Entry<BytesRef,SeedAndOrd>> iterator;
+
+    private Map.Entry<BytesRef,SeedAndOrd> current;
+
+    public SeedTermsEnum(SortedMap<BytesRef,SeedAndOrd> terms, IndexOptions maxAllowed, boolean allowPayloads) {
+      this.terms = terms;
+      this.maxAllowed = maxAllowed;
+      this.allowPayloads = allowPayloads;
+    }
+
+    void reset() {
+      // Position before the first term:
+      iterator = terms.entrySet().iterator();
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text) {
+      // Seek to the smallest term >= text:
+      SortedMap<BytesRef,SeedAndOrd> tailMap = terms.tailMap(text);
+      if (tailMap.isEmpty()) {
+        return SeekStatus.END;
+      } else {
+        iterator = tailMap.entrySet().iterator();
+        current = iterator.next();
+        if (tailMap.firstKey().equals(text)) {
+          return SeekStatus.FOUND;
+        } else {
+          return SeekStatus.NOT_FOUND;
+        }
+      }
+    }
+
+    @Override
+    public BytesRef next() {
+      if (iterator.hasNext()) {
+        current = iterator.next();
+        return term();
+      } else {
+        // Exhausted; `current` stays on the last term.
+        return null;
+      }
+    }
+
+    @Override
+    public void seekExact(long ord) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public BytesRef term() {
+      return current.getKey();
+    }
+
+    @Override
+    public long ord() {
+      return current.getValue().ord;
+    }
+
+    @Override
+    public int docFreq() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public final PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      // Return null when the requested features exceed what this field was
+      // indexed with (maxAllowed / allowPayloads):
+      if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
+        if (maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          return null;
+        }
+        if (PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+          return null;
+        }
+        if (PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) && allowPayloads == false) {
+          return null;
+        }
+      }
+      if (PostingsEnum.featureRequested(flags, PostingsEnum.FREQS) && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS) < 0) {
+        return null;
+      }
+      // Regenerate the postings from the term's seed:
+      return getSeedPostings(current.getKey().utf8ToString(), current.getValue().seed, false, null, maxAllowed, allowPayloads);
+    }
+  }
+
+  /** Per-thread mutable state for verification. */
+  private static class ThreadState {
+    // Only used with REUSE option:
+    public PostingsEnum reusePostingsEnum;
+  }
+
+  // FieldInfos actually used by the most recent buildIndex call
+  // (per-field IndexOptions may have been randomly downgraded there):
+  private FieldInfos currentFieldInfos;
+
+  // maxAllowed = the "highest" we can index, but we will still
+  // randomly index at lower IndexOption
+  /** Writes the seeded postings through {@code codec}'s postings format into
+   *  {@code dir} and returns a {@link FieldsProducer} reading them back.
+   *  Each field is (re)assigned random IndexOptions no higher than
+   *  {@code maxAllowed} (always exactly {@code maxAllowed} when
+   *  {@code alwaysTestMax}). */
+  public FieldsProducer buildIndex(Codec codec, Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
+    SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
+
+    int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("\nTEST: now build index");
+    }
+
+    // TODO use allowPayloads
+
+    FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
+    for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
+      FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);
+
+      // Randomly picked the IndexOptions to index this
+      // field with:
+      IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? maxIndexOption : TestUtil.nextInt(random, 1, maxIndexOption)];
+      boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
+
+      newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
+                                                   fieldUpto,
+                                                   false,
+                                                   false,
+                                                   doPayloads,
+                                                   indexOptions,
+                                                   DocValuesType.NONE,
+                                                   -1,
+                                                   new HashMap<>());
+    }
+
+    FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);
+
+    // Estimate that flushed segment size will be 25% of
+    // what we use in RAM:
+    long bytes = totalPostings * 8 + totalPayloadBytes;
+
+    SegmentWriteState writeState = new SegmentWriteState(null, dir,
+                                                         segmentInfo, newFieldInfos,
+                                                         null, new IOContext(new FlushInfo(maxDoc, bytes)));
+
+    Fields seedFields = new SeedFields(fields, newFieldInfos, maxAllowed, allowPayloads);
+
+    // Write all fields/terms/postings through the codec under test:
+    FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState);
+    boolean success = false;
+    try {
+      consumer.write(seedFields);
+      success = true;
+    } finally {
+      // Close normally on success; on failure close without masking the original exception:
+      if (success) {
+        IOUtils.close(consumer);
+      } else {
+        IOUtils.closeWhileHandlingException(consumer);
+      }
+    }
+
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("TEST: after indexing: files=");
+      for(String file : dir.listAll()) {
+        System.out.println("  " + file + ": " + dir.fileLength(file) + " bytes");
+      }
+    }
+
+    currentFieldInfos = newFieldInfos;
+
+    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ);
+
+    return codec.postingsFormat().fieldsProducer(readState);
+  }
+
+  /** Steps an expected {@link SeedPostings} alongside the codec's actual enum
+   *  for one term, randomly mixing nextDoc()/advance() and (depending on
+   *  {@code options} and the field's IndexOptions) checking freqs, positions,
+   *  offsets and payloads against the regenerated expected values. */
+  private void verifyEnum(Random random,
+                          ThreadState threadState,
+                          String field,
+                          BytesRef term,
+                          TermsEnum termsEnum,
+
+                          // Maximum options (docs/freqs/positions/offsets) to test:
+                          IndexOptions maxTestOptions,
+
+                          IndexOptions maxIndexOptions,
+
+                          EnumSet<Option> options,
+                          boolean alwaysTestMax) throws IOException {
+
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println("  verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
+    }
+
+    // Make sure TermsEnum really is positioned on the
+    // expected term:
+    assertEquals(term, termsEnum.term());
+
+    // 50% of the time time pass liveDocs:
+    boolean useLiveDocs = options.contains(Option.LIVE_DOCS) && random.nextBoolean();
+    Bits liveDocs;
+    if (useLiveDocs) {
+      liveDocs = globalLiveDocs;
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("  use liveDocs: " + globalLiveDocs.length());
+      }
+    } else {
+      liveDocs = null;
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("  no liveDocs");
+      }
+    }
+
+    FieldInfo fieldInfo = currentFieldInfos.fieldInfo(field);
+
+    // Regenerate the ground-truth postings from this term's seed.
+    // NOTE: can be empty list if we are using liveDocs:
+    SeedPostings expected = getSeedPostings(term.utf8ToString(),
+                                            fields.get(field).get(term).seed,
+                                            useLiveDocs,
+                                            globalLiveDocs,
+                                            maxIndexOptions,
+                                            true);
+    assertEquals(expected.docFreq, termsEnum.docFreq());
+
+    // Decide which features both the index and this test run support, then
+    // randomly choose which of those to actually verify this time:
+    boolean allowFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 &&
+      maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    boolean doCheckFreqs = allowFreqs && (alwaysTestMax || random.nextInt(3) <= 2);
+
+    boolean allowPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 &&
+      maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    boolean doCheckPositions = allowPositions && (alwaysTestMax || random.nextInt(3) <= 2);
+
+    boolean allowOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0 &&
+      maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    boolean doCheckOffsets = allowOffsets && (alwaysTestMax || random.nextInt(3) <= 2);
+
+    boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && (alwaysTestMax || random.nextInt(3) <= 2);
+
+    PostingsEnum prevPostingsEnum = null;
+
+    PostingsEnum postingsEnum;
+
+    // Pull a PostingsEnum with randomized flags (sometimes reusing the prior enum):
+    if (!doCheckPositions) {
+      if (allowPositions && random.nextInt(10) == 7) {
+        // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum
+        
+        if (options.contains(Option.REUSE_ENUMS) && random.nextInt(10) < 9) {
+          prevPostingsEnum = threadState.reusePostingsEnum;
+        }
+
+        int flags = PostingsEnum.POSITIONS;
+        if (alwaysTestMax || random.nextBoolean()) {
+          flags |= PostingsEnum.OFFSETS;
+        }
+        if (alwaysTestMax || random.nextBoolean()) {
+          flags |= PostingsEnum.PAYLOADS;
+        }
+
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("  get DocsEnum (but we won't check positions) flags=" + flags);
+        }
+
+        threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, flags);
+        postingsEnum = threadState.reusePostingsEnum;
+      } else {
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("  get DocsEnum");
+        }
+        if (options.contains(Option.REUSE_ENUMS) && random.nextInt(10) < 9) {
+          prevPostingsEnum = threadState.reusePostingsEnum;
+        }
+        threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, doCheckFreqs ? PostingsEnum.FREQS : PostingsEnum.NONE);
+        postingsEnum = threadState.reusePostingsEnum;
+      }
+    } else {
+      if (options.contains(Option.REUSE_ENUMS) && random.nextInt(10) < 9) {
+        prevPostingsEnum = threadState.reusePostingsEnum;
+      }
+
+      int flags = PostingsEnum.POSITIONS;
+      if (alwaysTestMax || doCheckOffsets || random.nextInt(3) == 1) {
+        flags |= PostingsEnum.OFFSETS;
+      }
+      if (alwaysTestMax || doCheckPayloads|| random.nextInt(3) == 1) {
+        flags |= PostingsEnum.PAYLOADS;
+      }
+
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("  get DocsEnum flags=" + flags);
+      }
+
+      threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, flags);
+      postingsEnum = threadState.reusePostingsEnum;
+    }
+
+    assertNotNull("null DocsEnum", postingsEnum);
+    int initialDocID = postingsEnum.docID();
+    // NOTE(review): "inital" typo is inside the assertion message (a runtime string), left unchanged:
+    assertEquals("inital docID should be -1" + postingsEnum, -1, initialDocID);
+
+    if (LuceneTestCase.VERBOSE) {
+      if (prevPostingsEnum == null) {
+        System.out.println("  got enum=" + postingsEnum);
+      } else if (prevPostingsEnum == postingsEnum) {
+        System.out.println("  got reuse enum=" + postingsEnum);
+      } else {
+        System.out.println("  got enum=" + postingsEnum + " (reuse of " + prevPostingsEnum + " failed)");
+      }
+    }
+
+    // 10% of the time don't consume all docs:
+    int stopAt;
+    if (!alwaysTestMax && options.contains(Option.PARTIAL_DOC_CONSUME) && expected.docFreq > 1 && random.nextInt(10) == 7) {
+      stopAt = random.nextInt(expected.docFreq-1);
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("  will not consume all docs (" + stopAt + " vs " + expected.docFreq + ")");
+      }
+    } else {
+      stopAt = expected.docFreq;
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("  consume all docs");
+      }
+    }
+
+    // Configure how aggressively advance() will be used:
+    double skipChance = alwaysTestMax ? 0.5 : random.nextDouble();
+    int numSkips = expected.docFreq < 3 ? 1 : TestUtil.nextInt(random, 1, Math.min(20, expected.docFreq / 3));
+    int skipInc = expected.docFreq/numSkips;
+    int skipDocInc = maxDoc/numSkips;
+
+    // Sometimes do 100% skipping:
+    boolean doAllSkipping = options.contains(Option.SKIPPING) && random.nextInt(7) == 1;
+
+    double freqAskChance = alwaysTestMax ? 1.0 : random.nextDouble();
+    double payloadCheckChance = alwaysTestMax ? 1.0 : random.nextDouble();
+    double offsetCheckChance = alwaysTestMax ? 1.0 : random.nextDouble();
+
+    if (LuceneTestCase.VERBOSE) {
+      if (options.contains(Option.SKIPPING)) {
+        System.out.println("  skipChance=" + skipChance + " numSkips=" + numSkips);
+      } else {
+        System.out.println("  no skipping");
+      }
+      if (doCheckFreqs) {
+        System.out.println("  freqAskChance=" + freqAskChance);
+      }
+      if (doCheckPayloads) {
+        System.out.println("  payloadCheckChance=" + payloadCheckChance);
+      }
+      if (doCheckOffsets) {
+        System.out.println("  offsetCheckChance=" + offsetCheckChance);
+      }
+    }
+
+    // Main loop: advance `expected` and the actual enum in lockstep, asserting agreement:
+    while (expected.upto <= stopAt) {
+      if (expected.upto == stopAt) {
+        if (stopAt == expected.docFreq) {
+          assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.nextDoc());
+
+          // Common bug is to forget to set this.doc=NO_MORE_DOCS in the enum!:
+          assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.docID());
+        }
+        break;
+      }
+
+      if (options.contains(Option.SKIPPING) && (doAllSkipping || random.nextDouble() <= skipChance)) {
+        // Either skip within known docs, or advance to a random (possibly absent) target:
+        int targetDocID = -1;
+        if (expected.upto < stopAt && random.nextBoolean()) {
+          // Pick target we know exists:
+          final int skipCount = TestUtil.nextInt(random, 1, skipInc);
+          for(int skip=0;skip<skipCount;skip++) {
+            if (expected.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
+              break;
+            }
+          }
+        } else {
+          // Pick random target (might not exist):
+          final int skipDocIDs = TestUtil.nextInt(random, 1, skipDocInc);
+          if (skipDocIDs > 0) {
+            targetDocID = expected.docID() + skipDocIDs;
+            expected.advance(targetDocID);
+          }
+        }
+
+        if (expected.upto >= stopAt) {
+          int target = random.nextBoolean() ? maxDoc : PostingsEnum.NO_MORE_DOCS;
+          if (LuceneTestCase.VERBOSE) {
+            System.out.println("  now advance to end (target=" + target + ")");
+          }
+          assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.advance(target));
+          break;
+        } else {
+          if (LuceneTestCase.VERBOSE) {
+            if (targetDocID != -1) {
+              System.out.println("  now advance to random target=" + targetDocID + " (" + expected.upto + " of " + stopAt + ") current=" + postingsEnum.docID());
+            } else {
+              System.out.println("  now advance to known-exists target=" + expected.docID() + " (" + expected.upto + " of " + stopAt + ") current=" + postingsEnum.docID());
+            }
+          }
+          int docID = postingsEnum.advance(targetDocID != -1 ? targetDocID : expected.docID());
+          assertEquals("docID is wrong", expected.docID(), docID);
+        }
+      } else {
+        expected.nextDoc();
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("  now nextDoc to " + expected.docID() + " (" + expected.upto + " of " + stopAt + ")");
+        }
+        int docID = postingsEnum.nextDoc();
+        assertEquals("docID is wrong", expected.docID(), docID);
+        if (docID == PostingsEnum.NO_MORE_DOCS) {
+          break;
+        }
+      }
+
+      if (doCheckFreqs && random.nextDouble() <= freqAskChance) {
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("    now freq()=" + expected.freq());
+        }
+        int freq = postingsEnum.freq();
+        assertEquals("freq is wrong", expected.freq(), freq);
+      }
+
+      if (doCheckPositions) {
+        int freq = postingsEnum.freq();
+        int numPosToConsume;
+        // Sometimes consume only a prefix of the positions:
+        if (!alwaysTestMax && options.contains(Option.PARTIAL_POS_CONSUME) && random.nextInt(5) == 1) {
+          numPosToConsume = random.nextInt(freq);
+        } else {
+          numPosToConsume = freq;
+        }
+
+        for(int i=0;i<numPosToConsume;i++) {
+          int pos = expected.nextPosition();
+          if (LuceneTestCase.VERBOSE) {
+            System.out.println("    now nextPosition to " + pos);
+          }
+          assertEquals("position is wrong", pos, postingsEnum.nextPosition());
+
+          if (doCheckPayloads) {
+            BytesRef expectedPayload = expected.getPayload();
+            if (random.nextDouble() <= payloadCheckChance) {
+              if (LuceneTestCase.VERBOSE) {
+                System.out.println("      now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
+              }
+              if (expectedPayload == null || expectedPayload.length == 0) {
+                assertNull("should not have payload", postingsEnum.getPayload());
+              } else {
+                BytesRef payload = postingsEnum.getPayload();
+                assertNotNull("should have payload but doesn't", payload);
+
+                assertEquals("payload length is wrong", expectedPayload.length, payload.length);
+                for(int byteUpto=0;byteUpto<expectedPayload.length;byteUpto++) {
+                  assertEquals("payload bytes are wrong",
+                               expectedPayload.bytes[expectedPayload.offset + byteUpto],
+                               payload.bytes[payload.offset+byteUpto]);
+                }
+                
+                // make a deep copy
+                payload = BytesRef.deepCopyOf(payload);
+                assertEquals("2nd call to getPayload returns something different!", payload, postingsEnum.getPayload());
+              }
+            } else {
+              if (LuceneTestCase.VERBOSE) {
+                System.out.println("      skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
+              }
+            }
+          }
+
+          if (doCheckOffsets) {
+            if (random.nextDouble() <= offsetCheckChance) {
+              if (LuceneTestCase.VERBOSE) {
+                System.out.println("      now check offsets: startOff=" + expected.startOffset() + " endOffset=" + expected.endOffset());
+              }
+              assertEquals("startOffset is wrong", expected.startOffset(), postingsEnum.startOffset());
+              assertEquals("endOffset is wrong", expected.endOffset(), postingsEnum.endOffset());
+            } else {
+              if (LuceneTestCase.VERBOSE) {
+                System.out.println("      skip check offsets");
+              }
+            }
+          } else if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
+            // Field indexed without offsets: the enum must report -1 for both:
+            if (LuceneTestCase.VERBOSE) {
+              System.out.println("      now check offsets are -1");
+            }
+            assertEquals("startOffset isn't -1", -1, postingsEnum.startOffset());
+            assertEquals("endOffset isn't -1", -1, postingsEnum.endOffset());
+          }
+        }
+      }
+    }
+  }
+
+  /** Runs {@code testTermsOneThread} against the shared index with its own Random. */
+  private static class TestThread extends Thread {
+    private Fields fieldsSource;
+    private EnumSet<Option> options;
+    private IndexOptions maxIndexOptions;
+    private IndexOptions maxTestOptions;
+    private boolean alwaysTestMax;
+    private RandomPostingsTester postingsTester;
+    private Random random;
+
+    public TestThread(Random random, RandomPostingsTester postingsTester, Fields fieldsSource, EnumSet<Option> options, IndexOptions maxTestOptions,
+                      IndexOptions maxIndexOptions, boolean alwaysTestMax) {
+      this.postingsTester = postingsTester;
+      this.fieldsSource = fieldsSource;
+      this.random = random;
+      this.options = options;
+      this.maxTestOptions = maxTestOptions;
+      this.maxIndexOptions = maxIndexOptions;
+      this.alwaysTestMax = alwaysTestMax;
+    }
+
+    @Override
+    public void run() {
+      try {
+        postingsTester.testTermsOneThread(random, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
+      } catch (Throwable t) {
+        // Surface any failure from this thread as unchecked:
+        throw new RuntimeException(t);
+      } finally {
+        // Null out references so completed Thread objects don't pin the reader/tester:
+        fieldsSource = null;
+        postingsTester = null;
+      }
+    }
+  }
+
+  /** Tests all enumerated terms/fields, either directly on the calling thread
+   *  or, when {@code options} contains {@link Option#THREADS}, concurrently
+   *  from 2-5 threads (each seeded from this tester's random). */
+  public void testTerms(final Fields fieldsSource, final EnumSet<Option> options,
+                        final IndexOptions maxTestOptions,
+                        final IndexOptions maxIndexOptions,
+                        final boolean alwaysTestMax) throws Exception {
+
+    if (options.contains(Option.THREADS) == false) {
+      testTermsOneThread(random, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
+      return;
+    }
+
+    int numThreads = TestUtil.nextInt(random, 2, 5);
+    Thread[] threads = new Thread[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      // Each thread gets its own Random derived from the shared one:
+      threads[i] = new TestThread(new Random(random.nextLong()), this, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
+      threads[i].start();
+    }
+    for (Thread thread : threads) {
+      thread.join();
+    }
+  }
+
+  /** Visits every term (in shuffled order) across all fields, seeking by term
+   *  bytes, by ord (until a seek proves ords unsupported), or by a previously
+   *  saved TermState, verifying the postings enum each time; then exercises
+   *  {@code Terms.intersect} against random automata per field. */
+  private void testTermsOneThread(Random random, Fields fieldsSource, EnumSet<Option> options,
+                                  IndexOptions maxTestOptions,
+                                  IndexOptions maxIndexOptions, boolean alwaysTestMax) throws IOException {
+
+    ThreadState threadState = new ThreadState();
+
+    // Test random terms/fields:
+    List<TermState> termStates = new ArrayList<>();
+    List<FieldAndTerm> termStateTerms = new ArrayList<>();
+
+    // Optimistically assume ords work until an ord/seekExact(long) call throws
+    // UnsupportedOperationException:
+    boolean supportsOrds = true;
+
+    Collections.shuffle(allTerms, random);
+    int upto = 0;
+    while (upto < allTerms.size()) {
+
+      boolean useTermState = termStates.size() != 0 && random.nextInt(5) == 1;
+      boolean useTermOrd = supportsOrds && useTermState == false && random.nextInt(5) == 1;
+
+      FieldAndTerm fieldAndTerm;
+      TermsEnum termsEnum;
+
+      TermState termState = null;
+
+      if (!useTermState) {
+        // Seek by random field+term:
+        fieldAndTerm = allTerms.get(upto++);
+        if (LuceneTestCase.VERBOSE) {
+          if (useTermOrd) {
+            System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() + " using ord=" + fieldAndTerm.ord);
+          } else {
+            System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() );
+          }
+        }
+      } else {
+        // Seek by previous saved TermState
+        int idx = random.nextInt(termStates.size());
+        fieldAndTerm = termStateTerms.get(idx);
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("\nTEST: seek using TermState to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString());
+        }
+        termState = termStates.get(idx);
+      }
+
+      Terms terms = fieldsSource.terms(fieldAndTerm.field);
+      assertNotNull(terms);
+      termsEnum = terms.iterator(null);
+
+      if (!useTermState) {
+        if (useTermOrd) {
+          // Try seek by ord sometimes:
+          try {
+            termsEnum.seekExact(fieldAndTerm.ord);
+          } catch (UnsupportedOperationException uoe) {
+            supportsOrds = false;
+            assertTrue(termsEnum.seekExact(fieldAndTerm.term));
+          }
+        } else {
+          assertTrue(termsEnum.seekExact(fieldAndTerm.term));
+        }
+      } else {
+        termsEnum.seekExact(fieldAndTerm.term, termState);
+      }
+
+      long termOrd;
+      if (supportsOrds) {
+        try {
+          termOrd = termsEnum.ord();
+        } catch (UnsupportedOperationException uoe) {
+          supportsOrds = false;
+          termOrd = -1;
+        }
+      } else {
+        termOrd = -1;
+      }
+
+      if (termOrd != -1) {
+        // PostingsFormat supports ords
+        assertEquals(fieldAndTerm.ord, termsEnum.ord());
+      }
+
+      boolean savedTermState = false;
+
+      if (options.contains(Option.TERM_STATE) && !useTermState && random.nextInt(5) == 1) {
+        // Save away this TermState:
+        termStates.add(termsEnum.termState());
+        termStateTerms.add(fieldAndTerm);
+        savedTermState = true;
+      }
+
+      verifyEnum(random, threadState,
+                 fieldAndTerm.field,
+                 fieldAndTerm.term,
+                 termsEnum,
+                 maxTestOptions,
+                 maxIndexOptions,
+                 options,
+                 alwaysTestMax);
+
+      // Sometimes save term state after pulling the enum:
+      if (options.contains(Option.TERM_STATE) && !useTermState && !savedTermState && random.nextInt(5) == 1) {
+        // Save away this TermState:
+        termStates.add(termsEnum.termState());
+        termStateTerms.add(fieldAndTerm);
+      }
+
+      // 10% of the time make sure you can pull another enum
+      // from the same term:
+      if (alwaysTestMax || random.nextInt(10) == 7) {
+        // Try same term again
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("TEST: try enum again on same term");
+        }
+
+        verifyEnum(random, threadState,
+                   fieldAndTerm.field,
+                   fieldAndTerm.term,
+                   termsEnum,
+                   maxTestOptions,
+                   maxIndexOptions,
+                   options,
+                   alwaysTestMax);
+      }
+    }
+
+    // Test Terms.intersect:
+    for(String field : fields.keySet()) {
+      while (true) {
+        Automaton a = AutomatonTestUtil.randomAutomaton(random);
+        CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
+        if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+          // Keep retrying until we get an A that will really "use" the PF's intersect code:
+          continue;
+        }
+        // System.out.println("A:\n" + a.toDot());
+
+        BytesRef startTerm = null;
+        if (random.nextBoolean()) {
+          RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
+          for (int iter=0;iter<100;iter++) {
+            int[] codePoints = ras.getRandomAcceptedString(random);
+            if (codePoints.length == 0) {
+              continue;
+            }
+            startTerm = new BytesRef(UnicodeUtil.newString(codePoints, 0, codePoints.length));
+            break;
+          }
+          // Don't allow empty string startTerm:
+          if (startTerm == null) {
+            continue;
+          }
+        }
+        TermsEnum intersected = fieldsSource.terms(field).intersect(ca, startTerm);
+
+        Set<BytesRef> intersectedTerms = new HashSet<>();
+        BytesRef term;
+        while ((term = intersected.next()) != null) {
+          if (startTerm != null) {
+            // NOTE: not <=
+            assertTrue(startTerm.compareTo(term) < 0);
+          }
+          intersectedTerms.add(BytesRef.deepCopyOf(term));
+          verifyEnum(random, threadState,
+                     field,
+                     term,
+                     intersected,
+                     maxTestOptions,
+                     maxIndexOptions,
+                     options,
+                     alwaysTestMax);
+        }
+
+        if (ca.runAutomaton == null) {
+          // NOTE(review): a null runAutomaton presumably means the automaton
+          // accepts no terms -- confirm against CompiledAutomaton:
+          assertTrue(intersectedTerms.isEmpty());
+        } else {
+          for(BytesRef term2 : fields.get(field).keySet()) {
+            boolean expected;
+            if (startTerm != null && startTerm.compareTo(term2) >= 0) {
+              expected = false;
+            } else {
+              expected = ca.runAutomaton.run(term2.bytes, term2.offset, term2.length);
+            }
+            assertEquals("term=" + term2, expected, intersectedTerms.contains(term2));
+          }
+        }
+
+        break;
+      }
+    }
+  }
+
+  /** Verifies Fields.iterator() behaves as a read-only iterator: remove()
+   *  must throw UnsupportedOperationException, and next() past the end must
+   *  throw NoSuchElementException. */
+  public void testFields(Fields fields) throws Exception {
+    Iterator<String> it = fields.iterator();
+    while (it.hasNext()) {
+      it.next();
+      try {
+        it.remove();
+        throw new AssertionError("Fields.iterator() allows for removal");
+      } catch (UnsupportedOperationException expected) {
+        // expected;
+      }
+    }
+    // Exhausted; hasNext must stay false and next() must fail:
+    assertFalse(it.hasNext());
+    try {
+      it.next();
+      throw new AssertionError("Fields.iterator() doesn't throw NoSuchElementException when past the end");
+    } catch (NoSuchElementException expected) {
+      // expected
+    }
+  }
+
+  /** Indexes all fields/terms at the specified
+   *  IndexOptions, and fully tests at that IndexOptions.
+   *  The FieldsProducer and Directory are closed (and {@code path} removed)
+   *  even if a test assertion fails, so a failing run does not leak them. */
+  public void testFull(Codec codec, Path path, IndexOptions options, boolean withPayloads) throws Exception {
+    Directory dir = LuceneTestCase.newFSDirectory(path);
+
+    // TODO test thread safety of buildIndex too
+    FieldsProducer fieldsProducer = buildIndex(codec, dir, options, withPayloads, true);
+
+    boolean success = false;
+    try {
+      testFields(fieldsProducer);
+
+      IndexOptions[] allOptions = IndexOptions.values();
+      int maxIndexOption = Arrays.asList(allOptions).indexOf(options);
+
+      for(int i=0;i<=maxIndexOption;i++) {
+        testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true);
+        if (withPayloads) {
+          // If we indexed w/ payloads, also test enums w/o accessing payloads:
+          testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true);
+        }
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(fieldsProducer, dir);
+      } else {
+        // Don't mask the original test failure with a close() exception:
+        IOUtils.closeWhileHandlingException(fieldsProducer, dir);
+      }
+    }
+    // Only reached on success (exceptions propagate above), matching the
+    // original success-path behavior:
+    IOUtils.rm(path);
+  }
+}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1670918&r1=1670917&r2=1670918&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Thu Apr 2 15:05:48 2015
@@ -831,6 +831,14 @@ public final class TestUtil {
return new String(buffer, 0, i);
}
+  /** Returns a random binary term of 0 to 14 bytes. */
+  public static BytesRef randomBinaryTerm(Random r) {
+    int length = r.nextInt(15);
+    BytesRef term = new BytesRef(length);
+    // Fill the (length-sized) backing array with random bytes:
+    r.nextBytes(term.bytes);
+    term.length = length;
+    return term;
+  }
/** Return a Codec that can read any of the
* default codecs and formats, but always writes in the specified
@@ -858,7 +866,7 @@ public final class TestUtil {
// (and maybe their params, too) to infostream on flush and merge.
// otherwise in a real debugging situation we won't know whats going on!
if (LuceneTestCase.VERBOSE) {
- System.out.println("forcing docvalues format to:" + format);
+ System.out.println("TestUtil: forcing docvalues format to:" + format);
}
return new AssertingCodec() {
@Override
@@ -1284,6 +1292,24 @@ public final class TestUtil {
return sb.toString();
}
}
+
+  /** For debugging: tries to include br.utf8ToString(), but if the bytes are
+   *  not valid UTF-8 (which is fine!) falls back to BytesRef's own
+   *  toString() representation. */
+  public static String bytesRefToString(BytesRef br) {
+    if (br == null) {
+      return "(null)";
+    }
+    try {
+      return br.utf8ToString() + " " + br.toString();
+    } catch (IllegalArgumentException t) {
+      // Not valid UTF-8, or e.g. a prefix of UTF-8 that ends
+      // mid-unicode-char; use only the default representation:
+      return br.toString();
+    }
+  }
/** Returns a copy of directory, entirely in RAM */
public static RAMDirectory ramCopyOf(Directory dir) throws IOException {