Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/02 17:37:41 UTC
svn commit: r1670929 [3/5] - in /lucene/dev/branches/lucene6271: ./ lucene/
lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/
lucene/codecs/src/resources/META-INF/services/
lucene/codecs/src/test/org/apache/lucene/codecs/autopr...
Modified: lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java Thu Apr 2 15:37:39 2015
@@ -17,25 +17,32 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
@@ -70,7 +77,6 @@ public class TestPrefixQuery extends Luc
assertEquals("One in /Computers/Mac", 1, hits.length);
query = new PrefixQuery(new Term("category", ""));
- Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "category");
hits = searcher.search(query, 1000).scoreDocs;
assertEquals("everything", 3, hits.length);
writer.close();
@@ -78,6 +84,92 @@ public class TestPrefixQuery extends Luc
directory.close();
}
+ /** Make sure auto prefix terms are used with PrefixQuery. */
+ public void testAutoPrefixTermsKickIn() throws Exception {
+
+ List<String> prefixes = new ArrayList<>();
+ for(int i=1;i<5;i++) {
+ char[] chars = new char[i];
+ Arrays.fill(chars, 'a');
+ prefixes.add(new String(chars));
+ }
+
+ Set<String> randomTerms = new HashSet<>();
+ int numTerms = atLeast(10000);
+ while (randomTerms.size() < numTerms) {
+ for(String prefix : prefixes) {
+ randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
+ }
+ }
+
+ int actualCount = 0;
+ for(String term : randomTerms) {
+ if (term.startsWith("aa")) {
+ actualCount++;
+ }
+ }
+
+ //System.out.println("actual count " + actualCount);
+
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
+
+ // As long as this is never > actualCount, aa should always see at least one auto-prefix term:
+ int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
+ int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
+
+ iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
+ minTermsAutoPrefix, maxTermsAutoPrefix)));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ for (String term : randomTerms) {
+ Document doc = new Document();
+ doc.add(new StringField("field", term, Field.Store.NO));
+ w.addDocument(doc);
+ }
+
+ w.forceMerge(1);
+ IndexReader r = w.getReader();
+ final Terms terms = MultiFields.getTerms(r, "field");
+ IndexSearcher s = new IndexSearcher(r);
+ final int finalActualCount = actualCount;
+ PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
+ public PrefixQuery checkTerms() throws IOException {
+ TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
+ int count = 0;
+ while (termsEnum.next() != null) {
+ //System.out.println("got term: " + termsEnum.term().utf8ToString());
+ count++;
+ }
+
+ // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
+ assertTrue(count < finalActualCount);
+
+ return this;
+ }
+ }.checkTerms();
+
+ int x = BooleanQuery.getMaxClauseCount();
+ try {
+ BooleanQuery.setMaxClauseCount(randomTerms.size());
+ if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+ } else if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
+ }
+
+ assertEquals(actualCount, s.search(q, 1).totalHits);
+ } finally {
+ BooleanQuery.setMaxClauseCount(x);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testMatchAll() throws Exception {
Directory directory = newDirectory();
@@ -92,8 +184,6 @@ public class TestPrefixQuery extends Luc
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.search(query, 1000).totalHits);
-
- Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
writer.close();
reader.close();
directory.close();
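
A minimal standalone sketch of the setup the new testAutoPrefixTermsKickIn exercises, written against the branch APIs visible in this diff. The field name, document, and the block/auto-prefix sizes (25/48 and 10/30) are illustrative assumptions; newDirectory(), newIndexWriterConfig() and random() come from LuceneTestCase:

  import org.apache.lucene.analysis.MockAnalyzer;
  import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StringField;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.RandomIndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.PrefixQuery;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.util.LuceneTestCase;
  import org.apache.lucene.util.TestUtil;

  public class AutoPrefixSketch extends LuceneTestCase {
    public void testSketch() throws Exception {
      Directory dir = newDirectory();
      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
      // min/max terms per block, then min/max terms per auto-prefix term;
      // the values here are illustrative, chosen to satisfy max >= 2*(min-1):
      iwc.setCodec(TestUtil.alwaysPostingsFormat(
          new AutoPrefixPostingsFormat(25, 48, 10, 30)));
      RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
      Document doc = new Document();
      doc.add(new StringField("field", "aardvark", Field.Store.NO));
      w.addDocument(doc);
      IndexReader r = w.getReader();
      IndexSearcher s = new IndexSearcher(r);
      // With enough terms, PrefixQuery is free to visit auto-prefix terms
      // instead of every individual "aa*" term; here it simply matches:
      assertEquals(1, s.search(new PrefixQuery(new Term("field", "aa")), 1).totalHits);
      r.close();
      w.close();
      dir.close();
    }
  }
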
Modified: lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java Thu Apr 2 15:37:39 2015
@@ -18,20 +18,32 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
public class TestTermRangeQuery extends LuceneTestCase {
@@ -104,19 +116,18 @@ public class TestTermRangeQuery extends
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);
+
TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
- Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
- assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
- query = new TermRangeQuery("content", null, null, false, false);
- assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
+
+ query = TermRangeQuery.newStringRange("content", "", null, true, true);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
+
query = TermRangeQuery.newStringRange("content", "", null, true, false);
- assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
- // and now anothe one
- query = TermRangeQuery.newStringRange("content", "B", null, true, false);
- assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
+
+ // and now another one
+ query = TermRangeQuery.newStringRange("content", "B", null, true, true);
assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
reader.close();
}
@@ -336,4 +347,127 @@ public class TestTermRangeQuery extends
//assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());
reader.close();
}
+
+ /** Make sure auto prefix terms are used with TermRangeQuery. */
+ public void testAutoPrefixTermsKickIn() throws Exception {
+
+ List<String> prefixes = new ArrayList<>();
+ for(int i=1;i<5;i++) {
+ char[] chars = new char[i];
+ Arrays.fill(chars, 'a');
+ prefixes.add(new String(chars));
+ }
+
+ Set<String> randomTerms = new HashSet<>();
+ int numTerms = atLeast(10000);
+ while (randomTerms.size() < numTerms) {
+ for(String prefix : prefixes) {
+ randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
+ }
+ }
+
+ // We make term range aa<start> - aa<end>
+ char start;
+ char end;
+
+ int actualCount;
+ boolean startInclusive = random().nextBoolean();
+ boolean endInclusive = random().nextBoolean();
+ String startTerm;
+ String endTerm;
+
+ while (true) {
+ start = (char) TestUtil.nextInt(random(), 'a', 'm');
+ end = (char) TestUtil.nextInt(random(), start+1, 'z');
+
+ actualCount = 0;
+
+ startTerm = "aa" + start;
+ endTerm = "aa" + end;
+
+ for(String term : randomTerms) {
+ int cmpStart = startTerm.compareTo(term);
+ int cmpEnd = endTerm.compareTo(term);
+ if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
+ (cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
+ actualCount++;
+ }
+ }
+
+ if (actualCount > 2000) {
+ break;
+ }
+ }
+
+ //System.out.println("start " + startTerm + " inclusive? " + startInclusive);
+ //System.out.println("end " + endTerm + " inclusive? " + endInclusive);
+ //System.out.println("actual count " + actualCount);
+
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
+
+ int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
+ int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
+
+ //System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
+ //System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
+
+ iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
+ minTermsAutoPrefix, maxTermsAutoPrefix)));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ //System.out.println("TEST: index terms");
+ for (String term : randomTerms) {
+ Document doc = new Document();
+ doc.add(new StringField("field", term, Field.Store.NO));
+ w.addDocument(doc);
+ //System.out.println(" " + term);
+ }
+
+ //System.out.println("TEST: now force merge");
+ w.forceMerge(1);
+ IndexReader r = w.getReader();
+ final Terms terms = MultiFields.getTerms(r, "field");
+ IndexSearcher s = new IndexSearcher(r);
+ final int finalActualCount = actualCount;
+ //System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
+ TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
+ public TermRangeQuery checkTerms() throws IOException {
+ TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
+ int count = 0;
+ while (termsEnum.next() != null) {
+ //System.out.println("got term: " + termsEnum.term().utf8ToString());
+ count++;
+ }
+ //System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
+
+ // Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
+ assertTrue(count < finalActualCount);
+
+ return this;
+ }
+ }.checkTerms();
+
+ if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+ } else if (random().nextBoolean()) {
+ q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
+ }
+
+ assertEquals(actualCount, s.search(q, 1).totalHits);
+
+ // Test when min == max:
+ List<String> randomTermsList = new ArrayList<>(randomTerms);
+ for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+ String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
+ q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
+ assertEquals(1, s.search(q, 1).totalHits);
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
}
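
The endpoint check in the while-loop above amounts to this small predicate (names illustrative); it is the invariant the startInclusive/endInclusive flags of TermRangeQuery are verified against:

  // A term is inside the range when it lies strictly between the bounds,
  // or sits exactly on a bound whose inclusive flag is set:
  static boolean inRange(String term, String startTerm, boolean startInclusive,
                         String endTerm, boolean endInclusive) {
    int cmpStart = startTerm.compareTo(term);
    int cmpEnd = endTerm.compareTo(term);
    return (cmpStart < 0 || (startInclusive && cmpStart == 0))
        && (cmpEnd > 0 || (endInclusive && cmpEnd == 0));
  }
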
Modified: lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java Thu Apr 2 15:37:39 2015
@@ -34,14 +34,8 @@ import java.io.IOException;
/**
* TestWildcard tests the '*' and '?' wildcard characters.
*/
-public class TestWildcard
- extends LuceneTestCase {
+public class TestWildcard extends LuceneTestCase {
- @Override
- public void setUp() throws Exception {
- super.setUp();
- }
-
public void testEquals() {
WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
@@ -126,10 +120,10 @@ public class TestWildcard
MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
assertMatches(searcher, wq, 2);
- Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
wq = new WildcardQuery(new Term("field", "*"));
assertMatches(searcher, wq, 2);
+ Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
reader.close();
indexStore.close();
Modified: lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java Thu Apr 2 15:37:39 2015
@@ -232,7 +232,7 @@ public class TestAutomaton extends Lucen
}
public void testInterval() throws Exception {
- Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3),
+ Automaton a = Operations.determinize(Automata.makeDecimalInterval(17, 100, 3),
DEFAULT_MAX_DETERMINIZED_STATES);
assertFalse(Operations.run(a, ""));
assertTrue(Operations.run(a, "017"));
@@ -431,7 +431,7 @@ public class TestAutomaton extends Lucen
}
public void testOneInterval() throws Exception {
- Automaton a = Automata.makeInterval(999, 1032, 0);
+ Automaton a = Automata.makeDecimalInterval(999, 1032, 0);
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "0999"));
assertTrue(Operations.run(a, "00999"));
@@ -439,7 +439,7 @@ public class TestAutomaton extends Lucen
}
public void testAnotherInterval() throws Exception {
- Automaton a = Automata.makeInterval(1, 2, 0);
+ Automaton a = Automata.makeDecimalInterval(1, 2, 0);
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
assertTrue(Operations.run(a, "01"));
}
@@ -462,7 +462,7 @@ public class TestAutomaton extends Lucen
}
String prefix = b.toString();
- Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits),
+ Automaton a = Operations.determinize(Automata.makeDecimalInterval(min, max, digits),
DEFAULT_MAX_DETERMINIZED_STATES);
if (random().nextBoolean()) {
a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
@@ -942,7 +942,7 @@ public class TestAutomaton extends Lucen
if (VERBOSE) {
System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
}
- a = Operations.union(a, Automata.makeInterval(min, max, digits));
+ a = Operations.union(a, Automata.makeDecimalInterval(min, max, digits));
StringBuilder b = new StringBuilder();
for(int i=0;i<digits;i++) {
b.append('0');
@@ -1105,6 +1105,138 @@ public class TestAutomaton extends Lucen
}
}
+ public void testMakeBinaryIntervalRandom() throws Exception {
+ int iters = atLeast(100);
+ for(int iter=0;iter<iters;iter++) {
+ BytesRef minTerm = TestUtil.randomBinaryTerm(random());
+ boolean minInclusive = random().nextBoolean();
+ BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
+ boolean maxInclusive = random().nextBoolean();
+
+ if (VERBOSE) {
+ System.out.println("TEST: iter=" + iter + " minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
+ }
+
+ Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
+
+ Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
+ if (minA.getNumStates() != a.getNumStates()) {
+ assertTrue(minA.getNumStates() < a.getNumStates());
+ System.out.println("Original was not minimal:");
+ System.out.println("Original:\n" + a.toDot());
+ System.out.println("Minimized:\n" + minA.toDot());
+ fail("auotmaton was not minimal");
+ }
+
+ if (VERBOSE) {
+ System.out.println(a.toDot());
+ }
+
+ for(int iter2=0;iter2<500;iter2++) {
+ BytesRef term = TestUtil.randomBinaryTerm(random());
+ int minCmp = minTerm.compareTo(term);
+ int maxCmp = maxTerm.compareTo(term);
+
+ boolean expected;
+ if (minCmp > 0 || maxCmp < 0) {
+ expected = false;
+ } else if (minCmp == 0 && maxCmp == 0) {
+ expected = minInclusive && maxInclusive;
+ } else if (minCmp == 0) {
+ expected = minInclusive;
+ } else if (maxCmp == 0) {
+ expected = maxInclusive;
+ } else {
+ expected = true;
+ }
+
+ if (VERBOSE) {
+ System.out.println(" check term=" + term + " expected=" + expected);
+ }
+ IntsRefBuilder intsBuilder = new IntsRefBuilder();
+ Util.toIntsRef(term, intsBuilder);
+ assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
+ }
+ }
+ }
+
+ private static IntsRef intsRef(String s) {
+ IntsRefBuilder intsBuilder = new IntsRefBuilder();
+ Util.toIntsRef(new BytesRef(s), intsBuilder);
+ return intsBuilder.toIntsRef();
+ }
+
+ public void testMakeBinaryIntervalBasic() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), true);
+ assertTrue(Operations.run(a, intsRef("bar")));
+ assertTrue(Operations.run(a, intsRef("foo")));
+ assertTrue(Operations.run(a, intsRef("beep")));
+ assertFalse(Operations.run(a, intsRef("baq")));
+ assertTrue(Operations.run(a, intsRef("bara")));
+ }
+
+ public void testMakeBinaryIntervalEqual() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true);
+ assertTrue(Operations.run(a, intsRef("bar")));
+ assertTrue(Operations.isFinite(a));
+ assertEquals(1, Operations.getFiniteStrings(a, 10).size());
+ }
+
+ public void testMakeBinaryIntervalCommonPrefix() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("barfoo"), true);
+ assertFalse(Operations.run(a, intsRef("bam")));
+ assertTrue(Operations.run(a, intsRef("bar")));
+ assertTrue(Operations.run(a, intsRef("bara")));
+ assertTrue(Operations.run(a, intsRef("barf")));
+ assertTrue(Operations.run(a, intsRef("barfo")));
+ assertTrue(Operations.run(a, intsRef("barfoo")));
+ assertTrue(Operations.run(a, intsRef("barfonz")));
+ assertFalse(Operations.run(a, intsRef("barfop")));
+ assertFalse(Operations.run(a, intsRef("barfoop")));
+ }
+
+ public void testMakeBinaryIntervalOpenMax() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, null, true);
+ assertFalse(Operations.run(a, intsRef("bam")));
+ assertTrue(Operations.run(a, intsRef("bar")));
+ assertTrue(Operations.run(a, intsRef("bara")));
+ assertTrue(Operations.run(a, intsRef("barf")));
+ assertTrue(Operations.run(a, intsRef("barfo")));
+ assertTrue(Operations.run(a, intsRef("barfoo")));
+ assertTrue(Operations.run(a, intsRef("barfonz")));
+ assertTrue(Operations.run(a, intsRef("barfop")));
+ assertTrue(Operations.run(a, intsRef("barfoop")));
+ assertTrue(Operations.run(a, intsRef("zzz")));
+ }
+
+ public void testMakeBinaryIntervalOpenMin() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(null, true, new BytesRef("foo"), true);
+ assertFalse(Operations.run(a, intsRef("foz")));
+ assertFalse(Operations.run(a, intsRef("zzz")));
+ assertTrue(Operations.run(a, intsRef("foo")));
+ assertTrue(Operations.run(a, intsRef("")));
+ assertTrue(Operations.run(a, intsRef("a")));
+ assertTrue(Operations.run(a, intsRef("aaa")));
+ assertTrue(Operations.run(a, intsRef("bz")));
+ }
+
+ public void testMakeBinaryIntervalOpenBoth() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(null, true, null, true);
+ assertTrue(Operations.run(a, intsRef("foz")));
+ assertTrue(Operations.run(a, intsRef("zzz")));
+ assertTrue(Operations.run(a, intsRef("foo")));
+ assertTrue(Operations.run(a, intsRef("")));
+ assertTrue(Operations.run(a, intsRef("a")));
+ assertTrue(Operations.run(a, intsRef("aaa")));
+ assertTrue(Operations.run(a, intsRef("bz")));
+ }
+
+ public void testAcceptAllEmptyStringMin() throws Exception {
+ Automaton a = Automata.makeBinaryInterval(new BytesRef(), true, null, true);
+ System.out.println("HERE: " + a.toDot());
+ assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
+ }
+
private static IntsRef toIntsRef(String s) {
IntsRefBuilder b = new IntsRefBuilder();
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
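
The new makeBinaryInterval tests above all funnel through the same acceptance check. A minimal standalone sketch of that pattern, assuming org.apache.lucene.util.fst.Util supplies toIntsRef as in the intsRef helper above; the "bar"/"foo" bounds and the exclusive max are illustrative:

  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.IntsRefBuilder;
  import org.apache.lucene.util.automaton.Automata;
  import org.apache.lucene.util.automaton.Automaton;
  import org.apache.lucene.util.automaton.Operations;
  import org.apache.lucene.util.fst.Util;

  public class BinaryIntervalSketch {
    public static void main(String[] args) {
      // Accepts every binary term t with "bar" <= t < "foo":
      Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true,
                                                new BytesRef("foo"), false);
      IntsRefBuilder b = new IntsRefBuilder();
      Util.toIntsRef(new BytesRef("beep"), b);
      System.out.println(Operations.run(a, b.toIntsRef()));  // true
      b.clear();
      Util.toIntsRef(new BytesRef("foo"), b);
      System.out.println(Operations.run(a, b.toIntsRef()));  // false: max is exclusive
    }
  }
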
Modified: lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java (original)
+++ lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java Thu Apr 2 15:37:39 2015
@@ -257,14 +257,14 @@ public class AssertingLeafReader extends
public TermState termState() throws IOException {
assertThread("Terms enums", creationThread);
assert state == State.POSITIONED : "termState() called on unpositioned TermsEnum";
- return super.termState();
+ return in.termState();
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assertThread("Terms enums", creationThread);
assert term.isValid();
- super.seekExact(term, state);
+ in.seekExact(term, state);
this.state = State.POSITIONED;
}
Modified: lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Thu Apr 2 15:37:39 2015
@@ -26,20 +26,9 @@ import static org.apache.lucene.index.Po
import java.io.IOException;
import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Random;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
@@ -48,8 +37,6 @@ import org.apache.lucene.analysis.Canned
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
@@ -63,22 +50,12 @@ import org.apache.lucene.document.TextFi
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.RamUsageTester;
-import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.AutomatonTestUtil;
-import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -108,1282 +85,42 @@ import org.junit.BeforeClass;
public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTestCase {
- private enum Option {
- // Sometimes use .advance():
- SKIPPING,
-
- // Sometimes reuse the PostingsEnum across terms:
- REUSE_ENUMS,
-
- // Sometimes pass non-null live docs:
- LIVE_DOCS,
-
- // Sometimes seek to term using previously saved TermState:
- TERM_STATE,
-
- // Sometimes don't fully consume docs from the enum
- PARTIAL_DOC_CONSUME,
-
- // Sometimes don't fully consume positions at each doc
- PARTIAL_POS_CONSUME,
-
- // Sometimes check payloads
- PAYLOADS,
-
- // Test w/ multiple threads
- THREADS
- };
-
- /** Given the same random seed this always enumerates the
- * same random postings */
- private static class SeedPostings extends PostingsEnum {
- // Used only to generate docIDs; this way if you pull w/
- // or w/o positions you get the same docID sequence:
- private final Random docRandom;
- private final Random random;
- public int docFreq;
- private final int maxDocSpacing;
- private final int payloadSize;
- private final boolean fixedPayloads;
- private final Bits liveDocs;
- private final BytesRef payload;
- private final IndexOptions options;
- private final boolean doPositions;
- private final boolean allowPayloads;
-
- private int docID;
- private int freq;
- public int upto;
-
- private int pos;
- private int offset;
- private int startOffset;
- private int endOffset;
- private int posSpacing;
- private int posUpto;
-
- public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, IndexOptions options, boolean allowPayloads) {
- random = new Random(seed);
- docRandom = new Random(random.nextLong());
- docFreq = TestUtil.nextInt(random, minDocFreq, maxDocFreq);
- this.liveDocs = liveDocs;
- this.allowPayloads = allowPayloads;
-
- // TODO: more realistic to inversely tie this to numDocs:
- maxDocSpacing = TestUtil.nextInt(random, 1, 100);
-
- if (random.nextInt(10) == 7) {
- // 10% of the time create big payloads:
- payloadSize = 1 + random.nextInt(3);
- } else {
- payloadSize = 1 + random.nextInt(1);
- }
-
- fixedPayloads = random.nextBoolean();
- byte[] payloadBytes = new byte[payloadSize];
- payload = new BytesRef(payloadBytes);
- this.options = options;
- doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
- }
-
- @Override
- public int nextDoc() {
- while(true) {
- _nextDoc();
- if (liveDocs == null || docID == NO_MORE_DOCS || liveDocs.get(docID)) {
- return docID;
- }
- }
- }
+ static RandomPostingsTester postingsTester;
- private int _nextDoc() {
- // Must consume random:
- while(posUpto < freq) {
- nextPosition();
- }
-
- if (upto < docFreq) {
- if (upto == 0 && docRandom.nextBoolean()) {
- // Sometimes index docID = 0
- } else if (maxDocSpacing == 1) {
- docID++;
- } else {
- // TODO: sometimes have a biggish gap here!
- docID += TestUtil.nextInt(docRandom, 1, maxDocSpacing);
- }
-
- if (random.nextInt(200) == 17) {
- freq = TestUtil.nextInt(random, 1, 1000);
- } else if (random.nextInt(10) == 17) {
- freq = TestUtil.nextInt(random, 1, 20);
- } else {
- freq = TestUtil.nextInt(random, 1, 4);
- }
-
- pos = 0;
- offset = 0;
- posUpto = 0;
- posSpacing = TestUtil.nextInt(random, 1, 100);
-
- upto++;
- return docID;
- } else {
- return docID = NO_MORE_DOCS;
- }
- }
-
- @Override
- public int docID() {
- return docID;
- }
-
- @Override
- public int freq() {
- return freq;
- }
-
- @Override
- public int nextPosition() {
- if (!doPositions) {
- posUpto = freq;
- return -1;
- }
- assert posUpto < freq;
-
- if (posUpto == 0 && random.nextBoolean()) {
- // Sometimes index pos = 0
- } else if (posSpacing == 1) {
- pos++;
- } else {
- pos += TestUtil.nextInt(random, 1, posSpacing);
- }
-
- if (payloadSize != 0) {
- if (fixedPayloads) {
- payload.length = payloadSize;
- random.nextBytes(payload.bytes);
- } else {
- int thisPayloadSize = random.nextInt(payloadSize);
- if (thisPayloadSize != 0) {
- payload.length = payloadSize;
- random.nextBytes(payload.bytes);
- } else {
- payload.length = 0;
- }
- }
- } else {
- payload.length = 0;
- }
- if (!allowPayloads) {
- payload.length = 0;
- }
-
- startOffset = offset + random.nextInt(5);
- endOffset = startOffset + random.nextInt(10);
- offset = endOffset;
-
- posUpto++;
- return pos;
- }
-
- @Override
- public int startOffset() {
- return startOffset;
- }
-
- @Override
- public int endOffset() {
- return endOffset;
- }
-
- @Override
- public BytesRef getPayload() {
- return payload.length == 0 ? null : payload;
- }
-
- @Override
- public int advance(int target) throws IOException {
- return slowAdvance(target);
- }
-
- @Override
- public long cost() {
- return docFreq;
- }
- }
-
- private static class FieldAndTerm {
- final String field;
- final BytesRef term;
- final long ord;
-
- public FieldAndTerm(String field, BytesRef term, long ord) {
- this.field = field;
- this.term = BytesRef.deepCopyOf(term);
- this.ord = ord;
- }
- }
-
- private static class SeedAndOrd {
- final long seed;
- long ord;
-
- public SeedAndOrd(long seed) {
- this.seed = seed;
- }
- }
-
- // Holds all postings:
- private static Map<String,SortedMap<BytesRef,SeedAndOrd>> fields;
-
- private static FieldInfos fieldInfos;
-
- private static FixedBitSet globalLiveDocs;
-
- private static List<FieldAndTerm> allTerms;
- private static int maxDoc;
-
- private static long totalPostings;
- private static long totalPayloadBytes;
-
- private static SeedPostings getSeedPostings(String term, long seed, boolean withLiveDocs, IndexOptions options, boolean allowPayloads) {
- int minDocFreq, maxDocFreq;
- if (term.startsWith("big_")) {
- minDocFreq = RANDOM_MULTIPLIER * 50000;
- maxDocFreq = RANDOM_MULTIPLIER * 70000;
- } else if (term.startsWith("medium_")) {
- minDocFreq = RANDOM_MULTIPLIER * 3000;
- maxDocFreq = RANDOM_MULTIPLIER * 6000;
- } else if (term.startsWith("low_")) {
- minDocFreq = RANDOM_MULTIPLIER;
- maxDocFreq = RANDOM_MULTIPLIER * 40;
- } else {
- minDocFreq = 1;
- maxDocFreq = 3;
- }
-
- return new SeedPostings(seed, minDocFreq, maxDocFreq, withLiveDocs ? globalLiveDocs : null, options, allowPayloads);
- }
+ // TODO maybe instead of @BeforeClass just make a single test run: build postings & index & test it?
@BeforeClass
public static void createPostings() throws IOException {
- totalPostings = 0;
- totalPayloadBytes = 0;
- fields = new TreeMap<>();
-
- final int numFields = TestUtil.nextInt(random(), 1, 5);
- if (VERBOSE) {
- System.out.println("TEST: " + numFields + " fields");
- }
- maxDoc = 0;
-
- FieldInfo[] fieldInfoArray = new FieldInfo[numFields];
- int fieldUpto = 0;
- while (fieldUpto < numFields) {
- String field = TestUtil.randomSimpleString(random());
- if (fields.containsKey(field)) {
- continue;
- }
-
- fieldInfoArray[fieldUpto] = new FieldInfo(field, fieldUpto, false, false, true,
- IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
- DocValuesType.NONE, -1, new HashMap<>());
- fieldUpto++;
-
- SortedMap<BytesRef,SeedAndOrd> postings = new TreeMap<>();
- fields.put(field, postings);
- Set<String> seenTerms = new HashSet<>();
-
- int numTerms;
- if (random().nextInt(10) == 7) {
- numTerms = atLeast(50);
- } else {
- numTerms = TestUtil.nextInt(random(), 2, 20);
- }
-
- while (postings.size() < numTerms) {
- int termUpto = postings.size();
- // Cannot contain surrogates else default Java string sort order (by UTF16 code unit) is different from Lucene:
- String term = TestUtil.randomSimpleString(random());
- if (seenTerms.contains(term)) {
- continue;
- }
- seenTerms.add(term);
-
- if (TEST_NIGHTLY && termUpto == 0 && fieldUpto == 1) {
- // Make 1 big term:
- term = "big_" + term;
- } else if (termUpto == 1 && fieldUpto == 1) {
- // Make 1 medium term:
- term = "medium_" + term;
- } else if (random().nextBoolean()) {
- // Low freq term:
- term = "low_" + term;
- } else {
- // Very low freq term (don't multiply by RANDOM_MULTIPLIER):
- term = "verylow_" + term;
- }
-
- long termSeed = random().nextLong();
- postings.put(new BytesRef(term), new SeedAndOrd(termSeed));
-
- // NOTE: sort of silly: we enum all the docs just to
- // get the maxDoc
- PostingsEnum postingsEnum = getSeedPostings(term, termSeed, false, IndexOptions.DOCS, true);
- int doc;
- int lastDoc = 0;
- while((doc = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
- lastDoc = doc;
- }
- maxDoc = Math.max(lastDoc, maxDoc);
- }
-
- // assign ords
- long ord = 0;
- for(SeedAndOrd ent : postings.values()) {
- ent.ord = ord++;
- }
- }
-
- fieldInfos = new FieldInfos(fieldInfoArray);
-
- // It's the count, not the last docID:
- maxDoc++;
-
- globalLiveDocs = new FixedBitSet(maxDoc);
- double liveRatio = random().nextDouble();
- for(int i=0;i<maxDoc;i++) {
- if (random().nextDouble() <= liveRatio) {
- globalLiveDocs.set(i);
- }
- }
-
- allTerms = new ArrayList<>();
- for(Map.Entry<String,SortedMap<BytesRef,SeedAndOrd>> fieldEnt : fields.entrySet()) {
- String field = fieldEnt.getKey();
- long ord = 0;
- for(Map.Entry<BytesRef,SeedAndOrd> termEnt : fieldEnt.getValue().entrySet()) {
- allTerms.add(new FieldAndTerm(field, termEnt.getKey(), ord++));
- }
- }
-
- if (VERBOSE) {
- System.out.println("TEST: done init postings; " + allTerms.size() + " total terms, across " + fieldInfos.size() + " fields");
- }
+ postingsTester = new RandomPostingsTester(random());
}
@AfterClass
public static void afterClass() throws Exception {
- allTerms = null;
- fieldInfos = null;
- fields = null;
- globalLiveDocs = null;
- }
-
- private static class SeedFields extends Fields {
- final Map<String,SortedMap<BytesRef,SeedAndOrd>> fields;
- final FieldInfos fieldInfos;
- final IndexOptions maxAllowed;
- final boolean allowPayloads;
-
- public SeedFields(Map<String,SortedMap<BytesRef,SeedAndOrd>> fields, FieldInfos fieldInfos, IndexOptions maxAllowed, boolean allowPayloads) {
- this.fields = fields;
- this.fieldInfos = fieldInfos;
- this.maxAllowed = maxAllowed;
- this.allowPayloads = allowPayloads;
- }
-
- @Override
- public Iterator<String> iterator() {
- return fields.keySet().iterator();
- }
-
- @Override
- public Terms terms(String field) {
- SortedMap<BytesRef,SeedAndOrd> terms = fields.get(field);
- if (terms == null) {
- return null;
- } else {
- return new SeedTerms(terms, fieldInfos.fieldInfo(field), maxAllowed, allowPayloads);
- }
- }
-
- @Override
- public int size() {
- return fields.size();
- }
- }
-
- private static class SeedTerms extends Terms {
- final SortedMap<BytesRef,SeedAndOrd> terms;
- final FieldInfo fieldInfo;
- final IndexOptions maxAllowed;
- final boolean allowPayloads;
-
- public SeedTerms(SortedMap<BytesRef,SeedAndOrd> terms, FieldInfo fieldInfo, IndexOptions maxAllowed, boolean allowPayloads) {
- this.terms = terms;
- this.fieldInfo = fieldInfo;
- this.maxAllowed = maxAllowed;
- this.allowPayloads = allowPayloads;
- }
-
- @Override
- public TermsEnum iterator(TermsEnum reuse) {
- SeedTermsEnum termsEnum;
- if (reuse != null && reuse instanceof SeedTermsEnum) {
- termsEnum = (SeedTermsEnum) reuse;
- if (termsEnum.terms != terms) {
- termsEnum = new SeedTermsEnum(terms, maxAllowed, allowPayloads);
- }
- } else {
- termsEnum = new SeedTermsEnum(terms, maxAllowed, allowPayloads);
- }
- termsEnum.reset();
-
- return termsEnum;
- }
-
- @Override
- public long size() {
- return terms.size();
- }
-
- @Override
- public long getSumTotalTermFreq() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long getSumDocFreq() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int getDocCount() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean hasFreqs() {
- return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- }
-
- @Override
- public boolean hasOffsets() {
- return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- }
-
- @Override
- public boolean hasPositions() {
- return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- }
-
- @Override
- public boolean hasPayloads() {
- return allowPayloads && fieldInfo.hasPayloads();
- }
- }
-
- private static class SeedTermsEnum extends TermsEnum {
- final SortedMap<BytesRef,SeedAndOrd> terms;
- final IndexOptions maxAllowed;
- final boolean allowPayloads;
-
- private Iterator<Map.Entry<BytesRef,SeedAndOrd>> iterator;
-
- private Map.Entry<BytesRef,SeedAndOrd> current;
-
- public SeedTermsEnum(SortedMap<BytesRef,SeedAndOrd> terms, IndexOptions maxAllowed, boolean allowPayloads) {
- this.terms = terms;
- this.maxAllowed = maxAllowed;
- this.allowPayloads = allowPayloads;
- }
-
- void reset() {
- iterator = terms.entrySet().iterator();
- }
-
- @Override
- public SeekStatus seekCeil(BytesRef text) {
- SortedMap<BytesRef,SeedAndOrd> tailMap = terms.tailMap(text);
- if (tailMap.isEmpty()) {
- return SeekStatus.END;
- } else {
- iterator = tailMap.entrySet().iterator();
- if (tailMap.firstKey().equals(text)) {
- return SeekStatus.FOUND;
- } else {
- return SeekStatus.NOT_FOUND;
- }
- }
- }
-
- @Override
- public BytesRef next() {
- if (iterator.hasNext()) {
- current = iterator.next();
- return term();
- } else {
- return null;
- }
- }
-
- @Override
- public void seekExact(long ord) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public BytesRef term() {
- return current.getKey();
- }
-
- @Override
- public long ord() {
- return current.getValue().ord;
- }
-
- @Override
- public int docFreq() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long totalTermFreq() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public final PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
- if (liveDocs != null) {
- throw new IllegalArgumentException("liveDocs must be null");
- }
- if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
- if (maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- return null;
- }
- if (PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- return null;
- }
- if (PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) && allowPayloads == false) {
- return null;
- }
- }
- if (PostingsEnum.featureRequested(flags, PostingsEnum.FREQS) && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS) < 0) {
- return null;
- }
- return getSeedPostings(current.getKey().utf8ToString(), current.getValue().seed, false, maxAllowed, allowPayloads);
- }
-
- }
-
- // TODO maybe instead of @BeforeClass just make a single test run: build postings & index & test it?
-
- private FieldInfos currentFieldInfos;
-
- // maxAllowed = the "highest" we can index, but we will still
- // randomly index at lower IndexOption
- private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
- Codec codec = getCodec();
- SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", maxDoc, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>());
-
- int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
- if (VERBOSE) {
- System.out.println("\nTEST: now build index");
- }
-
- // TODO use allowPayloads
-
- FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
- for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
- FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);
-
- // Randomly picked the IndexOptions to index this
- // field with:
- IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? maxIndexOption : TestUtil.nextInt(random(), 1, maxIndexOption)];
- boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
-
- newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
- fieldUpto,
- false,
- false,
- doPayloads,
- indexOptions,
- DocValuesType.NONE,
- -1,
- new HashMap<>());
- }
-
- FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);
-
- // Estimate that flushed segment size will be 25% of
- // what we use in RAM:
- long bytes = totalPostings * 8 + totalPayloadBytes;
-
- SegmentWriteState writeState = new SegmentWriteState(null, dir,
- segmentInfo, newFieldInfos,
- null, new IOContext(new FlushInfo(maxDoc, bytes)));
-
- Fields seedFields = new SeedFields(fields, newFieldInfos, maxAllowed, allowPayloads);
-
- FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState);
- boolean success = false;
- try {
- consumer.write(seedFields);
- success = true;
- } finally {
- if (success) {
- IOUtils.close(consumer);
- } else {
- IOUtils.closeWhileHandlingException(consumer);
- }
- }
-
- if (VERBOSE) {
- System.out.println("TEST: after indexing: files=");
- for(String file : dir.listAll()) {
- System.out.println(" " + file + ": " + dir.fileLength(file) + " bytes");
- }
- }
-
- currentFieldInfos = newFieldInfos;
-
- SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ);
-
- return codec.postingsFormat().fieldsProducer(readState);
- }
-
- private static class ThreadState {
- // Only used with REUSE option:
- public PostingsEnum reusePostingsEnum;
- }
-
- private void verifyEnum(ThreadState threadState,
- String field,
- BytesRef term,
- TermsEnum termsEnum,
-
- // Maximum options (docs/freqs/positions/offsets) to test:
- IndexOptions maxTestOptions,
-
- IndexOptions maxIndexOptions,
-
- EnumSet<Option> options,
- boolean alwaysTestMax) throws IOException {
-
- if (VERBOSE) {
- System.out.println(" verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
- }
-
- // Make sure TermsEnum really is positioned on the
- // expected term:
- assertEquals(term, termsEnum.term());
-
- // 50% of the time time pass liveDocs:
- boolean useLiveDocs = options.contains(Option.LIVE_DOCS) && random().nextBoolean();
- Bits liveDocs;
- if (useLiveDocs) {
- liveDocs = globalLiveDocs;
- if (VERBOSE) {
- System.out.println(" use liveDocs");
- }
- } else {
- liveDocs = null;
- if (VERBOSE) {
- System.out.println(" no liveDocs");
- }
- }
-
- FieldInfo fieldInfo = currentFieldInfos.fieldInfo(field);
-
- // NOTE: can be empty list if we are using liveDocs:
- SeedPostings expected = getSeedPostings(term.utf8ToString(),
- fields.get(field).get(term).seed,
- useLiveDocs,
- maxIndexOptions,
- true);
- assertEquals(expected.docFreq, termsEnum.docFreq());
-
- boolean allowFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 &&
- maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- boolean doCheckFreqs = allowFreqs && (alwaysTestMax || random().nextInt(3) <= 2);
-
- boolean allowPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 &&
- maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- boolean doCheckPositions = allowPositions && (alwaysTestMax || random().nextInt(3) <= 2);
-
- boolean allowOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0 &&
- maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
- boolean doCheckOffsets = allowOffsets && (alwaysTestMax || random().nextInt(3) <= 2);
-
- boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && (alwaysTestMax || random().nextInt(3) <= 2);
-
- PostingsEnum prevPostingsEnum = null;
-
- PostingsEnum postingsEnum;
-
- if (!doCheckPositions) {
- if (allowPositions && random().nextInt(10) == 7) {
- // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum
-
- if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
- prevPostingsEnum = threadState.reusePostingsEnum;
- }
-
- int flags = PostingsEnum.POSITIONS;
- if (alwaysTestMax || random().nextBoolean()) {
- flags |= PostingsEnum.OFFSETS;
- }
- if (alwaysTestMax || random().nextBoolean()) {
- flags |= PostingsEnum.PAYLOADS;
- }
-
- if (VERBOSE) {
- System.out.println(" get DocsEnum (but we won't check positions) flags=" + flags);
- }
-
- threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, flags);
- postingsEnum = threadState.reusePostingsEnum;
- } else {
- if (VERBOSE) {
- System.out.println(" get DocsEnum");
- }
- if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
- prevPostingsEnum = threadState.reusePostingsEnum;
- }
- threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, doCheckFreqs ? PostingsEnum.FREQS : PostingsEnum.NONE);
- postingsEnum = threadState.reusePostingsEnum;
- }
- } else {
- if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
- prevPostingsEnum = threadState.reusePostingsEnum;
- }
-
- int flags = PostingsEnum.POSITIONS;
- if (alwaysTestMax || doCheckOffsets || random().nextInt(3) == 1) {
- flags |= PostingsEnum.OFFSETS;
- }
- if (alwaysTestMax || doCheckPayloads|| random().nextInt(3) == 1) {
- flags |= PostingsEnum.PAYLOADS;
- }
-
- if (VERBOSE) {
- System.out.println(" get DocsEnum flags=" + flags);
- }
-
- threadState.reusePostingsEnum = termsEnum.postings(liveDocs, prevPostingsEnum, flags);
- postingsEnum = threadState.reusePostingsEnum;
- }
-
- assertNotNull("null DocsEnum", postingsEnum);
- int initialDocID = postingsEnum.docID();
- assertEquals("inital docID should be -1" + postingsEnum, -1, initialDocID);
-
- if (VERBOSE) {
- if (prevPostingsEnum == null) {
- System.out.println(" got enum=" + postingsEnum);
- } else if (prevPostingsEnum == postingsEnum) {
- System.out.println(" got reuse enum=" + postingsEnum);
- } else {
- System.out.println(" got enum=" + postingsEnum + " (reuse of " + prevPostingsEnum + " failed)");
- }
- }
-
- // 10% of the time don't consume all docs:
- int stopAt;
- if (!alwaysTestMax && options.contains(Option.PARTIAL_DOC_CONSUME) && expected.docFreq > 1 && random().nextInt(10) == 7) {
- stopAt = random().nextInt(expected.docFreq-1);
- if (VERBOSE) {
- System.out.println(" will not consume all docs (" + stopAt + " vs " + expected.docFreq + ")");
- }
- } else {
- stopAt = expected.docFreq;
- if (VERBOSE) {
- System.out.println(" consume all docs");
- }
- }
-
- double skipChance = alwaysTestMax ? 0.5 : random().nextDouble();
- int numSkips = expected.docFreq < 3 ? 1 : TestUtil.nextInt(random(), 1, Math.min(20, expected.docFreq / 3));
- int skipInc = expected.docFreq/numSkips;
- int skipDocInc = maxDoc/numSkips;
-
- // Sometimes do 100% skipping:
- boolean doAllSkipping = options.contains(Option.SKIPPING) && random().nextInt(7) == 1;
-
- double freqAskChance = alwaysTestMax ? 1.0 : random().nextDouble();
- double payloadCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
- double offsetCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
-
- if (VERBOSE) {
- if (options.contains(Option.SKIPPING)) {
- System.out.println(" skipChance=" + skipChance + " numSkips=" + numSkips);
- } else {
- System.out.println(" no skipping");
- }
- if (doCheckFreqs) {
- System.out.println(" freqAskChance=" + freqAskChance);
- }
- if (doCheckPayloads) {
- System.out.println(" payloadCheckChance=" + payloadCheckChance);
- }
- if (doCheckOffsets) {
- System.out.println(" offsetCheckChance=" + offsetCheckChance);
- }
- }
-
- while (expected.upto <= stopAt) {
- if (expected.upto == stopAt) {
- if (stopAt == expected.docFreq) {
- assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.nextDoc());
-
- // Common bug is to forget to set this.doc=NO_MORE_DOCS in the enum!:
- assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.docID());
- }
- break;
- }
-
- if (options.contains(Option.SKIPPING) && (doAllSkipping || random().nextDouble() <= skipChance)) {
- int targetDocID = -1;
- if (expected.upto < stopAt && random().nextBoolean()) {
- // Pick target we know exists:
- final int skipCount = TestUtil.nextInt(random(), 1, skipInc);
- for(int skip=0;skip<skipCount;skip++) {
- if (expected.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
- break;
- }
- }
- } else {
- // Pick random target (might not exist):
- final int skipDocIDs = TestUtil.nextInt(random(), 1, skipDocInc);
- if (skipDocIDs > 0) {
- targetDocID = expected.docID() + skipDocIDs;
- expected.advance(targetDocID);
- }
- }
-
- if (expected.upto >= stopAt) {
- int target = random().nextBoolean() ? maxDoc : PostingsEnum.NO_MORE_DOCS;
- if (VERBOSE) {
- System.out.println(" now advance to end (target=" + target + ")");
- }
- assertEquals("DocsEnum should have ended but didn't", PostingsEnum.NO_MORE_DOCS, postingsEnum.advance(target));
- break;
- } else {
- if (VERBOSE) {
- if (targetDocID != -1) {
- System.out.println(" now advance to random target=" + targetDocID + " (" + expected.upto + " of " + stopAt + ") current=" + postingsEnum.docID());
- } else {
- System.out.println(" now advance to known-exists target=" + expected.docID() + " (" + expected.upto + " of " + stopAt + ") current=" + postingsEnum.docID());
- }
- }
- int docID = postingsEnum.advance(targetDocID != -1 ? targetDocID : expected.docID());
- assertEquals("docID is wrong", expected.docID(), docID);
- }
- } else {
- expected.nextDoc();
- if (VERBOSE) {
- System.out.println(" now nextDoc to " + expected.docID() + " (" + expected.upto + " of " + stopAt + ")");
- }
- int docID = postingsEnum.nextDoc();
- assertEquals("docID is wrong", expected.docID(), docID);
- if (docID == PostingsEnum.NO_MORE_DOCS) {
- break;
- }
- }
-
- if (doCheckFreqs && random().nextDouble() <= freqAskChance) {
- if (VERBOSE) {
- System.out.println(" now freq()=" + expected.freq());
- }
- int freq = postingsEnum.freq();
- assertEquals("freq is wrong", expected.freq(), freq);
- }
-
- if (doCheckPositions) {
- int freq = postingsEnum.freq();
- int numPosToConsume;
- if (!alwaysTestMax && options.contains(Option.PARTIAL_POS_CONSUME) && random().nextInt(5) == 1) {
- numPosToConsume = random().nextInt(freq);
- } else {
- numPosToConsume = freq;
- }
-
- for(int i=0;i<numPosToConsume;i++) {
- int pos = expected.nextPosition();
- if (VERBOSE) {
- System.out.println(" now nextPosition to " + pos);
- }
- assertEquals("position is wrong", pos, postingsEnum.nextPosition());
-
- if (doCheckPayloads) {
- BytesRef expectedPayload = expected.getPayload();
- if (random().nextDouble() <= payloadCheckChance) {
- if (VERBOSE) {
- System.out.println(" now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
- }
- if (expectedPayload == null || expectedPayload.length == 0) {
- assertNull("should not have payload", postingsEnum.getPayload());
- } else {
- BytesRef payload = postingsEnum.getPayload();
- assertNotNull("should have payload but doesn't", payload);
-
- assertEquals("payload length is wrong", expectedPayload.length, payload.length);
- for(int byteUpto=0;byteUpto<expectedPayload.length;byteUpto++) {
- assertEquals("payload bytes are wrong",
- expectedPayload.bytes[expectedPayload.offset + byteUpto],
- payload.bytes[payload.offset+byteUpto]);
- }
-
- // make a deep copy
- payload = BytesRef.deepCopyOf(payload);
- assertEquals("2nd call to getPayload returns something different!", payload, postingsEnum.getPayload());
- }
- } else {
- if (VERBOSE) {
- System.out.println(" skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
- }
- }
- }
-
- if (doCheckOffsets) {
- if (random().nextDouble() <= offsetCheckChance) {
- if (VERBOSE) {
- System.out.println(" now check offsets: startOff=" + expected.startOffset() + " endOffset=" + expected.endOffset());
- }
- assertEquals("startOffset is wrong", expected.startOffset(), postingsEnum.startOffset());
- assertEquals("endOffset is wrong", expected.endOffset(), postingsEnum.endOffset());
- } else {
- if (VERBOSE) {
- System.out.println(" skip check offsets");
- }
- }
- } else if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
- if (VERBOSE) {
- System.out.println(" now check offsets are -1");
- }
- assertEquals("startOffset isn't -1", -1, postingsEnum.startOffset());
- assertEquals("endOffset isn't -1", -1, postingsEnum.endOffset());
- }
- }
- }
- }
- }
-
- private static class TestThread extends Thread {
- private Fields fieldsSource;
- private EnumSet<Option> options;
- private IndexOptions maxIndexOptions;
- private IndexOptions maxTestOptions;
- private boolean alwaysTestMax;
- private BasePostingsFormatTestCase testCase;
-
- public TestThread(BasePostingsFormatTestCase testCase, Fields fieldsSource, EnumSet<Option> options, IndexOptions maxTestOptions,
- IndexOptions maxIndexOptions, boolean alwaysTestMax) {
- this.fieldsSource = fieldsSource;
- this.options = options;
- this.maxTestOptions = maxTestOptions;
- this.maxIndexOptions = maxIndexOptions;
- this.alwaysTestMax = alwaysTestMax;
- this.testCase = testCase;
- }
-
- @Override
- public void run() {
- try {
- try {
- testCase.testTermsOneThread(fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
- } catch (Throwable t) {
- throw new RuntimeException(t);
- }
- } finally {
- fieldsSource = null;
- testCase = null;
- }
- }
- }
-
- private void testTerms(final Fields fieldsSource, final EnumSet<Option> options,
- final IndexOptions maxTestOptions,
- final IndexOptions maxIndexOptions,
- final boolean alwaysTestMax) throws Exception {
-
- if (options.contains(Option.THREADS)) {
- int numThreads = TestUtil.nextInt(random(), 2, 5);
- Thread[] threads = new Thread[numThreads];
- for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
- threads[threadUpto] = new TestThread(this, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
- threads[threadUpto].start();
- }
- for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
- threads[threadUpto].join();
- }
- } else {
- testTermsOneThread(fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
- }
- }
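The removed TestThread wrapper and the fork/join loop above implement a simple pattern: spawn 2 to 5 threads over the same Fields source, rethrow any Throwable as a RuntimeException so the randomized runner's uncaught-exception handling fails the suite, and null the references in finally so the finished thread does not pin the potentially large test state. A condensed sketch of the same pattern, with a hypothetical Runnable standing in for the per-thread check:

import java.util.Random;

class ForkJoinSketch {
  static void runConcurrently(final Runnable check, Random random) throws InterruptedException {
    int numThreads = 2 + random.nextInt(4); // 2..5, like the harness above
    Thread[] threads = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            check.run();
          } catch (Throwable t) {
            // Surfaces through the test runner's uncaught-exception
            // handling, which fails the test:
            throw new RuntimeException(t);
          }
        }
      };
      threads[i].start();
    }
    for (Thread t : threads) {
      t.join();
    }
  }
}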
-
- private void testTermsOneThread(Fields fieldsSource, EnumSet<Option> options,
- IndexOptions maxTestOptions,
- IndexOptions maxIndexOptions, boolean alwaysTestMax) throws IOException {
-
- ThreadState threadState = new ThreadState();
-
- // Test random terms/fields:
- List<TermState> termStates = new ArrayList<>();
- List<FieldAndTerm> termStateTerms = new ArrayList<>();
-
- boolean supportsOrds = true;
-
- Collections.shuffle(allTerms, random());
- int upto = 0;
- while (upto < allTerms.size()) {
-
- boolean useTermState = termStates.size() != 0 && random().nextInt(5) == 1;
- boolean useTermOrd = supportsOrds && useTermState == false && random().nextInt(5) == 1;
-
- FieldAndTerm fieldAndTerm;
- TermsEnum termsEnum;
-
- TermState termState = null;
-
- if (!useTermState) {
- // Seek by random field+term:
- fieldAndTerm = allTerms.get(upto++);
- if (VERBOSE) {
- if (useTermOrd) {
- System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() + " using ord=" + fieldAndTerm.ord);
- } else {
- System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() );
- }
- }
- } else {
- // Seek by previous saved TermState
- int idx = random().nextInt(termStates.size());
- fieldAndTerm = termStateTerms.get(idx);
- if (VERBOSE) {
- System.out.println("\nTEST: seek using TermState to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString());
- }
- termState = termStates.get(idx);
- }
-
- Terms terms = fieldsSource.terms(fieldAndTerm.field);
- assertNotNull(terms);
- termsEnum = terms.iterator(null);
-
- if (!useTermState) {
- if (useTermOrd) {
- // Try seek by ord sometimes:
- try {
- termsEnum.seekExact(fieldAndTerm.ord);
- } catch (UnsupportedOperationException uoe) {
- supportsOrds = false;
- assertTrue(termsEnum.seekExact(fieldAndTerm.term));
- }
- } else {
- assertTrue(termsEnum.seekExact(fieldAndTerm.term));
- }
- } else {
- termsEnum.seekExact(fieldAndTerm.term, termState);
- }
-
- long termOrd;
- if (supportsOrds) {
- try {
- termOrd = termsEnum.ord();
- } catch (UnsupportedOperationException uoe) {
- supportsOrds = false;
- termOrd = -1;
- }
- } else {
- termOrd = -1;
- }
-
- if (termOrd != -1) {
- // PostingsFormat supports ords
- assertEquals(fieldAndTerm.ord, termsEnum.ord());
- }
-
- boolean savedTermState = false;
-
- if (options.contains(Option.TERM_STATE) && !useTermState && random().nextInt(5) == 1) {
- // Save away this TermState:
- termStates.add(termsEnum.termState());
- termStateTerms.add(fieldAndTerm);
- savedTermState = true;
- }
-
- verifyEnum(threadState,
- fieldAndTerm.field,
- fieldAndTerm.term,
- termsEnum,
- maxTestOptions,
- maxIndexOptions,
- options,
- alwaysTestMax);
-
- // Sometimes save term state after pulling the enum:
- if (options.contains(Option.TERM_STATE) && !useTermState && !savedTermState && random().nextInt(5) == 1) {
- // Save away this TermState:
- termStates.add(termsEnum.termState());
- termStateTerms.add(fieldAndTerm);
- useTermState = true;
- }
-
- // 10% of the time make sure you can pull another enum
- // from the same term:
- if (alwaysTestMax || random().nextInt(10) == 7) {
- // Try same term again
- if (VERBOSE) {
- System.out.println("TEST: try enum again on same term");
- }
-
- verifyEnum(threadState,
- fieldAndTerm.field,
- fieldAndTerm.term,
- termsEnum,
- maxTestOptions,
- maxIndexOptions,
- options,
- alwaysTestMax);
- }
- }
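Roughly one seek in five above replays a previously saved TermState instead of a fresh seekExact(BytesRef), verifying that a codec can restore its position without re-walking the term dictionary. A minimal sketch of that save/restore round trip, assuming a Terms instance for some field:

import java.io.IOException;

import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class TermStateSketch {
  static void saveAndRestore(Terms terms, BytesRef term) throws IOException {
    TermsEnum te = terms.iterator(null);
    if (te.seekExact(term)) {
      // Capture an opaque, codec-private snapshot of the current term:
      TermState state = te.termState();
      long docFreq = te.docFreq();

      // ... any amount of other work with this or other enums ...

      // Restoring should be cheap: the codec need not re-seek the dictionary.
      TermsEnum te2 = terms.iterator(null);
      te2.seekExact(term, state);
      assert te2.docFreq() == docFreq;
    }
  }
}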
-
- // Test Terms.intersect:
- for(String field : fields.keySet()) {
- while (true) {
- Automaton a = AutomatonTestUtil.randomAutomaton(random());
- CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
- if (ca.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
- // Keep retrying until we get an A that will really "use" the PF's intersect code:
- continue;
- }
- // System.out.println("A:\n" + a.toDot());
-
- BytesRef startTerm = null;
- if (random().nextBoolean()) {
- RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
- for (int iter=0;iter<100;iter++) {
- int[] codePoints = ras.getRandomAcceptedString(random());
- if (codePoints.length == 0) {
- continue;
- }
- startTerm = new BytesRef(UnicodeUtil.newString(codePoints, 0, codePoints.length));
- break;
- }
- // Don't allow empty string startTerm:
- if (startTerm == null) {
- continue;
- }
- }
- TermsEnum intersected = fieldsSource.terms(field).intersect(ca, startTerm);
-
- Set<BytesRef> intersectedTerms = new HashSet<BytesRef>();
- BytesRef term;
- while ((term = intersected.next()) != null) {
- if (startTerm != null) {
- // NOTE: not <=
- assertTrue(startTerm.compareTo(term) < 0);
- }
- intersectedTerms.add(BytesRef.deepCopyOf(term));
- verifyEnum(threadState,
- field,
- term,
- intersected,
- maxTestOptions,
- maxIndexOptions,
- options,
- alwaysTestMax);
- }
-
- if (ca.runAutomaton == null) {
- assertTrue(intersectedTerms.isEmpty());
- } else {
- for(BytesRef term2 : fields.get(field).keySet()) {
- boolean expected;
- if (startTerm != null && startTerm.compareTo(term2) >= 0) {
- expected = false;
- } else {
- expected = ca.runAutomaton.run(term2.bytes, term2.offset, term2.length);
- }
- assertEquals("term=" + term2, expected, intersectedTerms.contains(term2));
- }
- }
-
- break;
- }
- }
- }
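The loop above retries until the random automaton compiles to type NORMAL, because only NORMAL automata exercise the postings format's own intersect implementation; trivial shapes are answered generically. It then cross-checks intersect's output against a brute-force runAutomaton pass over every indexed term, treating a non-null startTerm as exclusive. A minimal sketch of driving Terms.intersect directly, using the same 5.x CompiledAutomaton constructor as the test (the regexp input is an assumption for illustration):

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;

class IntersectSketch {
  /** Counts the field's terms accepted by the given regular expression. */
  static long countMatches(Terms terms, String regexp) throws IOException {
    Automaton a = new RegExp(regexp).toAutomaton();
    // Same constructor as the test above: finite=null (unknown), simplify=true,
    // default determinization budget, automaton over code points (isBinary=false):
    CompiledAutomaton ca = new CompiledAutomaton(a, null, true, Integer.MAX_VALUE, false);
    TermsEnum te;
    if (ca.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
      // Only NORMAL automata reach the codec's own intersect code;
      // null startTerm means "start from the first term":
      te = terms.intersect(ca, null);
    } else {
      // Match-all, match-none, single-term and pure-prefix shapes are
      // answered generically:
      te = ca.getTermsEnum(terms);
    }
    long count = 0;
    while (te.next() != null) {
      count++;
    }
    return count;
  }
}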
-
- private void testFields(Fields fields) throws Exception {
- Iterator<String> iterator = fields.iterator();
- while (iterator.hasNext()) {
- iterator.next();
- try {
- iterator.remove();
- fail("Fields.iterator() allows for removal");
- } catch (UnsupportedOperationException expected) {
- // expected;
- }
- }
- assertFalse(iterator.hasNext());
- try {
- iterator.next();
- fail("Fields.iterator() doesn't throw NoSuchElementException when past the end");
- } catch (NoSuchElementException expected) {
- // expected
- }
- }
-
- /** Indexes all fields/terms at the specified
- * IndexOptions, and fully tests at that IndexOptions. */
- private void testFull(IndexOptions options, boolean withPayloads) throws Exception {
- Path path = createTempDir("testPostingsFormat.testExact");
- Directory dir = newFSDirectory(path);
-
- // TODO test thread safety of buildIndex too
- FieldsProducer fieldsProducer = buildIndex(dir, options, withPayloads, true);
-
- testFields(fieldsProducer);
-
- IndexOptions[] allOptions = IndexOptions.values();
- int maxIndexOption = Arrays.asList(allOptions).indexOf(options);
-
- for(int i=0;i<=maxIndexOption;i++) {
- testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true);
- if (withPayloads) {
- // If we indexed w/ payloads, also test enums w/o accessing payloads:
- testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true);
- }
- }
-
- fieldsProducer.close();
- dir.close();
- IOUtils.rm(path);
+ postingsTester = null;
}
public void testDocsOnly() throws Exception {
- testFull(IndexOptions.DOCS, false);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS, false);
}
public void testDocsAndFreqs() throws Exception {
- testFull(IndexOptions.DOCS_AND_FREQS, false);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS_AND_FREQS, false);
}
public void testDocsAndFreqsAndPositions() throws Exception {
- testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, false);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, false);
}
public void testDocsAndFreqsAndPositionsAndPayloads() throws Exception {
- testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, true);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, true);
}
public void testDocsAndFreqsAndPositionsAndOffsets() throws Exception {
- testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
}
public void testDocsAndFreqsAndPositionsAndOffsetsAndPayloads() throws Exception {
- testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true);
+ postingsTester.testFull(getCodec(), createTempDir("testPostingsFormat.testExact"), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true);
}
public void testRandom() throws Exception {
@@ -1396,13 +133,13 @@ public abstract class BasePostingsFormat
boolean indexPayloads = random().nextBoolean();
// TODO test thread safety of buildIndex too
- FieldsProducer fieldsProducer = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, indexPayloads, false);
+ FieldsProducer fieldsProducer = postingsTester.buildIndex(getCodec(), dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, indexPayloads, false);
- testFields(fieldsProducer);
+ postingsTester.testFields(fieldsProducer);
// NOTE: you can also test "weaker" index options than
// you indexed with:
- testTerms(fieldsProducer, EnumSet.allOf(Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
+ postingsTester.testTerms(fieldsProducer, EnumSet.allOf(RandomPostingsTester.Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
fieldsProducer.close();
fieldsProducer = null;
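The NOTE above is the property under test: an index built with the strongest options must also serve enums at every weaker level. A small sketch of reading a fully indexed term at the docs-only level, assuming an already positioned TermsEnum:

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;

class WeakerOptionsSketch {
  /** Iterates docs only, even if the term was indexed with freqs/positions/offsets/payloads. */
  static int docCount(TermsEnum positionedEnum) throws IOException {
    // Requesting fewer features than were indexed is always legal; the codec
    // may hand back a cheaper enum that skips decoding positions entirely:
    PostingsEnum docsOnly = positionedEnum.postings(null, null, PostingsEnum.NONE);
    int count = 0;
    while (docsOnly.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    return count;
  }
}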
@@ -1421,9 +158,9 @@ public abstract class BasePostingsFormat
Path path = createTempDir("testPostingsEnumReuse");
Directory dir = newFSDirectory(path);
- FieldsProducer fieldsProducer = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, random().nextBoolean(), true);
- Collections.shuffle(allTerms, random());
- FieldAndTerm fieldAndTerm = allTerms.get(0);
+ FieldsProducer fieldsProducer = postingsTester.buildIndex(getCodec(), dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, random().nextBoolean(), true);
+ Collections.shuffle(postingsTester.allTerms, random());
+ RandomPostingsTester.FieldAndTerm fieldAndTerm = postingsTester.allTerms.get(0);
Terms terms = fieldsProducer.terms(fieldAndTerm.field);
TermsEnum te = terms.iterator(null);
Modified: lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java (original)
+++ lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java Thu Apr 2 15:37:39 2015
@@ -831,6 +831,14 @@ public final class TestUtil {
return new String(buffer, 0, i);
}
+ /** Returns a random binary term. */
+ public static BytesRef randomBinaryTerm(Random r) {
+ int length = r.nextInt(15);
+ BytesRef b = new BytesRef(length);
+ r.nextBytes(b.bytes);
+ b.length = length;
+ return b;
+ }
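The new randomBinaryTerm helper draws 0 to 14 arbitrary bytes, so unlike the UTF-8 string generators it also produces terms that are not valid UTF-8. A standalone sketch that mirrors the helper's body and prints a handful of unique terms in their unsigned-byte index order (the seed and count here are arbitrary):

import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

import org.apache.lucene.util.BytesRef;

class RandomBinaryTerms {
  public static void main(String[] args) {
    Random r = new Random(42);
    // TreeSet orders BytesRef by unsigned byte comparison, i.e. index order:
    Set<BytesRef> terms = new TreeSet<>();
    while (terms.size() < 10) {
      int length = r.nextInt(15);        // 0..14 bytes, like the new helper
      BytesRef b = new BytesRef(length); // allocates bytes[length], length=0
      r.nextBytes(b.bytes);
      b.length = length;
      terms.add(b);
    }
    for (BytesRef term : terms) {
      System.out.println(term); // hex form; the bytes may not be valid UTF-8
    }
  }
}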
/** Return a Codec that can read any of the
* default codecs and formats, but always writes in the specified
@@ -858,7 +866,7 @@ public final class TestUtil {
// (and maybe their params, too) to infostream on flush and merge.
// otherwise in a real debugging situation we won't know what's going on!
if (LuceneTestCase.VERBOSE) {
- System.out.println("forcing docvalues format to:" + format);
+ System.out.println("TestUtil: forcing docvalues format to:" + format);
}
return new AssertingCodec() {
@Override
@@ -1282,6 +1290,24 @@ public final class TestUtil {
return sb.toString();
}
}
+
+ /** For debugging: tries to include br.utf8ToString(), but if that
+ * fails (because it's not valid utf8, which is fine!), just
+ * use ordinary toString. */
+ public static String bytesRefToString(BytesRef br) {
+ if (br == null) {
+ return "(null)";
+ } else {
+ try {
+ return br.utf8ToString() + " " + br.toString();
+ } catch (IllegalArgumentException t) {
+ // If BytesRef isn't actually UTF8, or it's e.g. a
+ // prefix of UTF8 that ends mid-unicode-char, we
+ // fall back to hex:
+ return br.toString();
+ }
+ }
+ }
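The fallback works because utf8ToString() can raise IllegalArgumentException on malformed input (exactly what the new helper catches) rather than silently substituting replacement characters, while BytesRef.toString() always succeeds with a bracketed hex rendering. A small sketch showing both paths, with one well-formed and one deliberately truncated term:

import org.apache.lucene.util.BytesRef;

class BytesRefToStringDemo {
  public static void main(String[] args) {
    BytesRef valid = new BytesRef("märchen");                  // well-formed UTF-8
    BytesRef broken = new BytesRef(new byte[] {(byte) 0xC3});  // truncated 2-byte sequence
    for (BytesRef br : new BytesRef[] {valid, broken}) {
      try {
        // Same output as the new helper on the happy path:
        System.out.println(br.utf8ToString() + " " + br.toString());
      } catch (IllegalArgumentException e) {
        System.out.println(br.toString()); // hex fallback, e.g. [c3]
      }
    }
  }
}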
/** Returns a copy of directory, entirely in RAM */
public static RAMDirectory ramCopyOf(Directory dir) throws IOException {
Modified: lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java (original)
+++ lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java Thu Apr 2 15:37:39 2015
@@ -17,6 +17,12 @@
package org.apache.solr.client.solrj.embedded;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+
import com.google.common.base.Strings;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
@@ -42,11 +48,7 @@ import org.apache.solr.response.ResultCo
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.servlet.SolrRequestParsers;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Path;
+import static org.apache.solr.common.params.CommonParams.PATH;
/**
* SolrClient that connects directly to a CoreContainer.
@@ -170,7 +172,7 @@ public class EmbeddedSolrServer extends
}
req = _parser.buildRequestFrom(core, params, request.getContentStreams());
- req.getContext().put("path", path);
+ req.getContext().put(PATH, path);
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
Modified: lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/Overseer.java Thu Apr 2 15:37:39 2015
@@ -17,6 +17,22 @@ package org.apache.solr.cloud;
* the License.
*/
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.cloud.overseer.ClusterStateMutator;
@@ -47,25 +63,10 @@ import org.apache.zookeeper.KeeperExcept
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.ListIterator;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
import static org.apache.solr.cloud.OverseerCollectionProcessor.ONLY_ACTIVE_NODES;
import static org.apache.solr.cloud.OverseerCollectionProcessor.SHARD_UNIQUE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
+import static org.apache.solr.common.params.CommonParams.NAME;
/**
* Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
@@ -397,7 +398,7 @@ public class Overseer implements Closeab
}
private void handleProp(ZkNodeProps message) {
- String name = message.getStr("name");
+ String name = message.getStr(NAME);
String val = message.getStr("val");
Map m = reader.getClusterProps();
if (val == null) m.remove(name);