Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [24/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Mon Oct 21 18:58:24 2013
@@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreq;
-import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util;
public class FuzzySuggesterTest extends LuceneTestCase {
public void testRandomEdits() throws IOException {
- List<TermFreq> keys = new ArrayList<TermFreq>();
+ List<Input> keys = new ArrayList<Input>();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
- keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+ keys.add(new Input("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
- keys.add(new TermFreq("foo bar boo far", 12));
+ keys.add(new Input("foo bar boo far", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
@@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends
}
public void testNonLatinRandomEdits() throws IOException {
- List<TermFreq> keys = new ArrayList<TermFreq>();
+ List<Input> keys = new ArrayList<Input>();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
- keys.add(new TermFreq("бÑÑ" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+ keys.add(new Input("бÑÑ" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
- keys.add(new TermFreq("ÑÑÑ Ð±Ð°Ñ Ð±ÑÑ ÑаÑ", 12));
+ keys.add(new Input("ÑÑÑ Ð±Ð°Ñ Ð±ÑÑ ÑаÑ", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("фуу бар буу", 0);
@@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends
/** this is basically the WFST test ported to KeywordAnalyzer, so it acts the same */
public void testKeyword() throws Exception {
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("foo", 50),
- new TermFreq("bar", 10),
- new TermFreq("barbar", 12),
- new TermFreq("barbara", 6)
+ Input keys[] = new Input[] {
+ new Input("foo", 50),
+ new Input("bar", 10),
+ new Input("barbar", 12),
+ new Input("barbara", 6)
};
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2);
assertEquals(2, results.size());
@@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("the ghost of christmas past", 50),
+ Input keys[] = new Input[] {
+ new Input("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard);
suggester.setPreservePositionIncrements(false);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends
}
public void testNoSeps() throws Exception {
- TermFreq[] keys = new TermFreq[] {
- new TermFreq("ab cd", 0),
- new TermFreq("abcd", 1),
+ Input[] keys = new Input[] {
+ new Input("ab cd", 0),
+ new Input("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends
}
};
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("wifi network is slow", 50),
- new TermFreq("wi fi network is fast", 10),
+ Input keys[] = new Input[] {
+ new Input("wifi network is slow", 50),
+ new Input("wi fi network is fast", 10),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
@@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends
public void testEmpty() throws Exception {
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+ suggester.build(new InputArrayIterator(new Input[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
@@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends
}
};
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("ab xc", 50),
- new TermFreq("ba xd", 50),
+ Input keys[] = new Input[] {
+ new Input("ab xc", 50),
+ new Input("ba xd", 50),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 1),
- new TermFreq("x y z", 3),
- new TermFreq("x", 2),
- new TermFreq("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 1),
- new TermFreq("x y z", 3),
- new TermFreq("x", 2),
- new TermFreq("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends
}
// Holds surface form separately:
- private static class TermFreq2 implements Comparable<TermFreq2> {
+ private static class TermFreqPayload2 implements Comparable<TermFreqPayload2> {
public final String surfaceForm;
public final String analyzedForm;
public final long weight;
- public TermFreq2(String surfaceForm, String analyzedForm, long weight) {
+ public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) {
this.surfaceForm = surfaceForm;
this.analyzedForm = analyzedForm;
this.weight = weight;
}
@Override
- public int compareTo(TermFreq2 other) {
+ public int compareTo(TermFreqPayload2 other) {
int cmp = analyzedForm.compareTo(other.analyzedForm);
if (cmp != 0) {
return cmp;
@@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends
int numQueries = atLeast(100);
- final List<TermFreq2> slowCompletor = new ArrayList<TermFreq2>();
+ final List<TermFreqPayload2> slowCompletor = new ArrayList<TermFreqPayload2>();
final TreeSet<String> allPrefixes = new TreeSet<String>();
final Set<String> seen = new HashSet<String>();
- TermFreq[] keys = new TermFreq[numQueries];
+ Input[] keys = new Input[numQueries];
boolean preserveSep = random().nextBoolean();
boolean unicodeAware = random().nextBoolean();
@@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
- keys[i] = new TermFreq(key, weight);
+ keys[i] = new Input(key, weight);
- slowCompletor.add(new TermFreq2(key, analyzedKey, weight));
+ slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
}
if (VERBOSE) {
// Don't just sort original list, to avoid VERBOSE
// altering the test:
- List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
+ List<TermFreqPayload2> sorted = new ArrayList<TermFreqPayload2>(slowCompletor);
Collections.sort(sorted);
- for(TermFreq2 ent : sorted) {
+ for(TermFreqPayload2 ent : sorted) {
System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
}
}
@@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
FuzzySuggester suggester = new FuzzySuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
for (String prefix : allPrefixes) {
@@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends
assertTrue(automaton.isDeterministic());
// TODO: could be faster... but it's slowCompletor for a reason
BytesRef spare = new BytesRef();
- for (TermFreq2 e : slowCompletor) {
+ for (TermFreqPayload2 e : slowCompletor) {
spare.copyChars(e.analyzedForm);
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
for (IntsRef intsRef : finiteStrings) {
@@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false);
- List<TermFreq> keys = Arrays.asList(new TermFreq[] {
- new TermFreq("a", 40),
- new TermFreq("a ", 50),
- new TermFreq(" a", 60),
+ List<Input> keys = Arrays.asList(new Input[] {
+ new Input("a", 40),
+ new Input("a ", 50),
+ new Input(" a", 60),
});
Collections.shuffle(keys, random());
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false);
- List<TermFreq> keys = Arrays.asList(new TermFreq[] {
- new TermFreq("foo bar", 40),
- new TermFreq("foo bar baz", 50),
- new TermFreq("barbaz", 60),
- new TermFreq("barbazfoo", 10),
+ List<Input> keys = Arrays.asList(new Input[] {
+ new Input("foo bar", 40),
+ new Input("foo bar baz", 50),
+ new Input("barbaz", 60),
+ new Input("barbazfoo", 10),
});
Collections.shuffle(keys, random());
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
@@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends
public void testRandom2() throws Throwable {
final int NUM = atLeast(200);
- final List<TermFreq> answers = new ArrayList<TermFreq>();
+ final List<Input> answers = new ArrayList<Input>();
final Set<String> seen = new HashSet<String>();
for(int i=0;i<NUM;i++) {
final String s = randomSimpleString(8);
if (!seen.contains(s)) {
- answers.add(new TermFreq(s, random().nextInt(1000)));
+ answers.add(new Input(s, random().nextInt(1000)));
seen.add(s);
}
}
- Collections.sort(answers, new Comparator<TermFreq>() {
+ Collections.sort(answers, new Comparator<Input>() {
@Override
- public int compare(TermFreq a, TermFreq b) {
+ public int compare(Input a, Input b) {
return a.term.compareTo(b.term);
}
});
if (VERBOSE) {
System.out.println("\nTEST: targets");
- for(TermFreq tf : answers) {
+ for(Input tf : answers) {
System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v);
}
}
@@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends
}
Collections.shuffle(answers, random());
- suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()])));
+ suggest.build(new InputArrayIterator(answers.toArray(new Input[answers.size()])));
final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
@@ -1004,10 +1004,10 @@ public class FuzzySuggesterTest extends
}
}
- private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<TermFreq> answers, String frag) {
+ private List<LookupResult> slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List<Input> answers, String frag) {
final List<LookupResult> results = new ArrayList<LookupResult>();
final int fragLen = frag.length();
- for(TermFreq tf : answers) {
+ for(Input tf : answers) {
//System.out.println(" check s=" + tf.term.utf8ToString());
boolean prefixMatches = true;
for(int i=0;i<prefixLen;i++) {
Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java Mon Oct 21 18:58:24 2013
@@ -28,8 +28,8 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
- public static TermFreq tf(String t, int v) {
- return new TermFreq(t, v);
+ public static Input tf(String t, int v) {
+ return new Input(t, v);
}
private FSTCompletion completion;
@@ -40,15 +40,15 @@ public class FSTCompletionTest extends L
super.setUp();
FSTCompletionBuilder builder = new FSTCompletionBuilder();
- for (TermFreq tf : evalKeys()) {
+ for (Input tf : evalKeys()) {
builder.add(tf.term, (int) tf.v);
}
completion = builder.build();
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
}
- private TermFreq[] evalKeys() {
- final TermFreq[] keys = new TermFreq[] {
+ private Input[] evalKeys() {
+ final Input[] keys = new Input[] {
tf("one", 0),
tf("oneness", 1),
tf("onerous", 1),
@@ -157,17 +157,17 @@ public class FSTCompletionTest extends L
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
Random r = random();
- List<TermFreq> keys = new ArrayList<TermFreq>();
+ List<Input> keys = new ArrayList<Input>();
for (int i = 0; i < 5000; i++) {
- keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1));
+ keys.add(new Input(_TestUtil.randomSimpleString(r), -1));
}
- lookup.build(new TermFreqArrayIterator(keys));
+ lookup.build(new InputArrayIterator(keys));
// All the weights were constant, so all returned buckets must be constant, whatever they
// are.
Long previous = null;
- for (TermFreq tf : keys) {
+ for (Input tf : keys) {
Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
if (previous != null) {
assertEquals(previous, current);
@@ -177,11 +177,11 @@ public class FSTCompletionTest extends L
}
public void testMultilingualInput() throws Exception {
- List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
+ List<Input> input = LookupBenchmarkTest.readTop50KWiki();
FSTCompletionLookup lookup = new FSTCompletionLookup();
- lookup.build(new TermFreqArrayIterator(input));
- for (TermFreq tf : input) {
+ lookup.build(new InputArrayIterator(input));
+ for (Input tf : input) {
assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());
}
@@ -198,17 +198,17 @@ public class FSTCompletionTest extends L
}
public void testRandom() throws Exception {
- List<TermFreq> freqs = new ArrayList<TermFreq>();
+ List<Input> freqs = new ArrayList<Input>();
Random rnd = random();
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
int weight = rnd.nextInt(100);
- freqs.add(new TermFreq("" + rnd.nextLong(), weight));
+ freqs.add(new Input("" + rnd.nextLong(), weight));
}
FSTCompletionLookup lookup = new FSTCompletionLookup();
- lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));
+ lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
- for (TermFreq tf : freqs) {
+ for (Input tf : freqs) {
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
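For the builder path exercised above, a short hedged sketch: FSTCompletionBuilder takes (BytesRef, bucket) pairs directly, which is why the test converts tf.v with a cast to int. The keys and class name are illustrative:

    import java.io.IOException;
    import org.apache.lucene.search.suggest.fst.FSTCompletion;
    import org.apache.lucene.search.suggest.fst.FSTCompletionBuilder;
    import org.apache.lucene.util.BytesRef;

    public class FstCompletionSketch {
      public static void main(String[] args) throws IOException {
        FSTCompletionBuilder builder = new FSTCompletionBuilder();
        builder.add(new BytesRef("one"), 0);      // weight mapped to a bucket
        builder.add(new BytesRef("oneness"), 1);
        FSTCompletion completion = builder.build();
        // at most 2 completions for the prefix "one", best bucket first
        for (FSTCompletion.Completion c : completion.lookup("one", 2)) {
          System.out.println(c.utf8.utf8ToString() + " bucket=" + c.bucket);
        }
      }
    }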
Modified: lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Mon Oct 21 18:58:24 2013
@@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest
import java.util.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreq;
-import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil;
public class WFSTCompletionTest extends LuceneTestCase {
public void testBasic() throws Exception {
- TermFreq keys[] = new TermFreq[] {
- new TermFreq("foo", 50),
- new TermFreq("bar", 10),
- new TermFreq("barbar", 12),
- new TermFreq("barbara", 6)
+ Input keys[] = new Input[] {
+ new Input("foo", 50),
+ new Input("bar", 10),
+ new Input("barbar", 12),
+ new Input("barbara", 6)
};
Random random = new Random(random().nextLong());
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
@@ -81,9 +81,9 @@ public class WFSTCompletionTest extends
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 20),
- new TermFreq("x", 2),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 20),
+ new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@@ -105,9 +105,9 @@ public class WFSTCompletionTest extends
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq("x y", 20),
- new TermFreq("x", 2),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 20),
+ new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@@ -131,7 +131,7 @@ public class WFSTCompletionTest extends
final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
final TreeSet<String> allPrefixes = new TreeSet<String>();
- TermFreq[] keys = new TermFreq[numWords];
+ Input[] keys = new Input[numWords];
for (int i = 0; i < numWords; i++) {
String s;
@@ -150,11 +150,11 @@ public class WFSTCompletionTest extends
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
slowCompletor.put(s, (long)weight);
- keys[i] = new TermFreq(s, weight);
+ keys[i] = new Input(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
Random random = new Random(random().nextLong());
for (String prefix : allPrefixes) {
@@ -205,16 +205,16 @@ public class WFSTCompletionTest extends
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqArrayIterator(new TermFreq[] {
- new TermFreq(key1, 50),
- new TermFreq(key2, 50),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input(key1, 50),
+ new Input(key2, 50),
}));
}
public void testEmpty() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+ suggester.build(new InputArrayIterator(new Input[0]));
List<LookupResult> result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}
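The exactFirst cases above have a simple observable contract; a hedged sketch of it (keys and class name are illustrative):

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.search.suggest.Input;
    import org.apache.lucene.search.suggest.InputArrayIterator;
    import org.apache.lucene.search.suggest.Lookup.LookupResult;
    import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;

    public class WfstExactFirstSketch {
      public static void main(String[] args) throws IOException {
        WFSTCompletionLookup suggester = new WFSTCompletionLookup(true); // exactFirst
        suggester.build(new InputArrayIterator(new Input[] {
          new Input("x y", 20),
          new Input("x", 2),
        }));
        // with exactFirst=true, the exact match "x" sorts ahead of the
        // higher-weighted extension "x y"
        List<LookupResult> results = suggester.lookup("x", false, 2);
        for (LookupResult result : results) {
          System.out.println(result.key + "/" + result.value);
        }
      }
    }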
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml Mon Oct 21 18:58:24 2013
@@ -29,11 +29,11 @@
</configurations>
<dependencies defaultconf="default">
- <dependency org="org.apache.ant" name="ant" rev="1.8.2" transitive="false" />
+ <dependency org="org.apache.ant" name="ant" rev="${/org.apache.ant/ant}" transitive="false" />
- <dependency org="junit" name="junit" rev="4.10" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="junit" name="junit" rev="${/junit/junit}" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="${/com.carrotsearch.randomizedtesting/junit4-ant}" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="${/com.carrotsearch.randomizedtesting/randomizedtesting-runner}" transitive="false" conf="default->*;junit4-stdalone->*" />
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Mon Oct 21 18:58:24 2013
@@ -111,7 +111,8 @@ public abstract class BaseTokenStreamTes
// arriving to pos Y have the same endOffset)
// - offsets only move forwards (startOffset >=
// lastStartOffset)
- public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset,
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+ int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
boolean offsetsAreCorrect) throws IOException {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -135,7 +136,7 @@ public abstract class BaseTokenStreamTes
}
PositionIncrementAttribute posIncrAtt = null;
- if (posIncrements != null) {
+ if (posIncrements != null || finalPosInc != null) {
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
}
@@ -145,6 +146,12 @@ public abstract class BaseTokenStreamTes
assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
}
+
+ KeywordAttribute keywordAtt = null;
+ if (keywordAtts != null) {
+ assertTrue("has no KeywordAttribute", ts.hasAttribute(KeywordAttribute.class));
+ keywordAtt = ts.getAttribute(KeywordAttribute.class);
+ }
// Maps position to the start/end offset:
final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
@@ -161,22 +168,31 @@ public abstract class BaseTokenStreamTes
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+ if (keywordAtt != null) keywordAtt.setKeyword((i&1) == 0);
checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
assertTrue("token "+i+" does not exist", ts.incrementToken());
assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
assertEquals("term "+i, output[i], termAtt.toString());
- if (startOffsets != null)
+ if (startOffsets != null) {
assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
- if (endOffsets != null)
+ }
+ if (endOffsets != null) {
assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset());
- if (types != null)
+ }
+ if (types != null) {
assertEquals("type "+i, types[i], typeAtt.type());
- if (posIncrements != null)
+ }
+ if (posIncrements != null) {
assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
- if (posLengths != null)
+ }
+ if (posLengths != null) {
assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
+ }
+ if (keywordAtts != null) {
+ assertEquals("keywordAtt " + i, keywordAtts[i], keywordAtt.isKeyword());
+ }
// we can enforce some basic things about a few attributes even if the caller doesn't check:
if (offsetAtt != null) {
@@ -239,17 +255,47 @@ public abstract class BaseTokenStreamTes
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
}
}
- assertFalse("TokenStream has more tokens than expected (expected count=" + output.length + ")", ts.incrementToken());
+
+ if (ts.incrementToken()) {
+ fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token=" + termAtt.toString());
+ }
+
+ // repeat our extra safety checks for end()
+ ts.clearAttributes();
+ if (termAtt != null) termAtt.setEmpty().append("bogusTerm");
+ if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
+ if (typeAtt != null) typeAtt.setType("bogusType");
+ if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+ if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
+
+ checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
+
ts.end();
+ assertTrue("super.end()/clearAttributes() was not called correctly in end()", checkClearAtt.getAndResetClearCalled());
+
if (finalOffset != null) {
- assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
+ assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
}
if (offsetAtt != null) {
assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
}
+ if (finalPosInc != null) {
+ assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
+ }
+
ts.close();
}
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
+ int posLengths[], Integer finalOffset, boolean[] keywordAtts,
+ boolean offsetsAreCorrect) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, keywordAtts, offsetsAreCorrect);
+ }
+
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, offsetsAreCorrect);
+ }
+
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, true);
}
@@ -295,14 +341,17 @@ public abstract class BaseTokenStreamTes
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException {
+ checkResetException(a, input);
assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
}
@@ -329,30 +378,43 @@ public abstract class BaseTokenStreamTes
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, null, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, types, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
- assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
- }
-
- public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
+ static void checkResetException(Analyzer a, String input) throws IOException {
+ TokenStream ts = a.tokenStream("bogus", input);
+ try {
+ if (ts.incrementToken()) {
+ //System.out.println(ts.reflectAsString(false));
+ fail("didn't get expected exception when reset() not called");
+ }
+ } catch (IllegalStateException expected) {
+ // ok
+ } catch (AssertionError expected) {
+ // ok: MockTokenizer
+ assertTrue(expected.getMessage(), expected.getMessage() != null && expected.getMessage().contains("wrong state"));
+ } catch (Exception unexpected) {
+ unexpected.printStackTrace(System.err);
+ fail("got wrong exception when reset() not called: " + unexpected);
+ } finally {
+ // consume correctly
+ ts.reset();
+ while (ts.incrementToken()) {}
+ ts.end();
+ ts.close();
+ }
+
+ // check for a missing close()
+ ts = a.tokenStream("bogus", input);
+ ts.reset();
+ while (ts.incrementToken()) {}
+ ts.end();
+ try {
+ ts = a.tokenStream("bogus", input);
+ fail("didn't get expected exception when close() not called");
+ } catch (IllegalStateException expected) {
+ // ok
+ } finally {
+ ts.close();
+ }
}
// simple utility method for testing stemmers
@@ -361,10 +423,6 @@ public abstract class BaseTokenStreamTes
assertAnalyzesTo(a, input, new String[]{expected});
}
- public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
- assertAnalyzesToReuse(a, input, new String[]{expected});
- }
-
/** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
checkRandomData(random, a, iterations, 20, false, true);
@@ -430,6 +488,7 @@ public abstract class BaseTokenStreamTes
}
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
+ checkResetException(a, "best effort");
long seed = random.nextLong();
boolean useCharFilter = random.nextBoolean();
Directory dir = null;
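The new checkResetException helper pins down the TokenStream lifecycle contract: incrementToken() before reset() must fail, and requesting a new stream before close() must fail. A sketch of the well-behaved consume pattern it enforces (field name and text are placeholders):

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class ConsumeSketch {
      // the canonical order: reset -> incrementToken loop -> end -> close
      static void consume(Analyzer a, String field, String text) throws IOException {
        TokenStream ts = a.tokenStream(field, text);
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                       // skipping this now fails the check
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();                         // publishes final offset / posInc
        ts.close();                       // required before the next tokenStream()
      }
    }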
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java Mon Oct 21 18:58:24 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -34,9 +36,28 @@ public final class CannedTokenStream ext
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
-
+ private final int finalOffset;
+ private final int finalPosInc;
+
public CannedTokenStream(Token... tokens) {
this.tokens = tokens;
+ finalOffset = 0;
+ finalPosInc = 0;
+ }
+
+ /** If you want trailing holes, pass a non-zero
+ * finalPosInc. */
+ public CannedTokenStream(int finalPosInc, int finalOffset, Token... tokens) {
+ this.tokens = tokens;
+ this.finalOffset = finalOffset;
+ this.finalPosInc = finalPosInc;
+ }
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ posIncrAtt.setPositionIncrement(finalPosInc);
+ offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
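The new two-int constructor lets a canned stream report trailing holes through end(). A hedged sketch; the token text and offsets are made up:

    import org.apache.lucene.analysis.CannedTokenStream;
    import org.apache.lucene.analysis.Token;

    public class TrailingHoleSketch {
      public static CannedTokenStream oneTokenWithTrailingHole() {
        Token t = new Token("foo", 0, 3);   // term "foo" at offsets [0,3)
        // finalPosInc=1: one trailing position was skipped (e.g. a removed
        // stopword); finalOffset=8: end-of-input offset reported by end()
        return new CannedTokenStream(1, 8, t);
      }
    }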
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java Mon Oct 21 18:58:24 2013
@@ -258,14 +258,17 @@ public abstract class CollationTestBase
for (int i = 0; i < numTestPoints; i++) {
String term = _TestUtil.randomSimpleString(random());
- TokenStream ts = analyzer.tokenStream("fake", term);
- TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
- BytesRef bytes = termAtt.getBytesRef();
- ts.reset();
- assertTrue(ts.incrementToken());
- termAtt.fillBytesRef();
- // ensure we make a copy of the actual bytes too
- map.put(term, BytesRef.deepCopyOf(bytes));
+ try (TokenStream ts = analyzer.tokenStream("fake", term)) {
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+ BytesRef bytes = termAtt.getBytesRef();
+ ts.reset();
+ assertTrue(ts.incrementToken());
+ termAtt.fillBytesRef();
+ // ensure we make a copy of the actual bytes too
+ map.put(term, BytesRef.deepCopyOf(bytes));
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
}
Thread threads[] = new Thread[numThreads];
@@ -277,13 +280,16 @@ public abstract class CollationTestBase
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
String term = mapping.getKey();
BytesRef expected = mapping.getValue();
- TokenStream ts = analyzer.tokenStream("fake", term);
- TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
- BytesRef bytes = termAtt.getBytesRef();
- ts.reset();
- assertTrue(ts.incrementToken());
- termAtt.fillBytesRef();
- assertEquals(expected, bytes);
+ try (TokenStream ts = analyzer.tokenStream("fake", term)) {
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+ BytesRef bytes = termAtt.getBytesRef();
+ ts.reset();
+ assertTrue(ts.incrementToken());
+ termAtt.fillBytesRef();
+ assertEquals(expected, bytes);
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
}
} catch (IOException e) {
throw new RuntimeException(e);
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -106,7 +106,7 @@ public abstract class LookaheadTokenFilt
/** This is called when all input tokens leaving a given
* position have been returned. Override this and
- * call createToken and then set whichever token's
+ * call insertToken and then set whichever token's
* attributes you want, if you want to inject
* a token starting from this position. */
protected void afterPosition() throws IOException {
@@ -222,6 +222,18 @@ public abstract class LookaheadTokenFilt
if (DEBUG) {
System.out.println(" END");
}
+ afterPosition();
+ if (insertPending) {
+ // Subclass inserted a token at this same
+ // position:
+ if (DEBUG) {
+ System.out.println(" return inserted token");
+ }
+ assert insertedTokenConsistent();
+ insertPending = false;
+ return true;
+ }
+
return false;
}
} else {
@@ -260,7 +272,7 @@ public abstract class LookaheadTokenFilt
final int posLen = posLenAtt.getPositionLength();
final Position endPosData = positions.get(outputPos + posLen);
assert endPosData.endOffset != -1;
- assert offsetAtt.endOffset() == endPosData.endOffset;
+ assert offsetAtt.endOffset() == endPosData.endOffset: "offsetAtt.endOffset=" + offsetAtt.endOffset() + " vs expected=" + endPosData.endOffset;
return true;
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -60,7 +60,7 @@ public final class MockAnalyzer extends
* @param filter DFA describing how terms should be filtered (set of stopwords, etc)
*/
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
- super(new PerFieldReuseStrategy());
+ super(PER_FIELD_REUSE_STRATEGY);
// TODO: this should be solved in a different way; Random should not be shared (!).
this.random = new Random(random.nextLong());
this.runAutomaton = runAutomaton;
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -105,10 +105,19 @@ public final class MockGraphTokenFilter
}
@Override
+ public void close() throws IOException {
+ super.close();
+ this.random = null;
+ }
+
+ @Override
public boolean incrementToken() throws IOException {
if (DEBUG) {
System.out.println("MockGraphTF.incr inputPos=" + inputPos + " outputPos=" + outputPos);
}
+ if (random == null) {
+ throw new IllegalStateException("incrementToken called in wrong state!");
+ }
return nextToken();
}
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Mon Oct 21 18:58:24 2013
@@ -58,7 +58,8 @@ public final class MockTokenFilter exten
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
+ private int skippedPositions;
+
/**
* Create a new MockTokenFilter.
*
@@ -76,7 +77,7 @@ public final class MockTokenFilter exten
// initial token with posInc=0 ever
// return the first non-stop word found
- int skippedPositions = 0;
+ skippedPositions = 0;
while (input.incrementToken()) {
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
@@ -87,4 +88,16 @@ public final class MockTokenFilter exten
// reached EOS -- return false
return false;
}
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ skippedPositions = 0;
+ }
}
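Promoting skippedPositions to a field lets end() add the positions skipped after the last kept token, so trailing stopwords still count toward the final position increment. A hedged sketch of the observable effect (text and field name are illustrative):

    import java.io.IOException;
    import java.util.Random;
    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.analysis.MockTokenFilter;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class TrailingStopwordSketch {
      public static void main(String[] args) throws IOException {
        MockAnalyzer a = new MockAnalyzer(new Random(0), MockTokenizer.WHITESPACE,
                                          true, MockTokenFilter.ENGLISH_STOPSET);
        try (TokenStream ts = a.tokenStream("field", "dogs the the")) {
          PositionIncrementAttribute posInc =
              ts.addAttribute(PositionIncrementAttribute.class);
          ts.reset();
          while (ts.incrementToken()) { /* only "dogs" survives */ }
          ts.end();
          // with this fix, the two skipped trailing stopwords show up here
          System.out.println("final posInc=" + posInc.getPositionIncrement());
        }
      }
    }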
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java Mon Oct 21 18:58:24 2013
@@ -64,6 +64,11 @@ public class MockTokenizer extends Token
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
int off = 0;
+
+ // buffered state (previous codepoint and offset). we replay this once we
// hit a reject state, in case it's permissible as the start of a new term.
+ int bufferedCodePoint = -1; // -1 indicates empty buffer
+ int bufferedOff = -1;
// TODO: "register" with LuceneTestCase to ensure all streams are closed() ?
// currently, we can only check that the lifecycle is correct if someone is reusing,
@@ -121,8 +126,16 @@ public class MockTokenizer extends Token
: "incrementToken() called while in wrong state: " + streamState;
clearAttributes();
for (;;) {
- int startOffset = off;
- int cp = readCodePoint();
+ int startOffset;
+ int cp;
+ if (bufferedCodePoint >= 0) {
+ cp = bufferedCodePoint;
+ startOffset = bufferedOff;
+ bufferedCodePoint = -1;
+ } else {
+ startOffset = off;
+ cp = readCodePoint();
+ }
if (cp < 0) {
break;
} else if (isTokenChar(cp)) {
@@ -138,6 +151,14 @@ public class MockTokenizer extends Token
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
+ if (termAtt.length() < maxTokenLength) {
+ // buffer up, in case the "rejected" char can start a new word of its own
+ bufferedCodePoint = cp;
+ bufferedOff = endOffset;
+ } else {
// otherwise, it's because we hit the term limit.
+ bufferedCodePoint = -1;
+ }
int correctedStartOffset = correctOffset(startOffset);
int correctedEndOffset = correctOffset(endOffset);
assert correctedStartOffset >= 0;
@@ -146,8 +167,11 @@ public class MockTokenizer extends Token
lastOffset = correctedStartOffset;
assert correctedEndOffset >= correctedStartOffset;
offsetAtt.setOffset(correctedStartOffset, correctedEndOffset);
- streamState = State.INCREMENT;
- return true;
+ if (state == -1 || runAutomaton.isAccept(state)) {
+ // either we hit a reject state (longest match), or end-of-text, but in an accept state
+ streamState = State.INCREMENT;
+ return true;
+ }
}
}
streamState = State.INCREMENT_FALSE;
@@ -203,9 +227,11 @@ public class MockTokenizer extends Token
}
protected boolean isTokenChar(int c) {
- state = runAutomaton.step(state, c);
if (state < 0) {
state = runAutomaton.getInitialState();
+ }
+ state = runAutomaton.step(state, c);
+ if (state < 0) {
return false;
} else {
return true;
@@ -221,6 +247,7 @@ public class MockTokenizer extends Token
super.reset();
state = runAutomaton.getInitialState();
lastOffset = off = 0;
+ bufferedCodePoint = -1;
assert !enableChecks || streamState != State.RESET : "double reset()";
streamState = State.RESET;
}
@@ -244,6 +271,7 @@ public class MockTokenizer extends Token
@Override
public void end() throws IOException {
+ super.end();
int finalOffset = correctOffset(off);
offsetAtt.setOffset(finalOffset, finalOffset);
// some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
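The buffered-codepoint change gives MockTokenizer longest-match semantics: it runs the automaton as far as possible, and on hitting a reject state it replays the offending codepoint as the potential start of the next token, emitting a token only from an accept state. A hedged sketch (pattern and input are illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    public class LongestMatchSketch {
      public static void main(String[] args) throws IOException {
        CharacterRunAutomaton digits =
            new CharacterRunAutomaton(new RegExp("[0-9]+").toAutomaton());
        MockTokenizer tok = new MockTokenizer(new StringReader("12ab34"), digits, false);
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        tok.reset();
        while (tok.incrementToken()) {
          System.out.println(term.toString()); // prints "12", then "34"
        }
        tok.end();
        tok.close();
      }
    }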
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java Mon Oct 21 18:58:24 2013
@@ -41,7 +41,7 @@ public class VocabularyAssert {
while ((inputWord = vocReader.readLine()) != null) {
String expectedWord = outputReader.readLine();
Assert.assertNotNull(expectedWord);
- BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
+ BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
}
}
@@ -55,7 +55,7 @@ public class VocabularyAssert {
if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
continue; /* comment */
String words[] = inputLine.split("\t");
- BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
+ BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
}
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java Mon Oct 21 18:58:24 2013
@@ -23,10 +23,10 @@ import org.apache.lucene.codecs.NormsFor
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
/**
- * Acts like {@link Lucene42Codec} but with additional asserts.
+ * Acts like {@link Lucene46Codec} but with additional asserts.
*/
public final class AssertingCodec extends FilterCodec {
@@ -37,7 +37,7 @@ public final class AssertingCodec extend
private final NormsFormat norms = new AssertingNormsFormat();
public AssertingCodec() {
- super("Asserting", new Lucene42Codec());
+ super("Asserting", new Lucene46Codec());
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java Mon Oct 21 18:58:24 2013
@@ -24,7 +24,7 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
+import org.apache.lucene.codecs.lucene45.Lucene45DocValuesFormat;
import org.apache.lucene.index.AssertingAtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
@@ -33,15 +33,17 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
/**
- * Just like {@link Lucene42DocValuesFormat} but with additional asserts.
+ * Just like {@link Lucene45DocValuesFormat} but with additional asserts.
*/
public class AssertingDocValuesFormat extends DocValuesFormat {
- private final DocValuesFormat in = new Lucene42DocValuesFormat();
+ private final DocValuesFormat in = new Lucene45DocValuesFormat();
public AssertingDocValuesFormat() {
super("Asserting");
@@ -75,11 +77,10 @@ public class AssertingDocValuesFormat ex
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
int count = 0;
for (Number v : values) {
- assert v != null;
count++;
}
assert count == maxDoc;
- checkIterator(values.iterator(), maxDoc);
+ checkIterator(values.iterator(), maxDoc, true);
in.addNumericField(field, values);
}
@@ -87,12 +88,11 @@ public class AssertingDocValuesFormat ex
public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
int count = 0;
for (BytesRef b : values) {
- assert b != null;
- assert b.isValid();
+ assert b == null || b.isValid();
count++;
}
assert count == maxDoc;
- checkIterator(values.iterator(), maxDoc);
+ checkIterator(values.iterator(), maxDoc, true);
in.addBinaryField(field, values);
}
@@ -117,15 +117,17 @@ public class AssertingDocValuesFormat ex
for (Number v : docToOrd) {
assert v != null;
int ord = v.intValue();
- assert ord >= 0 && ord < valueCount;
- seenOrds.set(ord);
+ assert ord >= -1 && ord < valueCount;
+ if (ord >= 0) {
+ seenOrds.set(ord);
+ }
count++;
}
assert count == maxDoc;
assert seenOrds.cardinality() == valueCount;
- checkIterator(values.iterator(), valueCount);
- checkIterator(docToOrd.iterator(), maxDoc);
+ checkIterator(values.iterator(), valueCount, false);
+ checkIterator(docToOrd.iterator(), maxDoc, false);
in.addSortedField(field, values, docToOrd);
}
@@ -169,38 +171,80 @@ public class AssertingDocValuesFormat ex
assert docCount == maxDoc;
assert seenOrds.cardinality() == valueCount;
- checkIterator(values.iterator(), valueCount);
- checkIterator(docToOrdCount.iterator(), maxDoc);
- checkIterator(ords.iterator(), ordCount);
+ checkIterator(values.iterator(), valueCount, false);
+ checkIterator(docToOrdCount.iterator(), maxDoc, false);
+ checkIterator(ords.iterator(), ordCount, false);
in.addSortedSetField(field, values, docToOrdCount, ords);
}
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+ }
+
+ static class AssertingNormsConsumer extends DocValuesConsumer {
+ private final DocValuesConsumer in;
+ private final int maxDoc;
+
+ AssertingNormsConsumer(DocValuesConsumer in, int maxDoc) {
+ this.in = in;
+ this.maxDoc = maxDoc;
+ }
- private <T> void checkIterator(Iterator<T> iterator, long expectedSize) {
- for (long i = 0; i < expectedSize; i++) {
- boolean hasNext = iterator.hasNext();
- assert hasNext;
- T v = iterator.next();
+ @Override
+ public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+ int count = 0;
+ for (Number v : values) {
assert v != null;
- try {
- iterator.remove();
- throw new AssertionError("broken iterator (supports remove): " + iterator);
- } catch (UnsupportedOperationException expected) {
- // ok
- }
- }
- assert !iterator.hasNext();
- try {
- iterator.next();
- throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
- } catch (NoSuchElementException expected) {
- // ok
+ count++;
}
+ assert count == maxDoc;
+ checkIterator(values.iterator(), maxDoc, false);
+ in.addNumericField(field, values);
}
-
+
@Override
public void close() throws IOException {
in.close();
}
+
+ @Override
+ public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+ throw new IllegalStateException();
+ }
+ }
+
+ private static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
+ for (long i = 0; i < expectedSize; i++) {
+ boolean hasNext = iterator.hasNext();
+ assert hasNext;
+ T v = iterator.next();
+ assert allowNull || v != null;
+ try {
+ iterator.remove();
+ throw new AssertionError("broken iterator (supports remove): " + iterator);
+ } catch (UnsupportedOperationException expected) {
+ // ok
+ }
+ }
+ assert !iterator.hasNext();
+ try {
+ iterator.next();
+ throw new AssertionError("broken iterator (allows next() when hasNext==false) " + iterator);
+ } catch (NoSuchElementException expected) {
+ // ok
+ }
}
static class AssertingDocValuesProducer extends DocValuesProducer {
@@ -244,10 +288,24 @@ public class AssertingDocValuesFormat ex
assert values != null;
return new AssertingAtomicReader.AssertingSortedSetDocValues(values, maxDoc);
}
+
+ @Override
+ public Bits getDocsWithField(FieldInfo field) throws IOException {
+ assert field.getDocValuesType() != null;
+ Bits bits = in.getDocsWithField(field);
+ assert bits != null;
+ assert bits.length() == maxDoc;
+ return new AssertingAtomicReader.AssertingBits(bits);
+ }
@Override
public void close() throws IOException {
in.close();
}
+
+ @Override
+ public long ramBytesUsed() {
+ return in.ramBytesUsed();
+ }
}
}
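The allowNull flag added to checkIterator reflects the 4.5-era doc-values API, where a null entry in the numeric/binary iterables marks a document with no value (paired with the new getDocsWithField above), while ordinal and norms iterators must stay fully populated. A self-contained, condensed sketch of the relaxed contract (class name hypothetical; the remove()/NoSuchElementException checks of the real helper are omitted):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class CheckIteratorSketch {
  // Condensed version of the asserting codec's helper: walks exactly expectedSize entries.
  static <T> void checkIterator(Iterator<T> iterator, long expectedSize, boolean allowNull) {
    for (long i = 0; i < expectedSize; i++) {
      assert iterator.hasNext();
      T v = iterator.next();
      assert allowNull || v != null; // null is legal only where values may be missing
    }
    assert !iterator.hasNext();
  }

  public static void main(String[] args) {
    // Three documents; the middle one has no value.
    List<Number> values = Arrays.asList(7L, null, 42L);
    checkIterator(values.iterator(), 3, true);   // numeric/binary fields: gaps allowed
    // checkIterator(values.iterator(), 3, false); // ords/norms: would trip the assert
  }
}

Run with -ea so the asserts actually fire, which is also how the asserting codec is exercised in tests.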
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingNormsFormat.java Mon Oct 21 18:58:24 2013
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesConsumer;
+import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingNormsConsumer;
import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat.AssertingDocValuesProducer;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.index.SegmentReadState;
@@ -38,7 +38,7 @@ public class AssertingNormsFormat extend
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
DocValuesConsumer consumer = in.normsConsumer(state);
assert consumer != null;
- return new AssertingDocValuesConsumer(consumer, state.segmentInfo.getDocCount());
+ return new AssertingNormsConsumer(consumer, state.segmentInfo.getDocCount());
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java Mon Oct 21 18:58:24 2013
@@ -18,24 +18,22 @@ package org.apache.lucene.codecs.asserti
*/
import java.io.IOException;
-import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.index.AssertingAtomicReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.OpenBitSet;
/**
* Just like {@link Lucene41PostingsFormat} but with additional asserts.
@@ -49,7 +47,7 @@ public final class AssertingPostingsForm
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- return new AssertingFieldsConsumer(in.fieldsConsumer(state));
+ return new AssertingFieldsConsumer(state, in.fieldsConsumer(state));
}
@Override
@@ -86,165 +84,125 @@ public final class AssertingPostingsForm
public int size() {
return in.size();
}
- }
-
- static class AssertingFieldsConsumer extends FieldsConsumer {
- private final FieldsConsumer in;
-
- AssertingFieldsConsumer(FieldsConsumer in) {
- this.in = in;
- }
-
- @Override
- public TermsConsumer addField(FieldInfo field) throws IOException {
- TermsConsumer consumer = in.addField(field);
- assert consumer != null;
- return new AssertingTermsConsumer(consumer, field);
- }
@Override
- public void close() throws IOException {
- in.close();
+ public long ramBytesUsed() {
+ return in.ramBytesUsed();
}
}
-
- static enum TermsConsumerState { INITIAL, START, FINISHED };
- static class AssertingTermsConsumer extends TermsConsumer {
- private final TermsConsumer in;
- private final FieldInfo fieldInfo;
- private BytesRef lastTerm = null;
- private TermsConsumerState state = TermsConsumerState.INITIAL;
- private AssertingPostingsConsumer lastPostingsConsumer = null;
- private long sumTotalTermFreq = 0;
- private long sumDocFreq = 0;
- private OpenBitSet visitedDocs = new OpenBitSet();
-
- AssertingTermsConsumer(TermsConsumer in, FieldInfo fieldInfo) {
- this.in = in;
- this.fieldInfo = fieldInfo;
- }
-
- @Override
- public PostingsConsumer startTerm(BytesRef text) throws IOException {
- assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
- state = TermsConsumerState.START;
- assert lastTerm == null || in.getComparator().compare(text, lastTerm) > 0;
- lastTerm = BytesRef.deepCopyOf(text);
- return lastPostingsConsumer = new AssertingPostingsConsumer(in.startTerm(text), fieldInfo, visitedDocs);
- }
- @Override
- public void finishTerm(BytesRef text, TermStats stats) throws IOException {
- assert state == TermsConsumerState.START;
- state = TermsConsumerState.INITIAL;
- assert text.equals(lastTerm);
- assert stats.docFreq > 0; // otherwise, this method should not be called.
- assert stats.docFreq == lastPostingsConsumer.docFreq;
- sumDocFreq += stats.docFreq;
- if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- assert stats.totalTermFreq == -1;
- } else {
- assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
- sumTotalTermFreq += stats.totalTermFreq;
- }
- in.finishTerm(text, stats);
- }
-
- @Override
- public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
- assert state == TermsConsumerState.INITIAL || state == TermsConsumerState.START && lastPostingsConsumer.docFreq == 0;
- state = TermsConsumerState.FINISHED;
- assert docCount >= 0;
- assert docCount == visitedDocs.cardinality();
- assert sumDocFreq >= docCount;
- assert sumDocFreq == this.sumDocFreq;
- if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- assert sumTotalTermFreq == -1;
- } else {
- assert sumTotalTermFreq >= sumDocFreq;
- assert sumTotalTermFreq == this.sumTotalTermFreq;
- }
- in.finish(sumTotalTermFreq, sumDocFreq, docCount);
- }
+ static class AssertingFieldsConsumer extends FieldsConsumer {
+ private final FieldsConsumer in;
+ private final SegmentWriteState writeState;
- @Override
- public Comparator<BytesRef> getComparator() throws IOException {
- return in.getComparator();
- }
- }
-
- static enum PostingsConsumerState { INITIAL, START };
- static class AssertingPostingsConsumer extends PostingsConsumer {
- private final PostingsConsumer in;
- private final FieldInfo fieldInfo;
- private final OpenBitSet visitedDocs;
- private PostingsConsumerState state = PostingsConsumerState.INITIAL;
- private int freq;
- private int positionCount;
- private int lastPosition = 0;
- private int lastStartOffset = 0;
- int docFreq = 0;
- long totalTermFreq = 0;
-
- AssertingPostingsConsumer(PostingsConsumer in, FieldInfo fieldInfo, OpenBitSet visitedDocs) {
+ AssertingFieldsConsumer(SegmentWriteState writeState, FieldsConsumer in) {
+ this.writeState = writeState;
this.in = in;
- this.fieldInfo = fieldInfo;
- this.visitedDocs = visitedDocs;
}
-
- @Override
- public void startDoc(int docID, int freq) throws IOException {
- assert state == PostingsConsumerState.INITIAL;
- state = PostingsConsumerState.START;
- assert docID >= 0;
- if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
- assert freq == -1;
- this.freq = 0; // we don't expect any positions here
- } else {
- assert freq > 0;
- this.freq = freq;
- totalTermFreq += freq;
- }
- this.positionCount = 0;
- this.lastPosition = 0;
- this.lastStartOffset = 0;
- docFreq++;
- visitedDocs.set(docID);
- in.startDoc(docID, freq);
- }
-
+
@Override
- public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
- assert state == PostingsConsumerState.START;
- assert positionCount < freq;
- positionCount++;
- assert position >= lastPosition || position == -1; /* we still allow -1 from old 3.x indexes */
- lastPosition = position;
- if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
- assert startOffset >= 0;
- assert startOffset >= lastStartOffset;
- lastStartOffset = startOffset;
- assert endOffset >= startOffset;
- } else {
- assert startOffset == -1;
- assert endOffset == -1;
- }
- if (payload != null) {
- assert fieldInfo.hasPayloads();
- }
- in.addPosition(position, payload, startOffset, endOffset);
- }
+ public void write(Fields fields) throws IOException {
+ in.write(fields);
- @Override
- public void finishDoc() throws IOException {
- assert state == PostingsConsumerState.START;
- state = PostingsConsumerState.INITIAL;
- if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
- assert positionCount == 0; // we should not have fed any positions!
- } else {
- assert positionCount == freq;
+ // TODO: more asserts? can we somehow run a
+ // "limited" CheckIndex here??? Or ... can we improve
+ // AssertingFieldsProducer and also use it to wrap the
+ // incoming Fields here?
+
+ String lastField = null;
+ TermsEnum termsEnum = null;
+
+ for(String field : fields) {
+
+ FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
+ assert fieldInfo != null;
+ assert lastField == null || lastField.compareTo(field) < 0;
+ lastField = field;
+
+ Terms terms = fields.terms(field);
+ if (terms == null) {
+ continue;
+ }
+ assert terms != null;
+
+ termsEnum = terms.iterator(termsEnum);
+ BytesRef lastTerm = null;
+ DocsEnum docsEnum = null;
+ DocsAndPositionsEnum posEnum = null;
+
+ boolean hasFreqs = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS) >= 0;
+ boolean hasPositions = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ boolean hasPayloads = terms.hasPayloads();
+
+ assert hasPositions == terms.hasPositions();
+ assert hasOffsets == terms.hasOffsets();
+
+ while(true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ break;
+ }
+ assert lastTerm == null || lastTerm.compareTo(term) < 0;
+ if (lastTerm == null) {
+ lastTerm = BytesRef.deepCopyOf(term);
+ } else {
+ lastTerm.copyBytes(term);
+ }
+
+ int flags = 0;
+ if (hasPositions == false) {
+ if (hasFreqs) {
+ flags = flags | DocsEnum.FLAG_FREQS;
+ }
+ docsEnum = termsEnum.docs(null, docsEnum, flags);
+ } else {
+ if (hasPayloads) {
+ flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
+ }
+ if (hasOffsets) {
+ flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
+ }
+ posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
+ docsEnum = posEnum;
+ }
+
+ assert docsEnum != null : "termsEnum=" + termsEnum + " hasPositions=" + hasPositions;
+
+ int lastDocID = -1;
+
+ while(true) {
+ int docID = docsEnum.nextDoc();
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ assert docID > lastDocID;
+ lastDocID = docID;
+ if (hasFreqs) {
+ int freq = docsEnum.freq();
+ assert freq > 0;
+
+ if (hasPositions) {
+ int lastPos = -1;
+ int lastStartOffset = -1;
+ for(int i=0;i<freq;i++) {
+ int pos = posEnum.nextPosition();
+ assert pos >= lastPos: "pos=" + pos + " vs lastPos=" + lastPos + " i=" + i + " freq=" + freq;
+ lastPos = pos;
+
+ if (hasOffsets) {
+ int startOffset = posEnum.startOffset();
+ int endOffset = posEnum.endOffset();
+ assert endOffset >= startOffset;
+ assert startOffset >= lastStartOffset;
+ lastStartOffset = startOffset;
+ }
+ }
+ }
+ }
+ }
+ }
}
- in.finishDoc();
}
}
}
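The rewrite above replaces the old push-style chain (FieldsConsumer -> TermsConsumer -> PostingsConsumer callbacks) with a single pull-style write(Fields): the consumer walks fields, terms, docs, and positions itself, asserting ordering as it goes. A condensed, hypothetical sketch of that traversal, using only the enum calls that appear in this hunk:

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class FieldsWalker {
  static void walk(Fields fields) throws IOException {
    for (String field : fields) {                  // fields arrive in name order
      Terms terms = fields.terms(field);
      if (terms == null) {
        continue;                                  // field has no inverted data
      }
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term;
      while ((term = termsEnum.next()) != null) {  // terms ascend in byte order
        DocsEnum docsEnum = termsEnum.docs(null, null, DocsEnum.FLAG_FREQS);
        int doc;
        while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
          int freq = docsEnum.freq();              // docIDs ascend; > 0 whenever freqs are indexed
        }
      }
    }
  }
}

The asserting version layers the ordering, freq, position, and offset checks on top of exactly this loop.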
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java Mon Oct 21 18:58:24 2013
@@ -71,6 +71,11 @@ public class AssertingStoredFieldsFormat
public StoredFieldsReader clone() {
return new AssertingStoredFieldsReader(in.clone(), maxDoc);
}
+
+ @Override
+ public long ramBytesUsed() {
+ return in.ramBytesUsed();
+ }
}
enum Status {
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java Mon Oct 21 18:58:24 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.asserti
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.TermVectorsReader;
@@ -71,6 +70,11 @@ public class AssertingTermVectorsFormat
public TermVectorsReader clone() {
return new AssertingTermVectorsReader(in.clone());
}
+
+ @Override
+ public long ramBytesUsed() {
+ return in.ramBytesUsed();
+ }
}
enum Status {
@@ -176,11 +180,6 @@ public class AssertingTermVectorsFormat
}
@Override
- public Comparator<BytesRef> getComparator() throws IOException {
- return in.getComparator();
- }
-
- @Override
public void close() throws IOException {
in.close();
}
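ramBytesUsed() is threaded through each asserting reader in this commit the same way: pure delegation to the wrapped reader. A producer that owns heap structures would compute the figure itself, typically with RamUsageEstimator (whose import was added to AssertingDocValuesFormat above); a minimal hypothetical fragment:

import org.apache.lucene.util.RamUsageEstimator;

// Hypothetical producer with an in-memory ordinal cache to account for.
class RamAccountingSketch {
  private final long[] ordCache = new long[1024];

  public long ramBytesUsed() {
    return RamUsageEstimator.sizeOf(ordCache); // array header + payload bytes
  }
}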
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java Mon Oct 21 18:58:24 2013
@@ -71,4 +71,9 @@ public final class TestBloomFilteredLuce
throws IOException {
return delegate.fieldsProducer(state);
}
+
+ @Override
+ public String toString() {
+ return "TestBloomFilteredLucene41Postings(" + delegate + ")";
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java Mon Oct 21 18:58:24 2013
@@ -23,10 +23,12 @@ import org.apache.lucene.codecs.NormsFor
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
+import org.apache.lucene.codecs.diskdv.DiskNormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
/** Codec that tries to use as little RAM as possible because he spent all his money on beer */
// TODO: better name :)
@@ -39,13 +41,14 @@ public class CheapBastardCodec extends F
private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
private final TermVectorsFormat termVectors = new Lucene40TermVectorsFormat();
// these go to disk for all docvalues/norms datastructures
- private final DocValuesFormat docValues = new CheapBastardDocValuesFormat();
- private final NormsFormat norms = new CheapBastardNormsFormat();
+ private final DocValuesFormat docValues = new DiskDocValuesFormat();
+ private final NormsFormat norms = new DiskNormsFormat();
public CheapBastardCodec() {
- super("CheapBastard", new Lucene42Codec());
+ super("CheapBastard", new Lucene46Codec());
}
+ @Override
public PostingsFormat postingsFormat() {
return postings;
}
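With the delegate bumped to Lucene46Codec and doc values/norms swapped to the on-disk DiskDocValuesFormat/DiskNormsFormat, the codec plugs into a writer in the usual way. A hedged usage sketch (the Version constant and the analyzer are assumptions, not part of this commit):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;

class CheapBastardWiring {
  static IndexWriterConfig configFor(Analyzer analyzer) {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    iwc.setCodec(new CheapBastardCodec()); // doc values and norms now live on disk
    return iwc;
  }
}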
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java Mon Oct 21 18:58:24 2013
@@ -23,13 +23,13 @@ import org.apache.lucene.codecs.FilterCo
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.dummy.DummyCompressingCodec;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
/**
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
- * fields and delegates to {@link Lucene42Codec} for everything else.
+ * fields and delegates to {@link Lucene46Codec} for everything else.
*/
public abstract class CompressingCodec extends FilterCodec {
@@ -73,7 +73,7 @@ public abstract class CompressingCodec e
* Creates a compressing codec with a given segment suffix
*/
public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
- super(name, new Lucene42Codec());
+ super(name, new Lucene46Codec());
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
}
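Concrete test codecs pin down the constructor shown in this hunk by fixing a compression mode and chunk size; a hypothetical subclass using that four-argument constructor:

import org.apache.lucene.codecs.compressing.CompressingCodec;
import org.apache.lucene.codecs.compressing.CompressionMode;

// Hypothetical concrete codec: FAST mode, 4 KB chunks, empty segment suffix.
public class ExampleCompressingCodec extends CompressingCodec {
  public ExampleCompressingCodec() {
    super("ExampleCompressing", "", CompressionMode.FAST, 1 << 12);
  }
}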
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java Mon Oct 21 18:58:24 2013
@@ -1,8 +1,6 @@
package org.apache.lucene.codecs.compressing;
-import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat;
import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
import org.apache.lucene.util.packed.PackedInts;
@@ -42,9 +40,4 @@ public class FastCompressingCodec extend
public NormsFormat normsFormat() {
return new Lucene42NormsFormat(PackedInts.FAST);
}
-
- @Override
- public DocValuesFormat docValuesFormat() {
- return new Lucene42DocValuesFormat(PackedInts.FAST);
- }
}