You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 16:07:19 UTC
svn commit: r1160700 [10/22] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/
dev-tools/idea/lucene/contrib/demo/
dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/contrib/...
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPayloads.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPayloads.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPayloads.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPayloads.java Tue Aug 23 14:06:58 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -347,8 +348,13 @@ public class TestPayloads extends Lucene
}
+ static final Charset utf8 = Charset.forName("UTF-8");
private void generateRandomData(byte[] data) {
- random.nextBytes(data);
+ // this test needs the random data to be valid unicode
+ String s = _TestUtil.randomFixedByteLengthUnicodeString(random, data.length);
+ byte b[] = s.getBytes(utf8);
+ assert b.length == data.length;
+ System.arraycopy(b, 0, data, 0, b.length);
}
private byte[] generateRandomData(int n) {
@@ -526,7 +532,7 @@ public class TestPayloads extends Lucene
for (int i = 0; i < freq; i++) {
tp.nextPosition();
final BytesRef payload = tp.getPayload();
- assertEquals(termText, pool.bytesToString(payload.bytes, payload.offset, payload.length));
+ assertEquals(termText, payload.utf8ToString());
}
}
}
@@ -548,7 +554,7 @@ public class TestPayloads extends Lucene
this.pool = pool;
payload = pool.get();
generateRandomData(payload);
- term = pool.bytesToString(payload, 0, payload.length);
+ term = new String(payload, 0, payload.length, utf8);
first = true;
payloadAtt = addAttribute(PayloadAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
@@ -580,17 +586,6 @@ public class TestPayloads extends Lucene
pool.add(new byte[size]);
}
}
-
- static String bytesToString(byte[] bytes, int start, int length) {
- String s = new String(bytes, start, length);
- BytesRef utf8Result = new BytesRef(10);
- UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result);
- try {
- return new String(utf8Result.bytes, 0, utf8Result.length, "UTF-8");
- } catch (UnsupportedEncodingException uee) {
- return null;
- }
- }
synchronized byte[] get() {
return pool.remove(0);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java Tue Aug 23 14:06:58 2011
@@ -46,14 +46,14 @@ public class TestPerSegmentDeletes exten
IndexWriter writer = new IndexWriter(dir, iwc);
writer.setInfoStream(VERBOSE ? System.out : null);
for (int x = 0; x < 5; x++) {
- writer.addDocument(TestIndexWriterReader.createDocument(x, "1", 2));
+ writer.addDocument(DocHelper.createDocument(x, "1", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
//System.out.println("commit1");
writer.commit();
assertEquals(1, writer.segmentInfos.size());
for (int x = 5; x < 10; x++) {
- writer.addDocument(TestIndexWriterReader.createDocument(x, "2", 2));
+ writer.addDocument(DocHelper.createDocument(x, "2", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
//System.out.println("commit2");
@@ -61,7 +61,7 @@ public class TestPerSegmentDeletes exten
assertEquals(2, writer.segmentInfos.size());
for (int x = 10; x < 15; x++) {
- writer.addDocument(TestIndexWriterReader.createDocument(x, "3", 2));
+ writer.addDocument(DocHelper.createDocument(x, "3", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
@@ -174,12 +174,12 @@ public class TestPerSegmentDeletes exten
**/
void part2(IndexWriter writer, RangeMergePolicy fsmp) throws Exception {
for (int x = 20; x < 25; x++) {
- writer.addDocument(TestIndexWriterReader.createDocument(x, "5", 2));
+ writer.addDocument(DocHelper.createDocument(x, "5", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
writer.flush(false, false);
for (int x = 25; x < 30; x++) {
- writer.addDocument(TestIndexWriterReader.createDocument(x, "5", 2));
+ writer.addDocument(DocHelper.createDocument(x, "5", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
writer.flush(false, false);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java Tue Aug 23 14:06:58 2011
@@ -90,6 +90,7 @@ public class TestRollingUpdates extends
for (int r = 0; r < 3; r++) {
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
+ w.setInfoStream(VERBOSE ? System.out : null);
final int numUpdates = atLeast(20);
int numThreads = _TestUtil.nextInt(random, 2, 6);
IndexingThread[] threads = new IndexingThread[numThreads];
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java Tue Aug 23 14:06:58 2011
@@ -47,6 +47,22 @@ public class TestSameTokenSamePosition e
riw.close();
dir.close();
}
+
+ /**
+ * Same as the above, but with more docs
+ */
+ public void testMoreDocs() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer()));
+ Document doc = new Document();
+ doc.add(new Field("eng", "Six drunken" /*This shouldn't matter. */,
+ Field.Store.YES, Field.Index.ANALYZED));
+ for (int i = 0; i < 100; i++) {
+ riw.addDocument(doc);
+ }
+ riw.close();
+ dir.close();
+ }
}
final class BugReproAnalyzer extends Analyzer{
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java Tue Aug 23 14:06:58 2011
@@ -40,7 +40,7 @@ public class TestStressAdvance extends L
doc.add(f);
final Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(idField);
- int num = atLeast(5000);
+ int num = atLeast(4097);
for(int id=0;id<num;id++) {
if (random.nextInt(4) == 3) {
f.setValue("a");
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/TestTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -17,15 +17,32 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
public class TestTermsEnum extends LuceneTestCase {
@@ -140,4 +157,569 @@ public class TestTermsEnum extends Lucen
r.close();
d.close();
}
+
+ private void addDoc(RandomIndexWriter w, Collection<String> terms, Map<BytesRef,Integer> termToID, int id) throws IOException {
+ Document doc = new Document();
+ doc.add(new NumericField("id").setIntValue(id));
+ if (VERBOSE) {
+ System.out.println("TEST: addDoc id:" + id + " terms=" + terms);
+ }
+ for (String s2 : terms) {
+ doc.add(newField("f", s2, Field.Index.NOT_ANALYZED));
+ termToID.put(new BytesRef(s2), id);
+ }
+ w.addDocument(doc);
+ terms.clear();
+ }
+
+ private boolean accepts(CompiledAutomaton c, BytesRef b) {
+ int state = c.runAutomaton.getInitialState();
+ for(int idx=0;idx<b.length;idx++) {
+ assertTrue(state != -1);
+ state = c.runAutomaton.step(state, b.bytes[b.offset+idx] & 0xff);
+ }
+ return c.runAutomaton.isAccept(state);
+ }
+
+ // Tests Terms.intersect
+ //
+ // Indexes numTerms distinct random strings into field "f", batching them into
+ // documents that carry a numeric "id", and remembers which id each term went
+ // to. Then, per iteration, compiles an automaton from a random mix of indexed
+ // terms and fresh random strings and checks that intersect(c, startTerm)
+ // returns, in sorted order, exactly the indexed terms that are also accept
+ // terms and that the test expects after startTerm.
+ public void testIntersectRandom() throws IOException {
+
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+ final int numTerms = atLeast(1000);
+
+ final Set<String> terms = new HashSet<String>();
+ final Collection<String> pendingTerms = new ArrayList<String>();
+ final Map<BytesRef,Integer> termToID = new HashMap<BytesRef,Integer>();
+ int id = 0;
+ // Draw random strings until numTerms distinct ones were collected. Each new
+ // term is queued in pendingTerms; whenever random.nextInt(20) == 7 the queue
+ // is flushed into one document (addDoc clears pendingTerms).
+ while(terms.size() != numTerms) {
+ final String s = getRandomString();
+ if (!terms.contains(s)) {
+ terms.add(s);
+ pendingTerms.add(s);
+ if (random.nextInt(20) == 7) {
+ addDoc(w, pendingTerms, termToID, id++);
+ }
+ }
+ }
+ // Flush whatever is still queued (possibly nothing) into a final document.
+ addDoc(w, pendingTerms, termToID, id++);
+
+ // Sorted BytesRef view of the indexed terms, plus a set for membership tests.
+ final BytesRef[] termsArray = new BytesRef[terms.size()];
+ final Set<BytesRef> termsSet = new HashSet<BytesRef>();
+ {
+ int upto = 0;
+ for(String s : terms) {
+ final BytesRef b = new BytesRef(s);
+ termsArray[upto++] = b;
+ termsSet.add(b);
+ }
+ Arrays.sort(termsArray);
+ }
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: indexed terms (unicode order):");
+ for(BytesRef t : termsArray) {
+ System.out.println(" " + t.utf8ToString() + " -> id:" + termToID.get(t));
+ }
+ }
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ // NOTE: intentional insanity!!
+ // Maps Lucene docID -> the "id" value that addDoc stored in that document.
+ final int[] docIDToID = FieldCache.DEFAULT.getInts(r, "id");
+
+ for(int iter=0;iter<10*RANDOM_MULTIPLIER;iter++) {
+
+ // TODO: can we also test infinite As here...?
+
+ // From the random terms, pick some ratio and compile an
+ // automaton:
+ final Set<String> acceptTerms = new HashSet<String>();
+ final TreeSet<BytesRef> sortedAcceptTerms = new TreeSet<BytesRef>();
+ final double keepPct = random.nextDouble();
+ Automaton a;
+ if (iter == 0) {
+ // The first iteration always uses the empty automaton; acceptTerms stays empty.
+ if (VERBOSE) {
+ System.out.println("\nTEST: empty automaton");
+ }
+ a = BasicAutomata.makeEmpty();
+ } else {
+ if (VERBOSE) {
+ System.out.println("\nTEST: keepPct=" + keepPct);
+ }
+ // For every indexed term, accept either the term itself (when the draw is
+ // <= keepPct) or a fresh random string in its place.
+ for (String s : terms) {
+ final String s2;
+ if (random.nextDouble() <= keepPct) {
+ s2 = s;
+ } else {
+ s2 = getRandomString();
+ }
+ acceptTerms.add(s2);
+ sortedAcceptTerms.add(new BytesRef(s2));
+ }
+ a = DaciukMihovAutomatonBuilder.build(sortedAcceptTerms);
+ }
+ final CompiledAutomaton c = new CompiledAutomaton(a, true, false);
+
+ final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
+ final Set<BytesRef> acceptTermsSet = new HashSet<BytesRef>();
+ int upto = 0;
+ for(String s : acceptTerms) {
+ final BytesRef b = new BytesRef(s);
+ acceptTermsArray[upto++] = b;
+ acceptTermsSet.add(b);
+ // Sanity check: the compiled automaton must accept every term it was built from.
+ assertTrue(accepts(c, b));
+ }
+ Arrays.sort(acceptTermsArray);
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: accept terms (unicode order):");
+ for(BytesRef t : acceptTermsArray) {
+ System.out.println(" " + t.utf8ToString() + (termsSet.contains(t) ? " (exists)" : ""));
+ }
+ System.out.println(a.toDot());
+ }
+
+ for(int iter2=0;iter2<100;iter2++) {
+ // Start either from the beginning (null) or from a randomly chosen accept term.
+ final BytesRef startTerm = acceptTermsArray.length == 0 || random.nextBoolean() ? null : acceptTermsArray[random.nextInt(acceptTermsArray.length)];
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter2=" + iter2 + " startTerm=" + (startTerm == null ? "<null>" : startTerm.utf8ToString()));
+
+ if (startTerm != null) {
+ int state = c.runAutomaton.getInitialState();
+ for(int idx=0;idx<startTerm.length;idx++) {
+ final int label = startTerm.bytes[startTerm.offset+idx] & 0xff;
+ System.out.println(" state=" + state + " label=" + label);
+ state = c.runAutomaton.step(state, label);
+ assertTrue(state != -1);
+ }
+ System.out.println(" state=" + state);
+ }
+ }
+
+ final TermsEnum te = MultiFields.getTerms(r, "f").intersect(c, startTerm);
+
+ // loc = index into termsArray of the first term the enum is expected to return.
+ int loc;
+ if (startTerm == null) {
+ loc = 0;
+ } else {
+ loc = Arrays.binarySearch(termsArray, new BytesRef(startTerm));
+ if (loc < 0) {
+ // Not indexed: binarySearch returned -(insertionPoint)-1; decode the insertion point.
+ loc = -(loc+1);
+ } else {
+ // startTerm exists in index
+ // ... and is itself not expected back, so start at the following term.
+ loc++;
+ }
+ }
+ // Skip indexed terms that are not accept terms.
+ while(loc < termsArray.length && !acceptTermsSet.contains(termsArray[loc])) {
+ loc++;
+ }
+
+ DocsEnum docsEnum = null;
+ while (loc < termsArray.length) {
+ final BytesRef expected = termsArray[loc];
+ final BytesRef actual = te.next();
+ if (VERBOSE) {
+ System.out.println("TEST: next() expected=" + expected.utf8ToString() + " actual=" + actual.utf8ToString());
+ }
+ assertEquals(expected, actual);
+ // Each distinct term was queued exactly once, so it lives in exactly one document.
+ assertEquals(1, te.docFreq());
+ docsEnum = te.docs(null, docsEnum);
+ final int docID = docsEnum.nextDoc();
+ assertTrue(docID != DocsEnum.NO_MORE_DOCS);
+ // That document must carry the id recorded for the term at index time.
+ assertEquals(docIDToID[docID], termToID.get(expected).intValue());
+ // Advance to the next indexed term that is also an accept term.
+ do {
+ loc++;
+ } while (loc < termsArray.length && !acceptTermsSet.contains(termsArray[loc]));
+ }
+
+ // No expected terms remain, so the enum must be exhausted too.
+ assertNull(te.next());
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
+
+ // Directory most recently created by makeIndex() or testZeroTerms().
+ private Directory d;
+ // Reader most recently produced by makeIndex(); close() closes it together with its directory.
+ private IndexReader r;
+
+ // Name of the field that makeIndex() indexes terms into and that docFreq() looks terms up in.
+ private final String FIELD = "field";
+
+  /**
+   * Creates a fresh directory (stored in {@link #d}), indexes one document per
+   * entry of {@code terms} -- each holding that entry as a single un-analyzed,
+   * norm-less {@link #FIELD} value -- and returns a reader over the result.
+   * The reader is also stored in {@link #r}; if {@link #r} already held a
+   * reader, {@link #close()} is invoked on it first.
+   */
+  private IndexReader makeIndex(String... terms) throws Exception {
+    d = newDirectory();
+    final IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+
+    /*
+    CoreCodecProvider cp = new CoreCodecProvider();
+    cp.unregister(cp.lookup("Standard"));
+    cp.register(new StandardCodec(minTermsInBlock, maxTermsInBlock));
+    cp.setDefaultFieldCodec("Standard");
+    iwc.setCodecProvider(cp);
+    */
+
+    final RandomIndexWriter writer = new RandomIndexWriter(random, d, config);
+    writer.w.setInfoStream(VERBOSE ? System.out : null);
+
+    // One document per term.
+    for (int i = 0; i < terms.length; i++) {
+      final Document document = new Document();
+      document.add(newField(FIELD, terms[i], Field.Index.NOT_ANALYZED_NO_NORMS));
+      writer.addDocument(document);
+    }
+
+    // Release the reader (and its directory) left over from an earlier call.
+    if (r != null) {
+      close();
+    }
+
+    r = writer.getReader();
+    writer.close();
+    return r;
+  }
+
+  /**
+   * Closes {@link #r} and then the directory obtained from its first
+   * sequential sub-reader (which is cast to {@link SegmentReader}).
+   */
+  private void close() throws Exception {
+    final SegmentReader firstSegment = (SegmentReader) r.getSequentialSubReaders()[0];
+    final Directory readerDirectory = firstSegment.directory();
+    r.close();
+    readerDirectory.close();
+  }
+
+ private int docFreq(IndexReader r, String term) throws Exception {
+ return r.docFreq(new Term(FIELD, term));
+ }
+
+  /**
+   * Indexes nine terms (one document each) and then looks up a fixed sequence
+   * of present and absent terms through {@link #docFreq}, expecting 1 for
+   * every indexed term and 0 for every other term. The lookups run in the
+   * order listed.
+   */
+  public void testEasy() throws Exception {
+    // No floor arcs:
+    r = makeIndex("aa0", "aa1", "aa2", "aa3", "bb0", "bb1", "bb2", "bb3", "aa");
+
+    // lookups[i] is probed i-th; expectedDocFreqs[i] is the docFreq it must yield.
+    final String[] lookups = {
+      "aa0",  // First term in block
+      "aa2",  // Scan forward to another term in same block
+      "aa",
+      "aa1",  // Reset same block then scan forwards
+      "aa5",  // Not found, in same block
+      "aa2",  // Found, in same block
+      "b0",   // Not found in index
+      "aa2",  // Found
+      "aa0",  // Found, rewind
+
+      "bb0",  // First term in block
+      "bb2",  // Scan forward to another term in same block
+      "bb1",  // Reset same block then scan forwards
+      "bb5",  // Not found, in same block
+      "bb2",  // Found, in same block
+      "b0",   // Not found in index
+      "bb2",  // Found
+      "bb0",  // Found, rewind
+    };
+    final int[] expectedDocFreqs = {
+      1, 1, 1, 1, 0, 1, 0, 1, 1,
+      1, 1, 1, 0, 1, 0, 1, 1,
+    };
+
+    for (int i = 0; i < lookups.length; i++) {
+      assertEquals(expectedDocFreqs[i], docFreq(r, lookups[i]));
+    }
+
+    close();
+  }
+
+ // tests:
+ // - test same prefix has non-floor block and floor block (ie, has 2 long outputs on same term prefix)
+ // - term that's entirely in the index
+
+ public void testFloorBlocks() throws Exception {
+ final String[] terms = new String[] {"aa0", "aa1", "aa2", "aa3", "aa4", "aa5", "aa6", "aa7", "aa8", "aa9", "aa", "xx"};
+ r = makeIndex(terms);
+ //r = makeIndex("aa0", "aa1", "aa2", "aa3", "aa4", "aa5", "aa6", "aa7", "aa8", "aa9");
+
+ // First term in first block:
+ assertEquals(1, docFreq(r, "aa0"));
+ assertEquals(1, docFreq(r, "aa4"));
+
+ // No block
+ assertEquals(0, docFreq(r, "bb0"));
+
+ // Second block
+ assertEquals(1, docFreq(r, "aa4"));
+
+ // Backwards to prior floor block:
+ assertEquals(1, docFreq(r, "aa0"));
+
+ // Forwards to last floor block:
+ assertEquals(1, docFreq(r, "aa9"));
+
+ assertEquals(0, docFreq(r, "a"));
+ assertEquals(1, docFreq(r, "aa"));
+ assertEquals(0, docFreq(r, "a"));
+ assertEquals(1, docFreq(r, "aa"));
+
+ // Forwards to last floor block:
+ assertEquals(1, docFreq(r, "xx"));
+ assertEquals(1, docFreq(r, "aa1"));
+ assertEquals(0, docFreq(r, "yy"));
+
+ assertEquals(1, docFreq(r, "xx"));
+ assertEquals(1, docFreq(r, "aa9"));
+
+ assertEquals(1, docFreq(r, "xx"));
+ assertEquals(1, docFreq(r, "aa4"));
+
+ final TermsEnum te = MultiFields.getTerms(r, FIELD).iterator();
+ while(te.next() != null) {
+ //System.out.println("TEST: next term=" + te.term().utf8ToString());
+ }
+
+ assertTrue(seekExact(te, "aa1"));
+ assertEquals("aa2", next(te));
+ assertTrue(seekExact(te, "aa8"));
+ assertEquals("aa9", next(te));
+ assertEquals("xx", next(te));
+
+ testRandomSeeks(r, terms);
+ close();
+ }
+
+  /**
+   * Checks that a field whose only document was deleted and merged away
+   * exposes no terms.
+   *
+   * <p>Two documents are indexed: one with "field" and one with "field2". The
+   * "field" document is then deleted by term and the index optimized, leaving
+   * a single live document. {@code MultiFields.getTerms(reader, "field")} must
+   * then either return {@code null} or an enum that is immediately exhausted.
+   */
+  public void testZeroTerms() throws Exception {
+    d = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, d);
+    w.w.setInfoStream(VERBOSE ? System.out : null);
+
+    Document doc = new Document();
+    doc.add(newField("field", "one two three", Field.Index.ANALYZED));
+    // This document must actually be indexed: otherwise the delete below matches
+    // nothing, "field" never exists in the index, and the terms check at the end
+    // is skipped entirely.
+    w.addDocument(doc);
+
+    doc = new Document();
+    doc.add(newField("field2", "one two three", Field.Index.ANALYZED));
+    w.addDocument(doc);
+    w.commit();
+
+    // Remove the only document containing "field", then merge the deletion away.
+    w.deleteDocuments(new Term("field", "one"));
+    w.optimize();
+
+    // Local name chosen so it does not shadow the 'r' field of this class.
+    final IndexReader reader = w.getReader();
+    w.close();
+    assertEquals(1, reader.numDocs());
+    assertEquals(1, reader.maxDoc());
+    final Terms terms = MultiFields.getTerms(reader, "field");
+    if (terms != null) {
+      assertNull(terms.iterator().next());
+    }
+    reader.close();
+    d.close();
+  }
+
+ private String getRandomString() {
+ //return _TestUtil.randomSimpleString(random);
+ return _TestUtil.randomRealisticUnicodeString(random);
+ }
+
+  /**
+   * Fills an array with distinct random terms -- sometimes starting with a
+   * group of terms that share a common prefix of length >= 5 -- indexes them
+   * with {@link #makeIndex} and runs {@link #testRandomSeeks} over the result.
+   * Whether the empty string may appear among the non-prefixed terms is
+   * decided randomly.
+   */
+  public void testRandomTerms() throws Exception {
+    final int termCount = _TestUtil.nextInt(random, 1, atLeast(1000));
+    final String[] terms = new String[termCount];
+    final Set<String> seen = new HashSet<String>();
+
+    final boolean allowEmptyString = random.nextBoolean();
+
+    final boolean addSharedPrefixTerms = random.nextInt(10) == 7 && terms.length > 2;
+    if (addSharedPrefixTerms) {
+      // Sometimes add a bunch of terms sharing a longish common prefix:
+      final int numTermsSamePrefix = random.nextInt(terms.length/2);
+      if (numTermsSamePrefix > 0) {
+        // Retry until the prefix has at least 5 chars.
+        String prefix;
+        do {
+          prefix = getRandomString();
+        } while (prefix.length() < 5);
+
+        while (seen.size() < numTermsSamePrefix) {
+          final String candidate = prefix + getRandomString();
+          // Set.add reports whether the candidate was new; it then occupies the last filled slot.
+          if (seen.add(candidate)) {
+            terms[seen.size() - 1] = candidate;
+          }
+        }
+      }
+    }
+
+    // Fill the remaining slots with distinct random terms.
+    while (seen.size() < terms.length) {
+      final String candidate = getRandomString();
+      final boolean lengthOk = allowEmptyString || candidate.length() != 0;
+      if (lengthOk && seen.add(candidate)) {
+        terms[seen.size() - 1] = candidate;
+      }
+    }
+
+    r = makeIndex(terms);
+    testRandomSeeks(r, terms);
+    close();
+  }
+
+ // sugar
+ private boolean seekExact(TermsEnum te, String term) throws IOException {
+ return te.seekExact(new BytesRef(term), random.nextBoolean());
+ }
+
+  // sugar: next term of the enum decoded as a String, or null once the enum is exhausted
+  private String next(TermsEnum te) throws IOException {
+    final BytesRef upcoming = te.next();
+    return upcoming == null ? null : upcoming.utf8ToString();
+  }
+
+  /**
+   * Keeps generating random terms until one is found that
+   * {@code Arrays.binarySearch} does not locate in {@code terms}, and returns
+   * it. {@code terms} is searched as-is, so callers are expected to pass it
+   * sorted.
+   */
+  private BytesRef getNonExistTerm(BytesRef[] terms) {
+    BytesRef candidate;
+    do {
+      candidate = new BytesRef(getRandomString());
+    } while (Arrays.binarySearch(terms, candidate) >= 0);
+    return candidate;
+  }
+
+  /** Immutable holder pairing a term with a {@link TermState} saved for it. */
+  private static class TermAndState {
+    /** The term. */
+    public final BytesRef term;
+    /** The state saved alongside {@link #term}. */
+    public final TermState state;
+
+    /** Stores both references as given; nothing is copied. */
+    public TermAndState(BytesRef term, TermState state) {
+      this.state = state;
+      this.term = term;
+    }
+  }
+
+ // Exercises seekExact (by term and by saved TermState), seekCeil and next()
+ // on the FIELD terms of r, checking every outcome against a sorted copy of
+ // validTermStrings. The expectations only hold if validTermStrings are exactly
+ // the terms indexed in FIELD; the callers in this class pass the same array
+ // they handed to makeIndex().
+ private void testRandomSeeks(IndexReader r, String... validTermStrings) throws IOException {
+ final BytesRef[] validTerms = new BytesRef[validTermStrings.length];
+ for(int termIDX=0;termIDX<validTermStrings.length;termIDX++) {
+ validTerms[termIDX] = new BytesRef(validTermStrings[termIDX]);
+ }
+ Arrays.sort(validTerms);
+ if (VERBOSE) {
+ System.out.println("TEST: " + validTerms.length + " terms:");
+ for(BytesRef t : validTerms) {
+ System.out.println(" " + t.utf8ToString() + " " + t);
+ }
+ }
+ final TermsEnum te = MultiFields.getTerms(r, FIELD).iterator();
+
+ // Value Arrays.binarySearch returns for a missing key whose insertion point
+ // is validTerms.length, i.e. a key sorting after every valid term.
+ final int END_LOC = -validTerms.length-1;
+
+ // (term, TermState) pairs saved during next() calls below; later iterations
+ // may seek with them. Capped at 100 entries.
+ final List<TermAndState> termStates = new ArrayList<TermAndState>();
+
+ for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+
+ // t is the seek target; loc is its binarySearch position in validTerms
+ // (>= 0 when t is a valid term, negative insertion-point encoding otherwise).
+ final BytesRef t;
+ int loc;
+ final TermState termState;
+ if (random.nextInt(6) == 4) {
+ // pick term that doesn't exist:
+ t = getNonExistTerm(validTerms);
+ termState = null;
+ if (VERBOSE) {
+ System.out.println("\nTEST: invalid term=" + t.utf8ToString());
+ }
+ loc = Arrays.binarySearch(validTerms, t);
+ } else if (termStates.size() != 0 && random.nextInt(4) == 1) {
+ // pick a valid term for which a TermState was saved earlier
+ final TermAndState ts = termStates.get(random.nextInt(termStates.size()));
+ t = ts.term;
+ loc = Arrays.binarySearch(validTerms, t);
+ assertTrue(loc >= 0);
+ termState = ts.state;
+ if (VERBOSE) {
+ System.out.println("\nTEST: valid termState term=" + t.utf8ToString());
+ }
+ } else {
+ // pick valid term
+ loc = random.nextInt(validTerms.length);
+ t = new BytesRef(validTerms[loc]);
+ termState = null;
+ if (VERBOSE) {
+ System.out.println("\nTEST: valid term=" + t.utf8ToString());
+ }
+ }
+
+ // seekCeil or seekExact:
+ final boolean doSeekExact = random.nextBoolean();
+ if (termState != null) {
+ if (VERBOSE) {
+ System.out.println(" seekExact termState");
+ }
+ te.seekExact(t, termState);
+ } else if (doSeekExact) {
+ if (VERBOSE) {
+ System.out.println(" seekExact");
+ }
+ // seekExact must succeed exactly when t is one of the valid terms.
+ assertEquals(loc >= 0, te.seekExact(t, random.nextBoolean()));
+ } else {
+ if (VERBOSE) {
+ System.out.println(" seekCeil");
+ }
+
+ final TermsEnum.SeekStatus result = te.seekCeil(t, random.nextBoolean());
+ if (VERBOSE) {
+ System.out.println(" got " + result);
+ }
+
+ if (loc >= 0) {
+ assertEquals(TermsEnum.SeekStatus.FOUND, result);
+ } else if (loc == END_LOC) {
+ assertEquals(TermsEnum.SeekStatus.END, result);
+ } else {
+ // Missing term with a valid term after it: insertion point < validTerms.length.
+ assert loc >= -validTerms.length;
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, result);
+ }
+ }
+
+ // Verify where the enum is positioned now; skip the next() phase when the
+ // seek left it without a usable position.
+ if (loc >= 0) {
+ assertEquals(t, te.term());
+ } else if (doSeekExact) {
+ // TermsEnum is unpositioned if seekExact returns false
+ continue;
+ } else if (loc == END_LOC) {
+ continue;
+ } else {
+ // Decode the insertion point: seekCeil must have landed on the smallest
+ // valid term that sorts after t.
+ loc = -loc-1;
+ assertEquals(validTerms[loc], te.term());
+ }
+
+ // Do a bunch of next's after the seek
+ final int numNext = random.nextInt(validTerms.length);
+
+ for(int nextCount=0;nextCount<numNext;nextCount++) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: next loc=" + loc + " of " + validTerms.length);
+ }
+ final BytesRef t2 = te.next();
+ loc++;
+ if (loc == validTerms.length) {
+ // Walked past the last valid term: the enum must report exhaustion.
+ assertNull(t2);
+ break;
+ } else {
+ assertEquals(validTerms[loc], t2);
+ // Occasionally save the current TermState for a later seekExact(term, state).
+ if (random.nextInt(40) == 17 && termStates.size() < 100) {
+ termStates.add(new TermAndState(validTerms[loc], te.termState()));
+ }
+ }
+ }
+ }
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/index/values/TestDocValuesIndexing.java Tue Aug 23 14:06:58 2011
@@ -223,6 +223,7 @@ public class TestDocValuesIndexing exten
return cfg;
}
+ @SuppressWarnings("fallthrough")
public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
throws IOException {
Directory d = newDirectory();
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java Tue Aug 23 14:06:58 2011
@@ -154,9 +154,7 @@ public class TestAutomatonQuery extends
assertEquals(a1, a2);
assertEquals(a1, a3);
-
- assertEquals(a1.toString(), a3.toString());
-
+
// different class
AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
// different class
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java Tue Aug 23 14:06:58 2011
@@ -87,6 +87,7 @@ public class TestBooleanMinShouldMatch e
printHits(getName(), h, s);
}
assertEquals("result count", expected, h.length);
+ //System.out.println("TEST: now check");
QueryUtils.check(random, q,s);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFieldCache.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFieldCache.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFieldCache.java Tue Aug 23 14:06:58 2011
@@ -19,28 +19,33 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
+
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
public class TestFieldCache extends LuceneTestCase {
protected IndexReader reader;
private int NUM_DOCS;
+ private int NUM_ORDS;
private String[] unicodeStrings;
+ private BytesRef[][] multiValued;
private Directory directory;
@Override
public void setUp() throws Exception {
super.setUp();
NUM_DOCS = atLeast(1000);
+ NUM_ORDS = atLeast(2);
directory = newDirectory();
RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
long theLong = Long.MAX_VALUE;
@@ -50,6 +55,7 @@ public class TestFieldCache extends Luce
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
+ multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
@@ -65,21 +71,19 @@ public class TestFieldCache extends Luce
// sometimes skip the field:
if (random.nextInt(40) != 17) {
- String s = null;
- if (i > 0 && random.nextInt(3) == 1) {
- // reuse past string -- try to find one that's not null
- for(int iter=0;iter<10 && s==null;iter++) {
- s = unicodeStrings[random.nextInt(i)];
- }
- if (s == null) {
- s = _TestUtil.randomUnicodeString(random, 250);
- }
- } else {
- s = _TestUtil.randomUnicodeString(random, 250);
- }
- unicodeStrings[i] = s;
+ unicodeStrings[i] = generateString(i);
doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
}
+
+ // sometimes skip the field:
+ if (random.nextInt(10) != 8) {
+ for (int j = 0; j < NUM_ORDS; j++) {
+ String newValue = generateString(i);
+ multiValued[i][j] = new BytesRef(newValue);
+ doc.add(newField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
+ }
+ Arrays.sort(multiValued[i]);
+ }
writer.addDocument(doc);
}
reader = writer.getReader();
@@ -210,6 +214,47 @@ public class TestFieldCache extends Luce
// test bad field
terms = cache.getTerms(reader, "bogusfield");
+ // getDocTermOrds
+ DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+ TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
+ assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
+ DocTermOrds.TermOrdsIterator reuse = null;
+ for (int i = 0; i < NUM_DOCS; i++) {
+ reuse = termOrds.lookup(i, reuse);
+ final int[] buffer = new int[5];
+ // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
+ List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
+ for (;;) {
+ int chunk = reuse.read(buffer);
+ if (chunk == 0) {
+ for (int ord = 0; ord < values.size(); ord++) {
+ BytesRef term = values.get(ord);
+ assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
+ }
+ break;
+ }
+
+ for(int idx=0; idx < chunk; idx++) {
+ int key = buffer[idx];
+ termsEnum.seekExact((long) key);
+ String actual = termsEnum.term().utf8ToString();
+ String expected = values.get(idx).utf8ToString();
+ if (!expected.equals(actual)) {
+ reuse = termOrds.lookup(i, reuse);
+ reuse.read(buffer);
+ }
+ assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
+ }
+
+ if (chunk <= buffer.length) {
+ break;
+ }
+ }
+ }
+
+ // test bad field
+ termOrds = cache.getDocTermOrds(reader, "bogusfield");
+
FieldCache.DEFAULT.purge(reader);
}
@@ -223,4 +268,21 @@ public class TestFieldCache extends Luce
r.close();
dir.close();
}
+
+ private String generateString(int i) {
+ String s = null;
+ if (i > 0 && random.nextInt(3) == 1) {
+ // reuse past string -- try to find one that's not null
+ for(int iter = 0; iter < 10 && s == null;iter++) {
+ s = unicodeStrings[random.nextInt(i)];
+ }
+ if (s == null) {
+ s = _TestUtil.randomUnicodeString(random);
+ }
+ } else {
+ s = _TestUtil.randomUnicodeString(random);
+ }
+ return s;
+ }
+
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java Tue Aug 23 14:06:58 2011
@@ -75,6 +75,9 @@ public class TestFuzzyQuery2 extends Luc
}
public void assertFromTestData(int codePointTable[]) throws Exception {
+ if (VERBOSE) {
+ System.out.println("TEST: codePointTable=" + codePointTable);
+ }
InputStream stream = getClass().getResourceAsStream("fuzzyTestData.txt");
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
@@ -83,6 +86,8 @@ public class TestFuzzyQuery2 extends Luc
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()));
+
+ writer.w.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
@@ -95,6 +100,9 @@ public class TestFuzzyQuery2 extends Luc
IndexReader r = writer.getReader();
IndexSearcher searcher = newSearcher(r);
+ if (VERBOSE) {
+ System.out.println("TEST: searcher=" + searcher);
+ }
writer.close();
String line;
while ((line = reader.readLine()) != null) {
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java Tue Aug 23 14:06:58 2011
@@ -94,12 +94,18 @@ public class TestMultiTermConstantScore
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+ if (VERBOSE) {
+ System.out.println("TEST: query=" + query);
+ }
return query;
}
public static Query csrq(String f, String l, String h, boolean il, boolean ih, MultiTermQuery.RewriteMethod method) {
TermRangeQuery query = TermRangeQuery.newStringRange(f, l, h, il, ih);
query.setRewriteMethod(method);
+ if (VERBOSE) {
+ System.out.println("TEST: query=" + query + " method=" + method);
+ }
return query;
}
@@ -275,6 +281,10 @@ public class TestMultiTermConstantScore
IndexReader reader = signedIndexReader;
IndexSearcher search = newSearcher(reader);
+ if (VERBOSE) {
+ System.out.println("TEST: reader=" + reader);
+ }
+
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java Tue Aug 23 14:06:58 2011
@@ -56,7 +56,7 @@ public class TestNumericRangeQuery32 ext
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
- .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))
+ .setMaxBufferedDocs(_TestUtil.nextInt(random, 100, 1000))
.setMergePolicy(newLogMergePolicy()));
NumericField
@@ -337,7 +337,7 @@ public class TestNumericRangeQuery32 ext
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
String field="field"+precisionStep;
int termCountT=0,termCountC=0;
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i = 0; i < num; i++) {
int lower=(int)(random.nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(random.nextDouble()*noDocs*distance)+startOffset;
@@ -415,7 +415,7 @@ public class TestNumericRangeQuery32 ext
private void testRangeSplit(int precisionStep) throws Exception {
String field="ascfield"+precisionStep;
// 10 random tests
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i =0; i< num; i++) {
int lower=(int)(random.nextDouble()*noDocs - noDocs/2);
int upper=(int)(random.nextDouble()*noDocs - noDocs/2);
@@ -491,7 +491,7 @@ public class TestNumericRangeQuery32 ext
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should return descending documents
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i = 0; i < num; i++) {
int lower=(int)(random.nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(random.nextDouble()*noDocs*distance)+startOffset;
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java Tue Aug 23 14:06:58 2011
@@ -53,7 +53,7 @@ public class TestNumericRangeQuery64 ext
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
- .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))
+ .setMaxBufferedDocs(_TestUtil.nextInt(random, 100, 1000))
.setMergePolicy(newLogMergePolicy()));
NumericField
@@ -354,7 +354,7 @@ public class TestNumericRangeQuery64 ext
private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception {
String field="field"+precisionStep;
int termCountT=0,termCountC=0;
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random.nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(random.nextDouble()*noDocs*distance)+startOffset;
@@ -437,7 +437,7 @@ public class TestNumericRangeQuery64 ext
private void testRangeSplit(int precisionStep) throws Exception {
String field="ascfield"+precisionStep;
// 10 random tests
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random.nextDouble()*noDocs - noDocs/2);
long upper=(long)(random.nextDouble()*noDocs - noDocs/2);
@@ -523,7 +523,7 @@ public class TestNumericRangeQuery64 ext
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should return descending documents
- int num = atLeast(10);
+ int num = _TestUtil.nextInt(random, 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random.nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(random.nextDouble()*noDocs*distance)+startOffset;
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java Tue Aug 23 14:06:58 2011
@@ -59,7 +59,7 @@ public class TestPrefixRandom extends Lu
// we generate awful prefixes: good for testing.
// but for preflex codec, the test can be very slow, so use less iterations.
final String codec = CodecProvider.getDefault().getFieldCodec("field");
- int num = codec.equals("PreFlex") ? 200 * RANDOM_MULTIPLIER : atLeast(2000);
+ int num = codec.equals("PreFlex") ? 200 * RANDOM_MULTIPLIER : atLeast(1000);
for (int i = 0; i < num; i++) {
field.setValue(_TestUtil.randomUnicodeString(random, 10));
writer.addDocument(doc);
@@ -114,7 +114,7 @@ public class TestPrefixRandom extends Lu
/** test a bunch of random prefixes */
public void testPrefixes() throws Exception {
- int num = atLeast(1000);
+ int num = atLeast(100);
for (int i = 0; i < num; i++)
assertSame(_TestUtil.randomUnicodeString(random, 5));
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java Tue Aug 23 14:06:58 2011
@@ -18,28 +18,27 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.ArrayList;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -143,6 +142,9 @@ public class TestRegexpRandom2 extends L
int num = CodecProvider.getDefault().getFieldCodec("field").equals("PreFlex") ? 100 * RANDOM_MULTIPLIER : atLeast(1000);
for (int i = 0; i < num; i++) {
String reg = AutomatonTestUtil.randomRegexp(random);
+ if (VERBOSE) {
+ System.out.println("TEST: regexp=" + reg);
+ }
assertSame(reg);
}
}
@@ -153,18 +155,7 @@ public class TestRegexpRandom2 extends L
protected void assertSame(String regexp) throws IOException {
RegexpQuery smart = new RegexpQuery(new Term("field", regexp), RegExp.NONE);
DumbRegexpQuery dumb = new DumbRegexpQuery(new Term("field", regexp), RegExp.NONE);
-
- // we can't compare the two if automaton rewrites to a simpler enum.
- // for example: "a\uda07\udcc7?.*?" gets rewritten to a simpler query:
- // a\uda07* prefixquery. Prefixquery then does the "wrong" thing, which
- // isn't really wrong as the query was undefined to begin with... but not
- // automatically comparable.
-
- // TODO: does this check even matter anymore?!
- Terms terms = MultiFields.getTerms(searcher1.getIndexReader(), "field");
- if (!(smart.getTermsEnum(terms) instanceof AutomatonTermsEnum))
- return;
-
+
TopDocs smartDocs = searcher1.search(smart, 25);
TopDocs dumbDocs = searcher2.search(dumb, 25);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java Tue Aug 23 14:06:58 2011
@@ -25,7 +25,7 @@ import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.Scorer.ScorerVisitor;
+import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.junit.AfterClass;
@@ -75,44 +75,31 @@ public class TestSubScorerFreqs extends
public final Map<Integer, Map<Query, Float>> docCounts = new HashMap<Integer, Map<Query, Float>>();
private final Map<Query, Scorer> subScorers = new HashMap<Query, Scorer>();
- private final ScorerVisitor<Query, Query, Scorer> visitor = new MockScorerVisitor();
- private final EnumSet<Occur> collect;
-
- private class MockScorerVisitor extends ScorerVisitor<Query, Query, Scorer> {
-
- @Override
- public void visitOptional(Query parent, Query child, Scorer scorer) {
- if (collect.contains(Occur.SHOULD))
- subScorers.put(child, scorer);
- }
-
- @Override
- public void visitProhibited(Query parent, Query child, Scorer scorer) {
- if (collect.contains(Occur.MUST_NOT))
- subScorers.put(child, scorer);
- }
-
- @Override
- public void visitRequired(Query parent, Query child, Scorer scorer) {
- if (collect.contains(Occur.MUST))
- subScorers.put(child, scorer);
- }
-
- }
+ private final Set<String> relationships;
public CountingCollector(Collector other) {
- this(other, EnumSet.allOf(Occur.class));
+ this(other, new HashSet<String>(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString(), Occur.MUST_NOT.toString())));
}
- public CountingCollector(Collector other, EnumSet<Occur> collect) {
+ public CountingCollector(Collector other, Set<String> relationships) {
this.other = other;
- this.collect = collect;
+ this.relationships = relationships;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
other.setScorer(scorer);
- scorer.visitScorers(visitor);
+ subScorers.clear();
+ setSubScorers(scorer, "TOP");
+ }
+
+ public void setSubScorers(Scorer scorer, String relationship) {
+ for (ChildScorer child : scorer.getChildren()) {
+ if (relationships.contains(child.relationship)) {
+ setSubScorers(child.child, child.relationship);
+ }
+ }
+ subScorers.put(scorer.getWeight().getQuery(), scorer);
}
@Override
@@ -177,14 +164,17 @@ public class TestSubScorerFreqs extends
query.add(inner, Occur.MUST);
query.add(aQuery, Occur.MUST);
query.add(dQuery, Occur.MUST);
- EnumSet<Occur>[] occurList = new EnumSet[] {EnumSet.of(Occur.MUST), EnumSet.of(Occur.MUST, Occur.SHOULD)};
- for (EnumSet<Occur> occur : occurList) {
+ Set<String>[] occurList = new Set[] {
+ Collections.singleton(Occur.MUST.toString()),
+ new HashSet<String>(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString()))
+ };
+ for (Set<String> occur : occurList) {
CountingCollector c = new CountingCollector(TopScoreDocCollector.create(
10, true), occur);
s.search(query, null, c);
final int maxDocs = s.maxDoc();
assertEquals(maxDocs, c.docCounts.size());
- boolean includeOptional = occur.contains(Occur.SHOULD);
+ boolean includeOptional = occur.contains(Occur.SHOULD.toString());
for (int i = 0; i < maxDocs; i++) {
Map<Query, Float> doc0 = c.docCounts.get(i);
assertEquals(includeOptional ? 5 : 4, doc0.size());
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcard.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcard.java Tue Aug 23 14:06:58 2011
@@ -135,7 +135,7 @@ public class TestWildcard
wq = new WildcardQuery(new Term("field", "*"));
assertMatches(searcher, wq, 2);
assertFalse(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
- assertFalse(wq.getTermsEnum(terms) instanceof AutomatonTermsEnum);
+ assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
searcher.close();
indexStore.close();
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java Tue Aug 23 14:06:58 2011
@@ -63,6 +63,9 @@ public class TestWildcardRandom extends
reader = writer.getReader();
searcher = newSearcher(reader);
writer.close();
+ if (VERBOSE) {
+ System.out.println("TEST: setUp searcher=" + searcher);
+ }
}
private char N() {
@@ -85,7 +88,11 @@ public class TestWildcardRandom extends
private void assertPatternHits(String pattern, int numHits) throws Exception {
// TODO: run with different rewrites
- Query wq = new WildcardQuery(new Term("field", fillPattern(pattern)));
+ final String filledPattern = fillPattern(pattern);
+ if (VERBOSE) {
+ System.out.println("TEST: run wildcard pattern=" + pattern + " filled=" + filledPattern);
+ }
+ Query wq = new WildcardQuery(new Term("field", filledPattern));
TopDocs docs = searcher.search(wq, 25);
assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Tue Aug 23 14:06:58 2011
@@ -152,11 +152,14 @@ public class TestPayloadNearQuery extend
}
for (int i=1;i<10;i++) {
query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
+ if (VERBOSE) {
+ System.out.println("TEST: run query=" + query);
+ }
// all should have score = 3 because adjacent terms have payloads of 2,4
// and all the similarity factors are set to 1
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("should be 100 hits", hits.totalHits == 100);
+ assertEquals("should be 100 hits", 100, hits.totalHits);
for (int j = 0; j < hits.scoreDocs.length; j++) {
ScoreDoc doc = hits.scoreDocs[j];
// System.out.println("Doc: " + doc.toString());
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Tue Aug 23 14:06:58 2011
@@ -18,6 +18,8 @@ package org.apache.lucene.store;
*/
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
@@ -28,6 +30,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TestIndexWriterReader;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class TestFileSwitchDirectory extends LuceneTestCase {
/**
@@ -77,4 +80,76 @@ public class TestFileSwitchDirectory ext
}
fsd.close();
}
+
+ private Directory newFSSwitchDirectory(Set<String> primaryExtensions) throws IOException {
+ Directory a = new SimpleFSDirectory(_TestUtil.getTempDir("foo"));
+ Directory b = new SimpleFSDirectory(_TestUtil.getTempDir("bar"));
+ FileSwitchDirectory switchDir = new FileSwitchDirectory(primaryExtensions, a, b, true);
+ return new MockDirectoryWrapper(random, switchDir);
+ }
+
+ // LUCENE-3380 -- make sure we get exception if the directory really does not exist.
+ public void testNoDir() throws Throwable {
+ Directory dir = newFSSwitchDirectory(Collections.<String>emptySet());
+ try {
+ IndexReader.open(dir, true);
+ fail("did not hit expected exception");
+ } catch (NoSuchDirectoryException nsde) {
+ // expected
+ }
+ dir.close();
+ }
+
+ // LUCENE-3380 test that we can add a file, and then when we call list() we get it back
+ public void testDirectoryFilter() throws IOException {
+ Directory dir = newFSSwitchDirectory(Collections.<String>emptySet());
+ String name = "file";
+ try {
+ dir.createOutput(name, newIOContext(random)).close();
+ assertTrue(dir.fileExists(name));
+ assertTrue(Arrays.asList(dir.listAll()).contains(name));
+ } finally {
+ dir.close();
+ }
+ }
+
+ // LUCENE-3380 test that we delegate compound files correctly.
+ public void testCompoundFileAppendTwice() throws IOException {
+ Directory newDir = newFSSwitchDirectory(Collections.singleton("cfs"));
+ CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs", newIOContext(random));
+ createSequenceFile(newDir, "d1", (byte) 0, 15);
+ IndexOutput out = csw.createOutput("d.xyz", newIOContext(random));
+ out.writeInt(0);
+ try {
+ newDir.copy(csw, "d1", "d1", newIOContext(random));
+ fail("file does already exist");
+ } catch (IOException e) {
+ // expected
+ }
+ out.close();
+ assertEquals(1, csw.listAll().length);
+ assertEquals("d.xyz", csw.listAll()[0]);
+
+ csw.close();
+
+ CompoundFileDirectory cfr = newDir.openCompoundInput("d.cfs", newIOContext(random));
+ assertEquals(1, cfr.listAll().length);
+ assertEquals("d.xyz", cfr.listAll()[0]);
+ cfr.close();
+ newDir.close();
+ }
+
+ /** Creates a file of the specified size with sequential data. The first
+ * byte is written as the start byte provided. All subsequent bytes are
+ * computed as start + offset where offset is the number of the byte.
+ */
+ private void createSequenceFile(Directory dir, String name, byte start, int size) throws IOException {
+ IndexOutput os = dir.createOutput(name, newIOContext(random));
+ for (int i=0; i < size; i++) {
+ os.writeByte(start);
+ start ++;
+ }
+ os.close();
+ }
+
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFixedBitSet.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFixedBitSet.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/TestFixedBitSet.java Tue Aug 23 14:06:58 2011
@@ -197,7 +197,8 @@ public class TestFixedBitSet extends Luc
*/
public void testEquals() {
- final int numBits = random.nextInt(2000);
+ // This test can't handle numBits==0:
+ final int numBits = random.nextInt(2000) + 1;
FixedBitSet b1 = new FixedBitSet(numBits);
FixedBitSet b2 = new FixedBitSet(numBits);
assertTrue(b1.equals(b2));
@@ -219,7 +220,8 @@ public class TestFixedBitSet extends Luc
}
public void testHashCodeEquals() {
- final int numBits = random.nextInt(2000);
+ // This test can't handle numBits==0:
+ final int numBits = random.nextInt(2000) + 1;
FixedBitSet b1 = new FixedBitSet(numBits);
FixedBitSet b2 = new FixedBitSet(numBits);
assertTrue(b1.equals(b2));
@@ -237,6 +239,22 @@ public class TestFixedBitSet extends Luc
}
}
+ public void testSmallBitSets() {
+ // Make sure size 0-9 bit sets are OK:
+ for(int numBits=0;numBits<10;numBits++) {
+ FixedBitSet b1 = new FixedBitSet(numBits);
+ FixedBitSet b2 = new FixedBitSet(numBits);
+ assertTrue(b1.equals(b2));
+ assertEquals(b1.hashCode(), b2.hashCode());
+ assertEquals(0, b1.cardinality());
+ if (numBits > 0) {
+ b1.set(0, numBits);
+ assertEquals(numBits, b1.cardinality());
+ b1.flip(0, numBits);
+ assertEquals(0, b1.cardinality());
+ }
+ }
+ }
private FixedBitSet makeFixedBitSet(int[] a, int numBits) {
FixedBitSet bs = new FixedBitSet(numBits);
Modified: lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java Tue Aug 23 14:06:58 2011
@@ -24,19 +24,25 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.io.StringWriter;
import java.io.Writer;
import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
@@ -456,7 +462,8 @@ public class TestFSTs extends LuceneTest
prune1==0 && prune2==0,
allowRandomSuffixSharing ? random.nextBoolean() : true,
allowRandomSuffixSharing ? _TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
- outputs);
+ outputs,
+ null);
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof UpToTwoPositiveIntOutputs.TwoLongs) {
@@ -830,7 +837,7 @@ public class TestFSTs extends LuceneTest
final IntsRef prefix = ent.getKey();
final CountMinOutput<T> cmo = ent.getValue();
if (VERBOSE) {
- System.out.println(" term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
+ System.out.println(" term prefix=" + inputToString(inputMode, prefix, false) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
}
final boolean keep;
if (prune1 > 0) {
@@ -872,15 +879,15 @@ public class TestFSTs extends LuceneTest
}
}
- //System.out.println("TEST: after prune");
- /*
- for(Map.Entry<BytesRef,CountMinOutput> ent : prefixes.entrySet()) {
- System.out.println(" " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
- if (ent.getValue().isFinal) {
- System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
- }
+ if (VERBOSE) {
+ System.out.println("TEST: after prune");
+ for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
+ System.out.println(" " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
+ if (ent.getValue().isFinal) {
+ System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
+ }
}
- */
+ }
if (prefixes.size() <= 1) {
assertNull(fst);
@@ -897,7 +904,7 @@ public class TestFSTs extends LuceneTest
IntsRefFSTEnum.InputOutput<T> current;
while((current = fstEnum.next()) != null) {
if (VERBOSE) {
- System.out.println(" fstEnum.next term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+ System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output));
}
final CountMinOutput cmo = prefixes.get(current.input);
assertNotNull(cmo);
@@ -920,7 +927,7 @@ public class TestFSTs extends LuceneTest
final CountMinOutput<T> cmo = ent.getValue();
final T output = run(fst, ent.getKey(), stopNode);
if (VERBOSE) {
- System.out.println("TEST: verify term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+ System.out.println("TEST: verify prefix=" + inputToString(inputMode, ent.getKey(), false) + " output=" + outputs.outputToString(cmo.output));
}
// if (cmo.isFinal && !cmo.isLeaf) {
if (cmo.isFinal) {
@@ -980,11 +987,17 @@ public class TestFSTs extends LuceneTest
@Nightly
public void testBigSet() throws IOException {
- testRandomWords(_TestUtil.nextInt(random, 50000, 60000), atLeast(1));
+ testRandomWords(_TestUtil.nextInt(random, 50000, 60000), 1);
}
-
+
private static String inputToString(int inputMode, IntsRef term) {
- if (inputMode == 0) {
+ return inputToString(inputMode, term, true);
+ }
+
+ private static String inputToString(int inputMode, IntsRef term, boolean isValidUnicode) {
+ if (!isValidUnicode) {
+ return term.toString();
+ } else if (inputMode == 0) {
// utf8
return toBytesRef(term).utf8ToString() + " " + term;
} else {
@@ -1007,7 +1020,7 @@ public class TestFSTs extends LuceneTest
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
- final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
+ final MockDirectoryWrapper dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);
writer.setInfoStream(VERBOSE ? System.out : null);
final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
@@ -1057,7 +1070,7 @@ public class TestFSTs extends LuceneTest
}
builder.add(term, outputs.get(output));
ord++;
- if (ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
+ if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
System.out.println(ord + " terms...");
}
}
@@ -1075,7 +1088,7 @@ public class TestFSTs extends LuceneTest
final BytesRef randomTerm = new BytesRef(getRandomString());
if (VERBOSE) {
- System.out.println("TEST: seek " + randomTerm.utf8ToString() + " " + randomTerm);
+ System.out.println("TEST: seek non-exist " + randomTerm.utf8ToString() + " " + randomTerm);
}
final TermsEnum.SeekStatus seekResult = termsEnum.seekCeil(randomTerm);
@@ -1127,10 +1140,10 @@ public class TestFSTs extends LuceneTest
assertEquals(termsEnum.term().utf8ToString() + " != " + fstEnum.current().input.utf8ToString(), termsEnum.term(), fstEnum.current().input);
if (storeOrd) {
// fst stored the ord
- assertEquals(termsEnum.ord(), ((Long) fstEnum.current().output).longValue());
+ assertEquals("term=" + termsEnum.term().utf8ToString() + " " + termsEnum.term(), termsEnum.ord(), ((Long) fstEnum.current().output).longValue());
} else {
// fst stored the docFreq
- assertEquals(termsEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue()));
+ assertEquals("term=" + termsEnum.term().utf8ToString() + " " + termsEnum.term(), termsEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue()));
}
}
}
@@ -1148,7 +1161,7 @@ public class TestFSTs extends LuceneTest
this.inputMode = inputMode;
this.outputs = outputs;
- builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs);
+ builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@@ -1248,7 +1261,7 @@ public class TestFSTs extends LuceneTest
}
}
- // java -cp build/classes/test:build/classes/java:build/classes/test-framework:lib/junit-4.7.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
+ // java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.7.jar org.apache.lucene.util.automaton.fst.TestFSTs /x/tmp/allTerms3.txt out
public static void main(String[] args) throws IOException {
int prune = 0;
int limit = Integer.MAX_VALUE;
@@ -1405,6 +1418,198 @@ public class TestFSTs extends LuceneTest
assertEquals(42, (long) seekResult.output);
}
+ public void testPrimaryKeys() throws Exception {
+ Directory dir = newDirectory();
+ // Two cycles over the same dir: cycle 0 indexes sequential zero-padded ids, cycle 1 indexes random long ids; each cycle then checks lookups via TermQuery and via TermsEnum seeks.
+ for(int cycle=0;cycle<2;cycle++) {
+ if (VERBOSE) {
+ System.out.println("TEST: cycle=" + cycle);
+ }
+ RandomIndexWriter w = new RandomIndexWriter(random, dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
+ Document doc = new Document();
+ Field idField = newField("id", "", Field.Index.NOT_ANALYZED);
+ doc.add(idField);
+ // The id count is 1000*RANDOM_MULTIPLIER scaled by a factor of 1.0 + random.nextDouble().
+ final int NUM_IDS = (int) (1000*RANDOM_MULTIPLIER*(1.0+random.nextDouble()));
+ //final int NUM_IDS = (int) (377 * (1.0+random.nextDouble()));
+ if (VERBOSE) {
+ System.out.println("TEST: NUM_IDS=" + NUM_IDS);
+ }
+ final Set<String> allIDs = new HashSet<String>();
+ for(int id=0;id<NUM_IDS;id++) {
+ String idString;
+ if (cycle == 0) {
+ // PKs are assigned sequentially, zero-padded to 7 digits
+ idString = String.format("%07d", id);
+ } else {
+ while(true) { // retry until the random long's string form is not already in allIDs
+ final String s = Long.toString(random.nextLong());
+ if (!allIDs.contains(s)) {
+ idString = s;
+ break;
+ }
+ }
+ }
+ allIDs.add(idString);
+ idField.setValue(idString);
+ w.addDocument(doc);
+ }
+
+ //w.optimize();
+
+ // turn writer into reader:
+ final IndexReader r = w.getReader();
+ final IndexSearcher s = new IndexSearcher(r);
+ w.close();
+ // allIDsList also receives the non-existent ids added below; sortedAllIDsList keeps only the indexed ids, in String natural order.
+ final List<String> allIDsList = new ArrayList<String>(allIDs);
+ final List<String> sortedAllIDsList = new ArrayList<String>(allIDsList);
+ Collections.sort(sortedAllIDsList);
+
+ // Sprinkle in some non-existent PKs (NUM_IDS/10 of them), tracked in outOfBounds:
+ Set<String> outOfBounds = new HashSet<String>();
+ for(int idx=0;idx<NUM_IDS/10;idx++) {
+ String idString;
+ if (cycle == 0) {
+ idString = String.format("%07d", (NUM_IDS + idx)); // values just past the sequential range 0..NUM_IDS-1
+ } else {
+ while(true) {
+ idString = Long.toString(random.nextLong());
+ if (!allIDs.contains(idString)) {
+ break;
+ }
+ }
+ }
+ outOfBounds.add(idString);
+ allIDsList.add(idString);
+ }
+
+ // Verify w/ TermQuery: an indexed id must give exactly 1 hit, a non-existent id 0 hits
+ for(int iter=0;iter<2*NUM_IDS;iter++) {
+ final String id = allIDsList.get(random.nextInt(allIDsList.size()));
+ final boolean exists = !outOfBounds.contains(id);
+ if (VERBOSE) {
+ System.out.println("TEST: TermQuery " + (exists ? "" : "non-exist ") + " id=" + id);
+ }
+ assertEquals((exists ? "" : "non-exist ") + "id=" + id, exists ? 1 : 0, s.search(new TermQuery(new Term("id", id)), 1).totalHits);
+ }
+
+ // Verify w/ MultiTermsEnum
+ final TermsEnum termsEnum = MultiFields.getTerms(r, "id").iterator();
+ for(int iter=0;iter<2*NUM_IDS;iter++) {
+ final String id;
+ final String nextID;
+ final boolean exists;
+ // Randomly choose between an exact lookup of a known/unknown id (nextID == null) and a seekCeil on a value placed between two indexed ids.
+ if (random.nextBoolean()) {
+ id = allIDsList.get(random.nextInt(allIDsList.size()));
+ exists = !outOfBounds.contains(id);
+ nextID = null;
+ if (VERBOSE) {
+ System.out.println("TEST: exactOnly " + (exists ? "" : "non-exist ") + "id=" + id);
+ }
+ } else {
+ // Pick ID between two IDs:
+ exists = false;
+ final int idv = random.nextInt(NUM_IDS-1);
+ if (cycle == 0) {
+ id = String.format("%07da", idv);
+ nextID = String.format("%07d", idv+1);
+ } else {
+ id = sortedAllIDsList.get(idv) + "a"; // NOTE(review): expects id+"a" to sort before the next id; would not hold if one id were a prefix of the next - confirm
+ nextID = sortedAllIDsList.get(idv+1);
+ }
+ if (VERBOSE) {
+ System.out.println("TEST: not exactOnly id=" + id + " nextID=" + nextID);
+ }
+ }
+
+ final boolean useCache = random.nextBoolean();
+ if (VERBOSE) {
+ System.out.println(" useCache=" + useCache);
+ }
+ // seekExact returns a boolean, so it is translated to a SeekStatus for the shared assertions below.
+ final TermsEnum.SeekStatus status;
+ if (nextID == null) {
+ if (termsEnum.seekExact(new BytesRef(id), useCache)) {
+ status = TermsEnum.SeekStatus.FOUND;
+ } else {
+ status = TermsEnum.SeekStatus.NOT_FOUND;
+ }
+ } else {
+ status = termsEnum.seekCeil(new BytesRef(id), useCache);
+ }
+ // A seekCeil on an in-between id must report NOT_FOUND and leave the enum positioned on nextID.
+ if (nextID != null) {
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
+ assertEquals("expected=" + nextID + " actual=" + termsEnum.term().utf8ToString(), new BytesRef(nextID), termsEnum.term());
+ } else if (!exists) {
+ assertTrue(status == TermsEnum.SeekStatus.NOT_FOUND ||
+ status == TermsEnum.SeekStatus.END);
+ } else {
+ assertEquals(TermsEnum.SeekStatus.FOUND, status);
+ }
+ }
+
+ r.close();
+ }
+ dir.close();
+ }
+
+ public void testRandomTermLookup() throws Exception {
+ Directory dir = newDirectory();
+ // Indexes NUM_TERMS distinct strings from simpleRandomString, one per document, then checks that a TermQuery on each one reports exactly one hit.
+ RandomIndexWriter w = new RandomIndexWriter(random, dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
+ w.w.setInfoStream(VERBOSE ? System.out : null); // info stream goes to stdout only when VERBOSE
+
+ Document doc = new Document();
+ Field f = newField("field", "", Field.Index.NOT_ANALYZED);
+ doc.add(f);
+
+ final int NUM_TERMS = (int) (1000*RANDOM_MULTIPLIER * (1+random.nextDouble()));
+ if (VERBOSE) {
+ System.out.println("TEST: NUM_TERMS=" + NUM_TERMS);
+ }
+ // The set drops duplicates, so keep generating until NUM_TERMS distinct strings exist.
+ final Set<String> allTerms = new HashSet<String>();
+ while(allTerms.size() < NUM_TERMS) {
+ allTerms.add(simpleRandomString(random));
+ }
+ // One document per term; the same Document/Field instance is reused with a new value each time.
+ for(String term : allTerms) {
+ f.setValue(term);
+ w.addDocument(doc);
+ }
+
+ // turn writer into reader:
+ if (VERBOSE) {
+ System.out.println("TEST: get reader");
+ }
+ IndexReader r = w.getReader();
+ if (VERBOSE) {
+ System.out.println("TEST: got reader=" + r);
+ }
+ IndexSearcher s = new IndexSearcher(r);
+ w.close();
+ // Look the terms up in shuffled order; the shuffle is driven by the test's random.
+ final List<String> allTermsList = new ArrayList<String>(allTerms);
+ Collections.shuffle(allTermsList, random);
+
+ // verify exact lookup: every indexed term must match exactly one document
+ for(String term : allTermsList) {
+ if (VERBOSE) {
+ System.out.println("TEST: term=" + term);
+ }
+ assertEquals("term=" + term, 1, s.search(new TermQuery(new Term("field", term)), 1).totalHits);
+ }
+
+ r.close();
+ dir.close();
+ }
+
+
/**
* Test state expansion (array format) on close-to-root states. Creates
* synthetic input that has one expanded state on each level.
@@ -1486,6 +1691,36 @@ public class TestFSTs extends LuceneTest
s.verifyStateAndBelow(fst, arc, 1);
}
+ public void testFinalOutputOnEndState() throws Exception {
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ // Builds an FST holding "stat" -> 17 and "station" -> 10, renders it with Util.toDot, and checks the dot text for one specific arc label.
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null);
+ builder.add("stat", outputs.get(17));
+ builder.add("station", outputs.get(10));
+ final FST<Long> fst = builder.finish();
+ //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
+ StringWriter w = new StringWriter();
+ Util.toDot(fst, w, false, false);
+ w.close();
+ //System.out.println(w.toString());
+ assertTrue(w.toString().indexOf("label=\"t/[7]\"") != -1); // NOTE(review): presumably 7 = 17 - 10, the final output left at the end of "stat" - confirm against Util.toDot's label format
+ }
+
+ public void testInternalFinalState() throws Exception {
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ // Builds an FST holding "stat" and "station", both with the no-output value, renders it with Util.toDot, and checks the dot text for a doublecircle node.
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
+ builder.add(new BytesRef("stat"), outputs.getNoOutput());
+ builder.add(new BytesRef("station"), outputs.getNoOutput());
+ final FST<Long> fst = builder.finish();
+ StringWriter w = new StringWriter();
+ //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
+ Util.toDot(fst, w, false, false);
+ w.close();
+ //System.out.println(w.toString());
+ assertTrue(w.toString().indexOf("6 [shape=doublecircle") != -1); // NOTE(review): presumably node 6 is the internal state reached after "stat", drawn as final - confirm against Util.toDot's node numbering
+ }
+
// Make sure raw FST can differentiate between final vs
// non-final end nodes
public void testNonFinalStopNodes() throws Exception {
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/build.xml?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/build.xml (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/build.xml Tue Aug 23 14:06:58 2011
@@ -28,14 +28,6 @@
<import file="../../../lucene/contrib/contrib-build.xml"/>
- <path id="test.classpath">
- <path refid="classpath"/>
- <pathelement location="../../../lucene/build/classes/test-framework"/>
- <pathelement location="../../../lucene/build/classes/test/"/>
- <path refid="junit-path"/>
- <pathelement location="${build.dir}/classes/java"/>
- </path>
-
<target name="compile-core" depends="jflex-notice, common.compile-core"/>
<target name="jflex" depends="jflex-check,clean-jflex,gen-uax29-supp-macros,
@@ -56,7 +48,7 @@
nobak="on"/>
</target>
- <target name="jflex-StandardAnalyzer" depends="init,jflex-check,gen-tlds" if="jflex.present">
+ <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
@@ -67,15 +59,21 @@
<jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
+ <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
+ outdir="src/java/org/apache/lucene/analysis/standard/std31"
+ nobak="on" />
</target>
<target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
- <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex"
+ <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
+ <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
+ outdir="src/java/org/apache/lucene/analysis/standard/std31"
+ nobak="on" />
</target>
<target name="clean-jflex">
@@ -83,7 +81,7 @@
<fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
- <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
+ <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
</delete>
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java Tue Aug 23 14:06:58 2011
@@ -126,10 +126,10 @@ public final class ArabicAnalyzer extend
/**
* Creates
- * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
- * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+ * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}
Modified: lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java (original)
+++ lucene/dev/branches/flexscoring/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java Tue Aug 23 14:06:58 2011
@@ -107,11 +107,11 @@ public final class BulgarianAnalyzer ext
/**
* Creates a
- * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* which tokenizes all the text in the provided {@link Reader}.
*
* @return A
- * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link StandardTokenizer} filtered with
* {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
* , {@link KeywordMarkerFilter} if a stem exclusion set is