You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ry...@apache.org on 2012/03/07 00:17:23 UTC
svn commit: r1297785 [6/16] - in
/lucene/dev/branches/lucene3795_lsp_spatial_module: ./ dev-tools/eclipse/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/demo/
dev-tools/maven/lucene/contrib/highlighter/ dev-tools/maven/lucene/...
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSort.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSort.java Tue Mar 6 23:17:08 2012
@@ -20,27 +20,32 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
@@ -48,6 +53,7 @@ import org.apache.lucene.store.LockObtai
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.BeforeClass;
@@ -693,7 +699,7 @@ public class TestSort extends LuceneTest
};
@Override
- public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+ public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
docValues = FieldCache.DEFAULT.getInts(context.reader(), "parser", testIntParser, false);
return this;
}
@@ -706,7 +712,7 @@ public class TestSort extends LuceneTest
static class MyFieldComparatorSource extends FieldComparatorSource {
@Override
- public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
+ public FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
return new MyFieldComparator(numHits);
}
}
@@ -1294,4 +1300,155 @@ public class TestSort extends LuceneTest
reader.close();
indexStore.close();
}
+
+ private static class RandomFilter extends Filter {
+ private final Random random;
+ private float density;
+ private final List<BytesRef> docValues;
+ public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
+
+ // density should be 0.0 ... 1.0
+ public RandomFilter(Random random, float density, List<BytesRef> docValues) {
+ this.random = random;
+ this.density = density;
+ this.docValues = docValues;
+ }
+
+ @Override
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+ final int maxDoc = context.reader().maxDoc();
+ final DocValues.Source idSource = context.reader().docValues("id").getSource();
+ assertNotNull(idSource);
+ final FixedBitSet bits = new FixedBitSet(maxDoc);
+ for(int docID=0;docID<maxDoc;docID++) {
+ if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) {
+ bits.set(docID);
+ //System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
+ matchValues.add(docValues.get((int) idSource.getInt(docID)));
+ }
+ }
+
+ return bits;
+ }
+ }
+
+ public void testRandomStringSort() throws Exception {
+ assumeTrue("cannot work with Lucene3x codec",
+ defaultCodecSupportsDocValues());
+
+ final int NUM_DOCS = atLeast(100);
+ final Directory dir = newDirectory();
+ final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+ final boolean allowDups = random.nextBoolean();
+ final Set<String> seen = new HashSet<String>();
+ final int maxLength = _TestUtil.nextInt(random, 5, 100);
+ if (VERBOSE) {
+ System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
+ }
+
+ int numDocs = 0;
+ final List<BytesRef> docValues = new ArrayList<BytesRef>();
+ // TODO: deletions
+ while (numDocs < NUM_DOCS) {
+ final String s;
+ if (random.nextBoolean()) {
+ s = _TestUtil.randomSimpleString(random, maxLength);
+ } else {
+ s = _TestUtil.randomUnicodeString(random, maxLength);
+ }
+ final BytesRef br = new BytesRef(s);
+
+ if (!allowDups) {
+ if (seen.contains(s)) {
+ continue;
+ }
+ seen.add(s);
+ }
+
+ if (VERBOSE) {
+ System.out.println(" " + numDocs + ": s=" + s);
+ }
+
+ final Document doc = new Document();
+ doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
+ doc.add(newField("string", s, StringField.TYPE_UNSTORED));
+ doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
+ docValues.add(br);
+ writer.addDocument(doc);
+ numDocs++;
+
+ if (random.nextInt(40) == 17) {
+ // force flush
+ writer.getReader().close();
+ }
+ }
+
+ final IndexReader r = writer.getReader();
+ writer.close();
+ if (VERBOSE) {
+ System.out.println(" reader=" + r);
+ }
+
+ final IndexSearcher s = newSearcher(r, false);
+ final int ITERS = atLeast(100);
+ for(int iter=0;iter<ITERS;iter++) {
+ final boolean reverse = random.nextBoolean();
+ final TopFieldDocs hits;
+ final SortField sf;
+ if (random.nextBoolean()) {
+ sf = new SortField("stringdv", SortField.Type.STRING, reverse);
+ sf.setUseIndexValues(true);
+ } else {
+ sf = new SortField("string", SortField.Type.STRING, reverse);
+ }
+ final Sort sort = new Sort(sf);
+ final int hitCount = _TestUtil.nextInt(random, 1, r.maxDoc() + 20);
+ final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues);
+ if (random.nextBoolean()) {
+ hits = s.search(new ConstantScoreQuery(f),
+ hitCount,
+ sort);
+ } else {
+ hits = s.search(new MatchAllDocsQuery(),
+ f,
+ hitCount,
+ sort);
+ }
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse);
+ }
+
+ // Compute expected results:
+ Collections.sort(f.matchValues);
+ if (reverse) {
+ Collections.reverse(f.matchValues);
+ }
+ final List<BytesRef> expected = f.matchValues;
+ if (VERBOSE) {
+ System.out.println(" expected:");
+ for(int idx=0;idx<expected.size();idx++) {
+ System.out.println(" " + idx + ": " + expected.get(idx).utf8ToString());
+ if (idx == hitCount-1) {
+ break;
+ }
+ }
+ }
+
+ if (VERBOSE) {
+ System.out.println(" actual:");
+ for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
+ final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
+ System.out.println(" " + hitIDX + ": " + ((BytesRef) fd.fields[0]).utf8ToString());
+ }
+ }
+ for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
+ final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
+ assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]);
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java Tue Mar 6 23:17:08 2012
@@ -161,7 +161,7 @@ public class TestSubScorerFreqs extends
query.add(inner, Occur.MUST);
query.add(aQuery, Occur.MUST);
query.add(dQuery, Occur.MUST);
- Set<String>[] occurList = new Set[] {
+ @SuppressWarnings({"rawtypes","unchecked"}) Set<String>[] occurList = new Set[] {
Collections.singleton(Occur.MUST.toString()),
new HashSet<String>(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString()))
};
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java Tue Mar 6 23:17:08 2012
@@ -135,19 +135,19 @@ public class TestTermVectors extends Luc
TermsEnum termsEnum = terms.iterator(null);
assertEquals("content", termsEnum.next().utf8ToString());
dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, dpEnum.freq());
assertEquals(expectedPositions[0], dpEnum.nextPosition());
assertEquals("here", termsEnum.next().utf8ToString());
dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, dpEnum.freq());
assertEquals(expectedPositions[1], dpEnum.nextPosition());
assertEquals("some", termsEnum.next().utf8ToString());
dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, dpEnum.freq());
assertEquals(expectedPositions[2], dpEnum.nextPosition());
@@ -178,7 +178,7 @@ public class TestTermVectors extends Luc
while(true) {
dpEnum = termsEnum.docsAndPositions(null, dpEnum, shouldBeOffVector);
assertNotNull(dpEnum);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
dpEnum.nextPosition();
@@ -256,14 +256,14 @@ public class TestTermVectors extends Luc
DocsEnum docs = null;
while(fields.next() != null) {
Terms terms = fields.terms();
- assertNotNull(terms);
+ assertNotNull(terms); // NOTE: kinda sketchy assumptions, but ideally we would fix fieldsenum api...
TermsEnum termsEnum = terms.iterator(null);
while (termsEnum.next() != null) {
String text = termsEnum.term().utf8ToString();
docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, true);
- while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+ while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
int docId = docs.docID();
int freq = docs.freq();
//System.out.println("Doc Id: " + docId + " freq " + freq);
@@ -428,7 +428,7 @@ public class TestTermVectors extends Luc
assertEquals(5, termsEnum.totalTermFreq());
DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false);
assertNotNull(dpEnum);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(5, dpEnum.freq());
for(int i=0;i<5;i++) {
assertEquals(i, dpEnum.nextPosition());
@@ -436,7 +436,7 @@ public class TestTermVectors extends Luc
dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
assertNotNull(dpEnum);
- assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
+ assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(5, dpEnum.freq());
for(int i=0;i<5;i++) {
dpEnum.nextPosition();
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java Tue Mar 6 23:17:08 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
@@ -121,7 +122,7 @@ public class MultiSpansWrapper extends S
@Override
public int doc() {
if (current == null) {
- return DocsEnum.NO_MORE_DOCS;
+ return DocIdSetIterator.NO_MORE_DOCS;
}
return current.doc() + leaves[leafOrd].docBase;
}
@@ -129,7 +130,7 @@ public class MultiSpansWrapper extends S
@Override
public int start() {
if (current == null) {
- return DocsEnum.NO_MORE_DOCS;
+ return DocIdSetIterator.NO_MORE_DOCS;
}
return current.start();
}
@@ -137,7 +138,7 @@ public class MultiSpansWrapper extends S
@Override
public int end() {
if (current == null) {
- return DocsEnum.NO_MORE_DOCS;
+ return DocIdSetIterator.NO_MORE_DOCS;
}
return current.end();
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java Tue Mar 6 23:17:08 2012
@@ -48,7 +48,8 @@ public class TestNRTCachingDirectory ext
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
RandomIndexWriter w = new RandomIndexWriter(random, cachedDir, conf);
- final LineFileDocs docs = new LineFileDocs(random);
+ final LineFileDocs docs = new LineFileDocs(random,
+ defaultCodecSupportsDocValues());
final int numDocs = _TestUtil.nextInt(random, 100, 400);
if (VERBOSE) {
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestAttributeSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestAttributeSource.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestAttributeSource.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestAttributeSource.java Tue Mar 6 23:17:08 2012
@@ -126,7 +126,7 @@ public class TestAttributeSource extends
src.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl);
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"rawtypes","unchecked"})
public void testInvalidArguments() throws Exception {
try {
AttributeSource src = new AttributeSource();
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestVirtualMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestVirtualMethod.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestVirtualMethod.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/TestVirtualMethod.java Tue Mar 6 23:17:08 2012
@@ -69,7 +69,7 @@ public class TestVirtualMethod extends L
assertEquals(0, VirtualMethod.compareImplementationDistance(TestClass5.class, publicTestMethod, protectedTestMethod));
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"rawtypes","unchecked"})
public void testExceptions() {
try {
// cast to Class to remove generics:
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Tue Mar 6 23:17:08 2012
@@ -57,8 +57,10 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util.LuceneTestCase.UseNoMemoryExpensiveCodec;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.PairOutputs.Pair;
@UseNoMemoryExpensiveCodec
public class TestFSTs extends LuceneTestCase {
@@ -493,7 +495,7 @@ public class TestFSTs extends LuceneTest
if (random.nextBoolean() && fst != null && !willRewrite) {
TestFSTs t = new TestFSTs();
- IOContext context = t.newIOContext(random);
+ IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out);
out.close();
@@ -983,7 +985,7 @@ public class TestFSTs extends LuceneTest
if (VERBOSE) {
System.out.println(" fstEnum.next prefix=" + inputToString(inputMode, current.input, false) + " output=" + outputs.outputToString(current.output));
}
- final CountMinOutput cmo = prefixes.get(current.input);
+ final CountMinOutput<T> cmo = prefixes.get(current.input);
assertNotNull(cmo);
assertTrue(cmo.isLeaf || cmo.isFinal);
//if (cmo.isFinal && !cmo.isLeaf) {
@@ -1094,7 +1096,7 @@ public class TestFSTs extends LuceneTest
Codec.setDefault(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
}
- final LineFileDocs docs = new LineFileDocs(random);
+ final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
@@ -1182,7 +1184,7 @@ public class TestFSTs extends LuceneTest
}
final TermsEnum.SeekStatus seekResult = termsEnum.seekCeil(randomTerm);
- final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(randomTerm);
+ final InputOutput<Long> fstSeekResult = fstEnum.seekCeil(randomTerm);
if (seekResult == TermsEnum.SeekStatus.END) {
assertNull("got " + (fstSeekResult == null ? "null" : fstSeekResult.input.utf8ToString()) + " but expected null", fstSeekResult);
@@ -1223,7 +1225,7 @@ public class TestFSTs extends LuceneTest
dir.close();
}
- private void assertSame(TermsEnum termsEnum, BytesRefFSTEnum fstEnum, boolean storeOrd) throws Exception {
+ private void assertSame(TermsEnum termsEnum, BytesRefFSTEnum<?> fstEnum, boolean storeOrd) throws Exception {
if (termsEnum.term() == null) {
assertNull(fstEnum.current());
} else {
@@ -1828,7 +1830,7 @@ public class TestFSTs extends LuceneTest
public int verifyStateAndBelow(FST<Object> fst, Arc<Object> arc, int depth)
throws IOException {
- if (fst.targetHasArcs(arc)) {
+ if (FST.targetHasArcs(arc)) {
int childCount = 0;
for (arc = fst.readFirstTargetArc(arc, arc);;
arc = fst.readNextArc(arc), childCount++)
@@ -1975,6 +1977,12 @@ public class TestFSTs extends LuceneTest
assertFalse(arc.isFinal());
assertEquals(42, arc.output.longValue());
}
+
+ static final Comparator<Long> minLongComparator = new Comparator<Long> () {
+ public int compare(Long left, Long right) {
+ return left.compareTo(right);
+ }
+ };
public void testShortestPaths() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
@@ -1989,19 +1997,65 @@ public class TestFSTs extends LuceneTest
//Util.toDot(fst, w, false, false);
//w.close();
- Util.MinResult[] r = Util.shortestPaths(fst,
+ Util.MinResult<Long>[] r = Util.shortestPaths(fst,
fst.getFirstArc(new FST.Arc<Long>()),
+ minLongComparator,
3);
assertEquals(3, r.length);
assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
- assertEquals(7, r[0].output);
+ assertEquals(7L, r[0].output.longValue());
assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
- assertEquals(17, r[1].output);
+ assertEquals(17L, r[1].output.longValue());
assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
- assertEquals(22, r[2].output);
+ assertEquals(22L, r[2].output.longValue());
+ }
+
+ // compares just the weight side of the pair
+ static final Comparator<Pair<Long,Long>> minPairWeightComparator = new Comparator<Pair<Long,Long>> () {
+ public int compare(Pair<Long,Long> left, Pair<Long,Long> right) {
+ return left.output1.compareTo(right.output1);
+ }
+ };
+
+ /** like testShortestPaths, but uses pairoutputs so we have both a weight and an output */
+ public void testShortestPathsWFST() throws Exception {
+
+ PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
+ PositiveIntOutputs.getSingleton(true), // weight
+ PositiveIntOutputs.getSingleton(true) // output
+ );
+
+ final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
+
+ final IntsRef scratch = new IntsRef();
+ builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.newPair(22L, 57L));
+ builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.newPair(7L, 36L));
+ builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.newPair(17L, 85L));
+ final FST<Pair<Long,Long>> fst = builder.finish();
+ //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
+ //Util.toDot(fst, w, false, false);
+ //w.close();
+
+ Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst,
+ fst.getFirstArc(new FST.Arc<Pair<Long,Long>>()),
+ minPairWeightComparator,
+ 3);
+ assertEquals(3, r.length);
+
+ assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), r[0].input);
+ assertEquals(7L, r[0].output.output1.longValue()); // weight
+ assertEquals(36L, r[0].output.output2.longValue()); // output
+
+ assertEquals(Util.toIntsRef(new BytesRef("ax"), scratch), r[1].input);
+ assertEquals(17L, r[1].output.output1.longValue()); // weight
+ assertEquals(85L, r[1].output.output2.longValue()); // output
+
+ assertEquals(Util.toIntsRef(new BytesRef("aab"), scratch), r[2].input);
+ assertEquals(22L, r[2].output.output1.longValue()); // weight
+ assertEquals(57L, r[2].output.output2.longValue()); // output
}
public void testShortestPathsRandom() throws Exception {
@@ -2059,17 +2113,121 @@ public class TestFSTs extends LuceneTest
final int topN = _TestUtil.nextInt(random, 1, 10);
- Util.MinResult[] r = Util.shortestPaths(fst, arc, topN);
+ Util.MinResult<Long>[] r = Util.shortestPaths(fst, arc, minLongComparator, topN);
// 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
- final List<Util.MinResult> matches = new ArrayList<Util.MinResult>();
+ final List<Util.MinResult<Long>> matches = new ArrayList<Util.MinResult<Long>>();
// TODO: could be faster... but its slowCompletor for a reason
for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
if (e.getKey().startsWith(prefix)) {
//System.out.println(" consider " + e.getKey());
- matches.add(new Util.MinResult(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())), new IntsRef()),
- e.getValue() - prefixOutput));
+ matches.add(new Util.MinResult<Long>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())), new IntsRef()),
+ e.getValue() - prefixOutput, minLongComparator));
+ }
+ }
+
+ assertTrue(matches.size() > 0);
+ Collections.sort(matches);
+ if (matches.size() > topN) {
+ matches.subList(topN, matches.size()).clear();
+ }
+
+ assertEquals(matches.size(), r.length);
+
+ for(int hit=0;hit<r.length;hit++) {
+ //System.out.println(" check hit " + hit);
+ assertEquals(matches.get(hit).input, r[hit].input);
+ assertEquals(matches.get(hit).output, r[hit].output);
+ }
+ }
+ }
+
+ // used by slowcompletor
+ class TwoLongs {
+ long a;
+ long b;
+
+ TwoLongs(long a, long b) {
+ this.a = a;
+ this.b = b;
+ }
+ }
+
+ /** like testShortestPathsRandom, but uses pairoutputs so we have both a weight and an output */
+ public void testShortestPathsWFSTRandom() throws Exception {
+ int numWords = atLeast(1000);
+
+ final TreeMap<String,TwoLongs> slowCompletor = new TreeMap<String,TwoLongs>();
+ final TreeSet<String> allPrefixes = new TreeSet<String>();
+
+ PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
+ PositiveIntOutputs.getSingleton(true), // weight
+ PositiveIntOutputs.getSingleton(true) // output
+ );
+ final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
+ final IntsRef scratch = new IntsRef();
+
+ for (int i = 0; i < numWords; i++) {
+ String s;
+ while (true) {
+ s = _TestUtil.randomSimpleString(random);
+ if (!slowCompletor.containsKey(s)) {
+ break;
+ }
+ }
+
+ for (int j = 1; j < s.length(); j++) {
+ allPrefixes.add(s.substring(0, j));
+ }
+ int weight = _TestUtil.nextInt(random, 1, 100); // weights 1..100
+ int output = _TestUtil.nextInt(random, 0, 500); // outputs 0..500
+ slowCompletor.put(s, new TwoLongs(weight, output));
+ }
+
+ for (Map.Entry<String,TwoLongs> e : slowCompletor.entrySet()) {
+ //System.out.println("add: " + e);
+ long weight = e.getValue().a;
+ long output = e.getValue().b;
+ builder.add(Util.toIntsRef(new BytesRef(e.getKey()), scratch), outputs.newPair(weight, output));
+ }
+
+ final FST<Pair<Long,Long>> fst = builder.finish();
+ //System.out.println("SAVE out.dot");
+ //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
+ //Util.toDot(fst, w, false, false);
+ //w.close();
+
+ BytesReader reader = fst.getBytesReader(0);
+
+ //System.out.println("testing: " + allPrefixes.size() + " prefixes");
+ for (String prefix : allPrefixes) {
+ // 1. run prefix against fst, then complete by value
+ //System.out.println("TEST: " + prefix);
+
+ Pair<Long,Long> prefixOutput = outputs.getNoOutput();
+ FST.Arc<Pair<Long,Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long,Long>>());
+ for(int idx=0;idx<prefix.length();idx++) {
+ if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
+ fail();
+ }
+ prefixOutput = outputs.add(prefixOutput, arc.output);
+ }
+
+ final int topN = _TestUtil.nextInt(random, 1, 10);
+
+ Util.MinResult<Pair<Long,Long>>[] r = Util.shortestPaths(fst, arc, minPairWeightComparator, topN);
+
+ // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
+ final List<Util.MinResult<Pair<Long,Long>>> matches = new ArrayList<Util.MinResult<Pair<Long,Long>>>();
+
+ // TODO: could be faster... but its slowCompletor for a reason
+ for (Map.Entry<String,TwoLongs> e : slowCompletor.entrySet()) {
+ if (e.getKey().startsWith(prefix)) {
+ //System.out.println(" consider " + e.getKey());
+ matches.add(new Util.MinResult<Pair<Long,Long>>(Util.toIntsRef(new BytesRef(e.getKey().substring(prefix.length())), new IntsRef()),
+ outputs.newPair(e.getValue().a - prefixOutput.output1, e.getValue().b - prefixOutput.output2),
+ minPairWeightComparator));
}
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Tue Mar 6 23:17:08 2012
@@ -17,13 +17,18 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
-import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
-
+
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
@@ -83,7 +88,7 @@ public abstract class BaseTokenStreamTes
}
}
- public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException {
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset) throws IOException {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -107,6 +112,12 @@ public abstract class BaseTokenStreamTes
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
}
+
+ PositionLengthAttribute posLengthAtt = null;
+ if (posLengths != null) {
+ assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
+ posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
+ }
ts.reset();
for (int i = 0; i < output.length; i++) {
@@ -116,6 +127,7 @@ public abstract class BaseTokenStreamTes
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+ if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
assertTrue("token "+i+" does not exist", ts.incrementToken());
@@ -130,6 +142,8 @@ public abstract class BaseTokenStreamTes
assertEquals("type "+i, types[i], typeAtt.type());
if (posIncrements != null)
assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
+ if (posLengths != null)
+ assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
// we can enforce some basic things about a few attributes even if the caller doesn't check:
if (offsetAtt != null) {
@@ -138,14 +152,18 @@ public abstract class BaseTokenStreamTes
assertTrue("endOffset must be >= startOffset", offsetAtt.endOffset() >= offsetAtt.startOffset());
if (finalOffset != null) {
assertTrue("startOffset must be <= finalOffset", offsetAtt.startOffset() <= finalOffset.intValue());
- assertTrue("endOffset must be <= finalOffset", offsetAtt.endOffset() <= finalOffset.intValue());
+ assertTrue("endOffset must be <= finalOffset: got endOffset=" + offsetAtt.endOffset() + " vs finalOffset=" + finalOffset.intValue(),
+ offsetAtt.endOffset() <= finalOffset.intValue());
}
}
if (posIncrAtt != null) {
assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0);
}
+ if (posLengthAtt != null) {
+ assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
+ }
}
- assertFalse("end of stream", ts.incrementToken());
+ assertFalse("TokenStream has more tokens than expected", ts.incrementToken());
ts.end();
if (finalOffset != null)
assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
@@ -155,65 +173,81 @@ public abstract class BaseTokenStreamTes
ts.close();
}
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, finalOffset);
+ }
+
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException {
- assertTokenStreamContents(ts, output, null, null, null, null, null);
+ assertTokenStreamContents(ts, output, null, null, null, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException {
- assertTokenStreamContents(ts, output, null, null, types, null, null);
+ assertTokenStreamContents(ts, output, null, null, types, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException {
- assertTokenStreamContents(ts, output, null, null, null, posIncrements, null);
+ assertTokenStreamContents(ts, output, null, null, null, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], Integer finalOffset) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, finalOffset);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, finalOffset);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, Integer finalOffset) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, finalOffset);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, finalOffset);
+ }
+
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, posLengths, finalOffset);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
+ }
+
+ public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, null, null);
+ assertAnalyzesTo(a, input, output, null, null, null, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, types, null);
+ assertAnalyzesTo(a, input, output, null, null, types, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, null, posIncrements);
+ assertAnalyzesTo(a, input, output, null, null, null, posIncrements, null);
+ }
+
+ public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException {
+ assertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null);
+ assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements);
+ assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
@@ -246,15 +280,22 @@ public abstract class BaseTokenStreamTes
assertAnalyzesToReuse(a, input, new String[]{expected});
}
- // simple utility method for blasting tokenstreams with data to make sure they don't do anything crazy
- // TODO: add a MockCharStream, and use it here too, to ensure that correctOffset etc is being done by tokenizers.
+ /** utility method for blasting tokenstreams with data to make sure they don't do anything crazy */
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
- checkRandomData(random, a, iterations, 20);
+ checkRandomData(random, a, iterations, false);
+ }
+
+ /**
+ * utility method for blasting tokenstreams with data to make sure they don't do anything crazy
+ * @param simple true if only ascii strings will be used (try to avoid)
+ */
+ public static void checkRandomData(Random random, Analyzer a, int iterations, boolean simple) throws IOException {
+ checkRandomData(random, a, iterations, 20, simple);
// now test with multiple threads
int numThreads = _TestUtil.nextInt(random, 4, 8);
Thread threads[] = new Thread[numThreads];
for (int i = 0; i < threads.length; i++) {
- threads[i] = new AnalysisThread(new Random(random.nextLong()), a, iterations);
+ threads[i] = new AnalysisThread(new Random(random.nextLong()), a, iterations, simple);
}
for (int i = 0; i < threads.length; i++) {
threads[i].start();
@@ -272,11 +313,13 @@ public abstract class BaseTokenStreamTes
final int iterations;
final Random random;
final Analyzer a;
+ final boolean simple;
- AnalysisThread(Random random, Analyzer a, int iterations) {
+ AnalysisThread(Random random, Analyzer a, int iterations, boolean simple) {
this.random = random;
this.a = a;
this.iterations = iterations;
+ this.simple = simple;
}
@Override
@@ -284,36 +327,40 @@ public abstract class BaseTokenStreamTes
try {
// see the part in checkRandomData where it replays the same text again
// to verify reproducability/reuse: hopefully this would catch thread hazards.
- checkRandomData(random, a, iterations, 20);
+ checkRandomData(random, a, iterations, 20, simple);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
- public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
- checkRandomData(random, a, iterations, maxWordLength, random.nextBoolean());
+ public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple) throws IOException {
+ checkRandomData(random, a, iterations, maxWordLength, random.nextBoolean(), simple);
}
- public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter) throws IOException {
+ public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple) throws IOException {
for (int i = 0; i < iterations; i++) {
String text;
- switch(_TestUtil.nextInt(random, 0, 4)) {
- case 0:
- text = _TestUtil.randomSimpleString(random);
- break;
- case 1:
- text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
- break;
- case 2:
- text = _TestUtil.randomHtmlishString(random, maxWordLength);
- break;
- default:
- text = _TestUtil.randomUnicodeString(random, maxWordLength);
+ if (simple) {
+ text = random.nextBoolean() ? _TestUtil.randomSimpleString(random) : _TestUtil.randomHtmlishString(random, maxWordLength);
+ } else {
+ switch(_TestUtil.nextInt(random, 0, 4)) {
+ case 0:
+ text = _TestUtil.randomSimpleString(random);
+ break;
+ case 1:
+ text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
+ break;
+ case 2:
+ text = _TestUtil.randomHtmlishString(random, maxWordLength);
+ break;
+ default:
+ text = _TestUtil.randomUnicodeString(random, maxWordLength);
+ }
}
if (VERBOSE) {
- System.out.println("NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
+ System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
}
int remainder = random.nextInt(10);
@@ -323,10 +370,12 @@ public abstract class BaseTokenStreamTes
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
+ PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
TypeAttribute typeAtt = ts.hasAttribute(TypeAttribute.class) ? ts.getAttribute(TypeAttribute.class) : null;
List<String> tokens = new ArrayList<String>();
List<String> types = new ArrayList<String>();
List<Integer> positions = new ArrayList<Integer>();
+ List<Integer> positionLengths = new ArrayList<Integer>();
List<Integer> startOffsets = new ArrayList<Integer>();
List<Integer> endOffsets = new ArrayList<Integer>();
ts.reset();
@@ -334,6 +383,7 @@ public abstract class BaseTokenStreamTes
tokens.add(termAtt.toString());
if (typeAtt != null) types.add(typeAtt.type());
if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
+ if (posLengthAtt != null) positionLengths.add(posLengthAtt.getPositionLength());
if (offsetAtt != null) {
startOffsets.add(offsetAtt.startOffset());
endOffsets.add(offsetAtt.endOffset());
@@ -344,11 +394,21 @@ public abstract class BaseTokenStreamTes
// verify reusing is "reproducable" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty()) {
if (VERBOSE) {
- System.out.println("NOTE: BaseTokenStreamTestCase: re-run analysis");
+ System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: re-run analysis; " + tokens.size() + " tokens");
}
reader = new StringReader(text);
ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
- if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
+ if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
+ // offset + pos + posLength + type
+ assertTokenStreamContents(ts,
+ tokens.toArray(new String[tokens.size()]),
+ toIntArray(startOffsets),
+ toIntArray(endOffsets),
+ types.toArray(new String[types.size()]),
+ toIntArray(positions),
+ toIntArray(positionLengths),
+ text.length());
+ } else if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
// offset + pos + type
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
@@ -356,7 +416,18 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
types.toArray(new String[types.size()]),
toIntArray(positions),
+ null,
text.length());
+ } else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
+ // offset + pos + posLength
+ assertTokenStreamContents(ts,
+ tokens.toArray(new String[tokens.size()]),
+ toIntArray(startOffsets),
+ toIntArray(endOffsets),
+ null,
+ toIntArray(positions),
+ toIntArray(positionLengths),
+ text.length());
} else if (posIncAtt != null && offsetAtt != null) {
// offset + pos
assertTokenStreamContents(ts,
@@ -365,6 +436,7 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
null,
toIntArray(positions),
+ null,
text.length());
} else if (offsetAtt != null) {
// offset
@@ -374,6 +446,7 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
null,
null,
+ null,
text.length());
} else {
// terms only
@@ -383,6 +456,22 @@ public abstract class BaseTokenStreamTes
}
}
}
+
+ protected String toDot(Analyzer a, String inputText) throws IOException {
+ final StringWriter sw = new StringWriter();
+ final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
+ ts.reset();
+ new TokenStreamToDot(inputText, ts, new PrintWriter(sw)).toDot();
+ return sw.toString();
+ }
+
+ protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException {
+ Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), "UTF-8");
+ final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
+ ts.reset();
+ new TokenStreamToDot(inputText, ts, new PrintWriter(w)).toDot();
+ w.close();
+ }
static int[] toIntArray(List<Integer> list) {
int ret[] = new int[list.size()];
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/TermInfosWriter.java Tue Mar 6 23:17:08 2012
@@ -210,9 +210,9 @@ final class TermInfosWriter implements C
assert ti.freqPointer >= lastTi.freqPointer: "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")";
assert ti.proxPointer >= lastTi.proxPointer: "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")";
- if (!isIndex && size % indexInterval == 0)
+ if (!isIndex && size % indexInterval == 0) {
other.add(lastFieldNumber, lastTerm, lastTi); // add an index term
-
+ }
writeTerm(fieldNumber, term); // write term
output.writeVInt(ti.docFreq); // write doc freq
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java Tue Mar 6 23:17:08 2012
@@ -356,37 +356,29 @@ public abstract class ThreadedIndexingAn
shift = 0;
trigger = 1;
} else {
- trigger = totTermCount.get()/10;
+ trigger = totTermCount.get()/30;
shift = random.nextInt(trigger);
}
- BytesRef term = termsEnum.next();
- if (term == null) {
- if (seenTermCount == 0) {
+ while (true) {
+ BytesRef term = termsEnum.next();
+ if (term == null) {
+ if (seenTermCount == 0) {
+ break;
+ }
+ totTermCount.set(seenTermCount);
break;
}
- totTermCount.set(seenTermCount);
- seenTermCount = 0;
- if (totTermCount.get() < 10) {
- shift = 0;
+ seenTermCount++;
+ // search 30 terms
+ if (trigger == 0) {
trigger = 1;
- } else {
- trigger = totTermCount.get()/10;
- //System.out.println("trigger " + trigger);
- shift = random.nextInt(trigger);
}
- termsEnum.seekCeil(new BytesRef(""));
- continue;
- }
- seenTermCount++;
- // search 10 terms
- if (trigger == 0) {
- trigger = 1;
- }
- if ((seenTermCount + shift) % trigger == 0) {
- //if (VERBOSE) {
- //System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
- //}
- totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
+ if ((seenTermCount + shift) % trigger == 0) {
+ //if (VERBOSE) {
+ //System.out.println(Thread.currentThread().getName() + " now search body:" + term.utf8ToString());
+ //}
+ totHits.addAndGet(runQuery(s, new TermQuery(new Term("body", term))));
+ }
}
//if (VERBOSE) {
//System.out.println(Thread.currentThread().getName() + ": search done");
@@ -432,7 +424,7 @@ public abstract class ThreadedIndexingAn
final long t0 = System.currentTimeMillis();
- final LineFileDocs docs = new LineFileDocs(random);
+ final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
final File tempDir = _TestUtil.getTempDir(testName);
dir = newFSDirectory(tempDir);
((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
@@ -636,7 +628,14 @@ public abstract class ThreadedIndexingAn
private int runQuery(IndexSearcher s, Query q) throws Exception {
s.search(q, 10);
- return s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
+ int hitCount = s.search(q, null, 10, new Sort(new SortField("title", SortField.Type.STRING))).totalHits;
+ if (defaultCodecSupportsDocValues()) {
+ final Sort dvSort = new Sort(new SortField("title", SortField.Type.STRING));
+ dvSort.getSort()[0].setUseIndexValues(true);
+ int hitCount2 = s.search(q, null, 10, dvSort).totalHits;
+ assertEquals(hitCount, hitCount2);
+ }
+ return hitCount;
}
protected void smokeTestSearcher(IndexSearcher s) throws Exception {
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java Tue Mar 6 23:17:08 2012
@@ -88,7 +88,6 @@ public class CheckHits {
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaying the query in assertion messages
* @param results a list of documentIds that must match the query
- * see Searcher#search(Query,Collector)
* @see #checkHits
*/
public static void checkHitCollector(Random random, Query query, String defaultFieldName,
@@ -116,7 +115,6 @@ public class CheckHits {
Assert.assertEquals("Wrap Reader " + i + ": " +
query.toString(defaultFieldName),
correct, actual);
- QueryUtils.purgeFieldCache(s.getIndexReader()); // our wrapping can create insanity otherwise
}
}
@@ -153,7 +151,6 @@ public class CheckHits {
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
* @param results a list of documentIds that must match the query
- * see Searcher#search(Query, int)
* @see #checkHitCollector
*/
public static void checkHits(
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java Tue Mar 6 23:17:08 2012
@@ -114,13 +114,9 @@ public class QueryUtils {
checkFirstSkipTo(q1,s);
checkSkipTo(q1,s);
if (wrap) {
- IndexSearcher wrapped;
- check(random, q1, wrapped = wrapUnderlyingReader(random, s, -1), false);
- purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
- check(random, q1, wrapped = wrapUnderlyingReader(random, s, 0), false);
- purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
- check(random, q1, wrapped = wrapUnderlyingReader(random, s, +1), false);
- purgeFieldCache(wrapped.getIndexReader()); // our wrapping can create insanity otherwise
+ check(random, q1, wrapUnderlyingReader(random, s, -1), false);
+ check(random, q1, wrapUnderlyingReader(random, s, 0), false);
+ check(random, q1, wrapUnderlyingReader(random, s, +1), false);
}
checkExplanations(q1,s);
@@ -137,6 +133,27 @@ public class QueryUtils {
// this is just a hack, to get an atomic reader that contains all subreaders for insanity checks
FieldCache.DEFAULT.purge(SlowCompositeReaderWrapper.wrap(r));
}
+
+ /** This is a MultiReader that can be used for randomly wrapping other readers
+ * without creating FieldCache insanity.
+ * The trick is to use an opaque/fake cache key. */
+ public static class FCInvisibleMultiReader extends MultiReader {
+ private final Object cacheKey = new Object();
+
+ public FCInvisibleMultiReader(IndexReader... readers) throws IOException {
+ super(readers);
+ }
+
+ @Override
+ public Object getCoreCacheKey() {
+ return cacheKey;
+ }
+
+ @Override
+ public Object getCombinedCoreAndDeletesKey() {
+ return cacheKey;
+ }
+ }
/**
* Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
@@ -157,16 +174,17 @@ public class QueryUtils {
IndexReader[] readers = new IndexReader[] {
edge < 0 ? r : emptyReaders[0],
emptyReaders[0],
- new MultiReader(edge < 0 ? emptyReaders[4] : emptyReaders[0],
+ new FCInvisibleMultiReader(edge < 0 ? emptyReaders[4] : emptyReaders[0],
emptyReaders[0],
0 == edge ? r : emptyReaders[0]),
0 < edge ? emptyReaders[0] : emptyReaders[7],
emptyReaders[0],
- new MultiReader(0 < edge ? emptyReaders[0] : emptyReaders[5],
+ new FCInvisibleMultiReader(0 < edge ? emptyReaders[0] : emptyReaders[5],
emptyReaders[0],
0 < edge ? r : emptyReaders[0])
};
- IndexSearcher out = LuceneTestCase.newSearcher(new MultiReader(readers));
+
+ IndexSearcher out = LuceneTestCase.newSearcher(new FCInvisibleMultiReader(readers));
out.setSimilarity(s.getSimilarity());
return out;
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java Tue Mar 6 23:17:08 2012
@@ -518,7 +518,7 @@ public abstract class ShardSearchingTest
@Override
public void run() {
try {
- final LineFileDocs docs = new LineFileDocs(random);
+ final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
int numDocs = 0;
while (System.nanoTime() < endTimeNanos) {
final int what = random.nextInt(3);
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java Tue Mar 6 23:17:08 2012
@@ -17,22 +17,24 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
-import java.util.Random;
+import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DocValues;
/** Minimal port of contrib/benchmark's LneDocSource +
* DocMaker, so tests can enum docs from a line file created
@@ -43,16 +45,22 @@ public class LineFileDocs implements Clo
private final static int BUFFER_SIZE = 1 << 16; // 64K
private final AtomicInteger id = new AtomicInteger();
private final String path;
+ private final boolean useDocValues;
/** If forever is true, we rewind the file at EOF (repeat
* the docs over and over) */
- public LineFileDocs(Random random, String path) throws IOException {
+ public LineFileDocs(Random random, String path, boolean useDocValues) throws IOException {
this.path = path;
+ this.useDocValues = useDocValues;
open(random);
}
public LineFileDocs(Random random) throws IOException {
- this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
+ this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, true);
+ }
+
+ public LineFileDocs(Random random, boolean useDocValues) throws IOException {
+ this(random, LuceneTestCase.TEST_LINE_DOCS_FILE, useDocValues);
}
public synchronized void close() throws IOException {
@@ -113,11 +121,12 @@ public class LineFileDocs implements Clo
final Document doc;
final Field titleTokenized;
final Field title;
+ final Field titleDV;
final Field body;
final Field id;
final Field date;
- public DocState() {
+ public DocState(boolean useDocValues) {
doc = new Document();
title = new StringField("title", "");
@@ -139,6 +148,13 @@ public class LineFileDocs implements Clo
date = new Field("date", "", StringField.TYPE_STORED);
doc.add(date);
+
+ if (useDocValues) {
+ titleDV = new DocValuesField("titleDV", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
+ doc.add(titleDV);
+ } else {
+ titleDV = null;
+ }
}
}
@@ -162,7 +178,7 @@ public class LineFileDocs implements Clo
DocState docState = threadDocs.get();
if (docState == null) {
- docState = new DocState();
+ docState = new DocState(useDocValues);
threadDocs.set(docState);
}
@@ -178,6 +194,9 @@ public class LineFileDocs implements Clo
docState.body.setStringValue(line.substring(1+spot2, line.length()));
final String title = line.substring(0, spot);
docState.title.setStringValue(title);
+ if (docState.titleDV != null) {
+ docState.titleDV.setBytesValue(new BytesRef(title));
+ }
docState.titleTokenized.setStringValue(title);
docState.date.setStringValue(line.substring(1+spot, spot2));
docState.id.setStringValue(Integer.toString(id.getAndIncrement()));
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Tue Mar 6 23:17:08 2012
@@ -40,9 +40,7 @@ import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.TimeZone;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
@@ -55,7 +53,8 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.CompositeReader;
-import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.FieldFilterAtomicReader;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
@@ -81,6 +80,7 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.RandomSimilarityProvider;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FlushInfo;
@@ -97,6 +97,7 @@ import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.BeforeClass;
+import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.internal.AssumptionViolatedException;
@@ -257,6 +258,11 @@ public abstract class LuceneTestCase ext
private static TimeZone timeZone;
private static TimeZone savedTimeZone;
+ /**
+ * Restore these system property values in {@link #afterClassLuceneTestCaseJ4()}.
+ */
+ private static HashMap<String, String> restoreProperties = new HashMap<String,String>();
+
protected static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
/** @deprecated (4.0) until we fix no-fork problems in solr tests */
@@ -269,10 +275,13 @@ public abstract class LuceneTestCase ext
random.setSeed(staticSeed);
random.initialized = true;
}
-
+
@Deprecated
private static boolean icuTested = false;
+ @ClassRule
+ public static TestRule classRules = RuleChain.outerRule(new SystemPropertiesInvariantRule());
+
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
initRandom();
@@ -282,6 +291,7 @@ public abstract class LuceneTestCase ext
// enable this by default, for IDE consistency with ant tests (as its the default from ant)
// TODO: really should be in solr base classes, but some extend LTC directly.
// we do this in beforeClass, because some tests currently disable it
+ restoreProperties.put("solr.directoryFactory", System.getProperty("solr.directoryFactory"));
if (System.getProperty("solr.directoryFactory") == null) {
System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockDirectoryFactory");
}
@@ -363,6 +373,9 @@ public abstract class LuceneTestCase ext
locale = TEST_LOCALE.equals("random") ? randomLocale(random) : localeForName(TEST_LOCALE);
Locale.setDefault(locale);
+ // TimeZone.getDefault will set user.timezone to the default timezone of the user's locale.
+ // So store the original property value and restore it at end.
+ restoreProperties.put("user.timezone", System.getProperty("user.timezone"));
savedTimeZone = TimeZone.getDefault();
timeZone = TEST_TIMEZONE.equals("random") ? randomTimeZone(random) : TimeZone.getTimeZone(TEST_TIMEZONE);
TimeZone.setDefault(timeZone);
@@ -372,6 +385,15 @@ public abstract class LuceneTestCase ext
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
+ for (Map.Entry<String,String> e : restoreProperties.entrySet()) {
+ if (e.getValue() == null) {
+ System.clearProperty(e.getKey());
+ } else {
+ System.setProperty(e.getKey(), e.getValue());
+ }
+ }
+ restoreProperties.clear();
+
Throwable problem = null;
if (! "false".equals(TEST_CLEAN_THREADS)) {
@@ -558,14 +580,19 @@ public abstract class LuceneTestCase ext
* @see LuceneTestCase#testCaseThread
*/
private class RememberThreadRule implements TestRule {
+ private String previousName;
+
@Override
public Statement apply(final Statement base, Description description) {
return new Statement() {
public void evaluate() throws Throwable {
try {
- LuceneTestCase.this.testCaseThread = Thread.currentThread();
+ Thread current = Thread.currentThread();
+ previousName = current.getName();
+ LuceneTestCase.this.testCaseThread = current;
base.evaluate();
} finally {
+ LuceneTestCase.this.testCaseThread.setName(previousName);
LuceneTestCase.this.testCaseThread = null;
}
}
@@ -582,6 +609,7 @@ public abstract class LuceneTestCase ext
public final TestRule ruleChain = RuleChain
.outerRule(new RememberThreadRule())
.around(new TestResultInterceptorRule())
+ .around(new SystemPropertiesInvariantRule())
.around(new InternalSetupTeardownRule())
.around(new SubclassSetupTeardownRule());
@@ -615,6 +643,9 @@ public abstract class LuceneTestCase ext
seed = "random".equals(TEST_SEED) ? seedRand.nextLong() : ThreeLongs.fromString(TEST_SEED).l2;
random.setSeed(seed);
+ Thread.currentThread().setName("LTC-main#seed=" +
+ new ThreeLongs(staticSeed, seed, LuceneTestCaseRunner.runnerSeed));
+
savedUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) {
@@ -1344,6 +1375,60 @@ public abstract class LuceneTestCase ext
throw new RuntimeException(e);
}
}
+
+ /** Sometimes wraps the IndexReader as a slow, parallel or filter reader (or a combination of these). */
+ public static IndexReader maybeWrapReader(IndexReader r) throws IOException {
+ if (rarely()) {
+ // TODO: remove this, and fix those tests to wrap before putting slow around:
+ final boolean wasOriginallyAtomic = r instanceof AtomicReader;
+ for (int i = 0, c = random.nextInt(6)+1; i < c; i++) {
+ switch(random.nextInt(4)) {
+ case 0:
+ r = SlowCompositeReaderWrapper.wrap(r);
+ break;
+ case 1:
+ // will create no FC insanity in atomic case, as ParallelAtomicReader has own cache key:
+ r = (r instanceof AtomicReader) ?
+ new ParallelAtomicReader((AtomicReader) r) :
+ new ParallelCompositeReader((CompositeReader) r);
+ break;
+ case 2:
+ // Häckidy-Hick-Hack: a standard MultiReader will cause FC insanity, so we use
+ // QueryUtils' reader with a fake cache key, so insanity checker cannot walk
+ // along our reader:
+ r = new FCInvisibleMultiReader(r);
+ break;
+ case 3:
+ final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
+ final List<String> allFields = new ArrayList<String>();
+ for (FieldInfo fi : ar.getFieldInfos()) {
+ allFields.add(fi.name);
+ }
+ Collections.shuffle(allFields, random);
+ final int end = allFields.isEmpty() ? 0 : random.nextInt(allFields.size());
+ final Set<String> fields = new HashSet<String>(allFields.subList(0, end));
+ // will create no FC insanity as ParallelAtomicReader has own cache key:
+ r = new ParallelAtomicReader(
+ new FieldFilterAtomicReader(ar, fields, false),
+ new FieldFilterAtomicReader(ar, fields, true)
+ );
+ break;
+ default:
+ fail("should not get here");
+ }
+ }
+ if (wasOriginallyAtomic) {
+ r = SlowCompositeReaderWrapper.wrap(r);
+ } else if ((r instanceof CompositeReader) && !(r instanceof FCInvisibleMultiReader)) {
+ // prevent cache insanity caused by e.g. ParallelCompositeReader; to fix this, we wrap one more time:
+ r = new FCInvisibleMultiReader(r);
+ }
+ if (VERBOSE) {
+ System.out.println("maybeWrapReader wrapped: " +r);
+ }
+ }
+ return r;
+ }
/** create a new searcher over the reader.
* This searcher might randomly use threads. */
@@ -1358,27 +1443,25 @@ public abstract class LuceneTestCase ext
*/
public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException {
if (usually()) {
- if (maybeWrap && rarely()) {
- r = SlowCompositeReaderWrapper.wrap(r);
- }
- if (maybeWrap && rarely()) {
- // just wrap as MultiReader/ParallelXReader with one subreader
- if (random.nextBoolean()) {
- r = (r instanceof AtomicReader) ?
- new ParallelAtomicReader((AtomicReader) r) :
- new ParallelCompositeReader((CompositeReader) r);
- } else if (r instanceof CompositeReader) { // only wrap if not already atomic (some tests may fail)
- r = new MultiReader(r);
- }
+ if (maybeWrap) {
+ r = maybeWrapReader(r);
}
IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(random, r) : new AssertingIndexSearcher(random, r.getTopReaderContext());
ret.setSimilarity(similarity);
return ret;
} else {
int threads = 0;
- final ExecutorService ex = (random.nextBoolean()) ? null
- : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
- new NamedThreadFactory("LuceneTestCase"));
+ final ThreadPoolExecutor ex;
+ if (random.nextBoolean()) {
+ ex = null;
+ } else {
+ threads = _TestUtil.nextInt(random, 1, 8);
+ ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
+ new LinkedBlockingQueue<Runnable>(),
+ new NamedThreadFactory("LuceneTestCase"));
+ // uncomment to intensify LUCENE-3840
+ // ex.prestartAllCoreThreads();
+ }
if (ex != null) {
if (VERBOSE) {
System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
@@ -1530,4 +1613,8 @@ public abstract class LuceneTestCase ext
@Ignore("just a hack")
public final void alwaysIgnoredTestMethod() {}
+
+ protected static boolean defaultCodecSupportsDocValues() {
+ return !Codec.getDefault().getName().equals("Lucene3x");
+ }
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Tue Mar 6 23:17:08 2012
@@ -26,6 +26,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
+import java.nio.CharBuffer;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
@@ -37,10 +38,12 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfos;
@@ -190,8 +193,8 @@ public class _TestUtil {
return start + r.nextInt(end-start+1);
}
- public static String randomSimpleString(Random r) {
- final int end = r.nextInt(10);
+ public static String randomSimpleString(Random r, int maxLength) {
+ final int end = r.nextInt(maxLength);
if (end == 0) {
// allow 0 length
return "";
@@ -203,6 +206,10 @@ public class _TestUtil {
return new String(buffer, 0, end);
}
+ public static String randomSimpleString(Random r) {
+ return randomSimpleString(r, 10);
+ }
+
/** Returns random string, including full unicode range. */
public static String randomUnicodeString(Random r) {
return randomUnicodeString(r, 20);
@@ -249,6 +256,36 @@ public class _TestUtil {
}
}
+ /**
+ * Returns a String that's "regexpish" (contains lots of operators typically found in regular expressions).
+ * If you call this enough times, you might get a valid regex!
+ */
+ public static String randomRegexpishString(Random r) {
+ final int end = r.nextInt(20);
+ if (end == 0) {
+ // allow 0 length
+ return "";
+ }
+ final char[] buffer = new char[end];
+ for (int i = 0; i < end; i++) {
+ int t = r.nextInt(11);
+ if (t == 0) {
+ buffer[i] = (char) _TestUtil.nextInt(r, 97, 102);
+ }
+ else if (1 == t) buffer[i] = '.';
+ else if (2 == t) buffer[i] = '?';
+ else if (3 == t) buffer[i] = '*';
+ else if (4 == t) buffer[i] = '+';
+ else if (5 == t) buffer[i] = '(';
+ else if (6 == t) buffer[i] = ')';
+ else if (7 == t) buffer[i] = '-';
+ else if (8 == t) buffer[i] = '[';
+ else if (9 == t) buffer[i] = ']';
+ else if (10 == t) buffer[i] = '|';
+ }
+ return new String(buffer, 0, end);
+ }
+
private static final String[] HTML_CHAR_ENTITIES = {
"AElig", "Aacute", "Acirc", "Agrave", "Alpha", "AMP", "Aring", "Atilde",
"Auml", "Beta", "COPY", "Ccedil", "Chi", "Dagger", "Delta", "ETH",
@@ -646,9 +683,36 @@ public class _TestUtil {
public static Document cloneDocument(Document doc1) {
final Document doc2 = new Document();
for(IndexableField f : doc1) {
- Field field1 = (Field) f;
-
- Field field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
+ final Field field1 = (Field) f;
+ final Field field2;
+ if (field1 instanceof DocValuesField) {
+ final DocValues.Type dvType = field1.fieldType().docValueType();
+ switch (dvType) {
+ case VAR_INTS:
+ case FIXED_INTS_8:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ field2 = new DocValuesField(field1.name(), field1.numericValue().intValue(), dvType);
+ break;
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_FIXED_SORTED:
+ case BYTES_VAR_SORTED:
+ field2 = new DocValuesField(field1.name(), BytesRef.deepCopyOf(field1.binaryValue()), dvType);
+ break;
+ case FLOAT_32:
+ case FLOAT_64:
+ field2 = new DocValuesField(field1.name(), field1.numericValue().doubleValue(), dvType);
+ break;
+ default:
+ throw new IllegalArgumentException("don't know how to clone DV field=" + field1);
+ }
+ } else {
+ field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());
+ }
doc2.add(field2);
}
@@ -707,4 +771,23 @@ public class _TestUtil {
}
return termsEnum.docs(liveDocs, null, needsFreqs);
}
+
+ public static CharSequence stringToCharSequence(String string, Random random) {
+ return bytesToCharSequence(new BytesRef(string), random);
+ }
+
+ public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
+ switch(random.nextInt(5)) {
+ case 4:
+ CharsRef chars = new CharsRef(ref.length);
+ UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, chars);
+ return chars;
+ case 3:
+ return CharBuffer.wrap(ref.utf8ToString());
+ default:
+ return ref.utf8ToString();
+ }
+
+ }
+
}
Modified: lucene/dev/branches/lucene3795_lsp_spatial_module/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3795_lsp_spatial_module/modules/analysis/CHANGES.txt?rev=1297785&r1=1297784&r2=1297785&view=diff
==============================================================================
--- lucene/dev/branches/lucene3795_lsp_spatial_module/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/lucene3795_lsp_spatial_module/modules/analysis/CHANGES.txt Tue Mar 6 23:17:08 2012
@@ -7,6 +7,9 @@ http://s.apache.org/luceneversions
API Changes
+ * LUCENE-3820: Deprecated constructors accepting pattern matching bounds. The input
+ is buffered and matched in one pass. (Dawid Weiss)
+
* LUCENE-2413: Deprecated PatternAnalyzer in common/miscellaneous, in favor
of the pattern package (CharFilter, Tokenizer, TokenFilter). (Robert Muir)
@@ -34,6 +37,11 @@ API Changes
and sometimes different depending on the type of set, and ultimately a CharArraySet
or CharArrayMap was always used anyway. (Robert Muir)
+Bug fixes
+
+ * LUCENE-3820: PatternReplaceCharFilter could return invalid token positions.
+ (Dawid Weiss)
+
New Features
* LUCENE-2341: A new analyzer/ filter: Morfologik - a dictionary-driven lemmatizer