You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2011/01/13 20:53:39 UTC
svn commit: r1058718 [11/18] - in /lucene/dev/branches/realtime_search: ./
lucene/ lucene/contrib/ lucene/contrib/ant/src/java/org/apache/lucene/ant/
lucene/contrib/ant/src/test/org/apache/lucene/ant/
lucene/contrib/benchmark/ lucene/contrib/demo/src/j...
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java Thu Jan 13 19:53:21 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.document.*;
import java.util.Random;
import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.io.IOException;
public class TestThreadSafe extends LuceneTestCase {
@@ -34,16 +35,16 @@ public class TestThreadSafe extends Luce
IndexReader ir1;
- String failure=null;
-
-
class Thr extends Thread {
final int iter;
final Random rand;
+ final AtomicBoolean failed;
+
// pass in random in case we want to make things reproducible
- public Thr(int iter, Random rand) {
+ public Thr(int iter, Random rand, AtomicBoolean failed) {
this.iter = iter;
this.rand = rand;
+ this.failed = failed;
}
@Override
@@ -61,8 +62,8 @@ public class TestThreadSafe extends Luce
}
} catch (Throwable th) {
- failure=th.toString();
- fail(failure);
+ failed.set(true);
+ throw new RuntimeException(th);
}
}
@@ -124,16 +125,15 @@ public class TestThreadSafe extends Luce
void doTest(int iter, int nThreads) throws Exception {
Thr[] tarr = new Thr[nThreads];
+ AtomicBoolean failed = new AtomicBoolean();
for (int i=0; i<nThreads; i++) {
- tarr[i] = new Thr(iter, new Random(random.nextLong()));
+ tarr[i] = new Thr(iter, new Random(random.nextLong()), failed);
tarr[i].start();
}
for (int i=0; i<nThreads; i++) {
tarr[i].join();
}
- if (failure!=null) {
- fail(failure);
- }
+ assertFalse(failed.get());
}
public void testLazyLoadThreadSafety() throws Exception{
@@ -142,7 +142,7 @@ public class TestThreadSafe extends Luce
buildDir(dir1, 15, 5, 2000);
// do many small tests so the thread locals go away in between
- int num = 100 * RANDOM_MULTIPLIER;
+ int num = 10 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
ir1 = IndexReader.open(dir1, false);
doTest(10,10);
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTimeLimitingCollector.java Thu Jan 13 19:53:21 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
@@ -50,7 +51,7 @@ public class TestTimeLimitingCollector e
private static final int N_DOCS = 3000;
private static final int N_THREADS = 50;
- private Searcher searcher;
+ private IndexSearcher searcher;
private Directory directory;
private IndexReader reader;
@@ -339,8 +340,8 @@ public class TestTimeLimitingCollector e
}
@Override
- public void setNextReader(IndexReader reader, int base) {
- docBase = base;
+ public void setNextReader(AtomicReaderContext context) {
+ docBase = context.docBase;
}
@Override
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTopDocsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTopDocsCollector.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTopDocsCollector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestTopDocsCollector.java Thu Jan 13 19:53:21 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -60,9 +61,9 @@ public class TestTopDocsCollector extend
}
@Override
- public void setNextReader(IndexReader reader, int docBase)
+ public void setNextReader(AtomicReaderContext context)
throws IOException {
- base = docBase;
+ base = context.docBase;
}
@Override
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/TestWildcardRandom.java Thu Jan 13 19:53:21 2011
@@ -38,7 +38,7 @@ import org.apache.lucene.util._TestUtil;
* and validates the correct number of hits are returned.
*/
public class TestWildcardRandom extends LuceneTestCase {
- private Searcher searcher;
+ private IndexSearcher searcher;
private IndexReader reader;
private Directory dir;
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/JustCompileSearchSpans.java Thu Jan 13 19:53:21 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.search.functio
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import java.io.IOException;
@@ -82,7 +83,7 @@ final class JustCompileSearchFunction {
}
@Override
- public DocValues getValues(IndexReader reader) throws IOException {
+ public DocValues getValues(AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestCustomScoreQuery.java Thu Jan 13 19:53:21 2011
@@ -329,7 +329,7 @@ public class TestCustomScoreQuery extend
}
}
- private void logResult(String msg, Searcher s, Query q, int doc, float score1) throws IOException {
+ private void logResult(String msg, IndexSearcher s, Query q, int doc, float score1) throws IOException {
log(msg+" "+score1);
log("Explain by: "+q);
log(s.explain(q,doc));
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestFieldScoreQuery.java Thu Jan 13 19:53:21 2011
@@ -19,12 +19,13 @@ package org.apache.lucene.search.functio
import java.util.HashMap;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.ReaderUtil;
import org.junit.Test;
/**
@@ -185,12 +186,12 @@ public class TestFieldScoreQuery extends
FieldScoreQuery q = new FieldScoreQuery(field,tp);
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
assertEquals("All docs should be matched!",N_DOCS,h.length);
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
- for (int j = 0; j < readers.length; j++) {
- IndexReader reader = readers[j];
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
+ for (int j = 0; j < leaves.length; j++) {
+ AtomicReaderContext leaf = leaves[j];
try {
if (i == 0) {
- innerArray[j] = q.valSrc.getValues(reader).getInnerArray();
+ innerArray[j] = q.valSrc.getValues(leaf).getInnerArray();
log(i + ". compare: " + innerArray[j].getClass() + " to "
+ expectedArrayTypes.get(tp).getClass());
assertEquals(
@@ -198,9 +199,9 @@ public class TestFieldScoreQuery extends
innerArray[j].getClass(), expectedArrayTypes.get(tp).getClass());
} else {
log(i + ". compare: " + innerArray[j] + " to "
- + q.valSrc.getValues(reader).getInnerArray());
+ + q.valSrc.getValues(leaf).getInnerArray());
assertSame("field values should be cached and reused!", innerArray[j],
- q.valSrc.getValues(reader).getInnerArray());
+ q.valSrc.getValues(leaf).getInnerArray());
}
} catch (UnsupportedOperationException e) {
if (!warned) {
@@ -217,15 +218,15 @@ public class TestFieldScoreQuery extends
FieldScoreQuery q = new FieldScoreQuery(field,tp);
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
assertEquals("All docs should be matched!",N_DOCS,h.length);
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
- for (int j = 0; j < readers.length; j++) {
- IndexReader reader = readers[j];
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
+ for (int j = 0; j < leaves.length; j++) {
+ AtomicReaderContext leaf = leaves[j];
try {
log("compare: " + innerArray + " to "
- + q.valSrc.getValues(reader).getInnerArray());
+ + q.valSrc.getValues(leaf).getInnerArray());
assertNotSame(
"cached field values should not be reused if reader as changed!",
- innerArray, q.valSrc.getValues(reader).getInnerArray());
+ innerArray, q.valSrc.getValues(leaf).getInnerArray());
} catch (UnsupportedOperationException e) {
if (!warned) {
System.err.println("WARNING: " + testName()
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestOrdValues.java Thu Jan 13 19:53:21 2011
@@ -18,8 +18,9 @@ package org.apache.lucene.search.functio
*/
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.*;
+import org.apache.lucene.util.ReaderUtil;
import org.junit.Test;
/**
@@ -168,14 +169,14 @@ public class TestOrdValues extends Funct
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
try {
assertEquals("All docs should be matched!", N_DOCS, h.length);
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
- for (IndexReader reader : readers) {
+ for (AtomicReaderContext leaf : leaves) {
if (i == 0) {
- innerArray = q.valSrc.getValues(reader).getInnerArray();
+ innerArray = q.valSrc.getValues(leaf).getInnerArray();
} else {
- log(i + ". compare: " + innerArray + " to " + q.valSrc.getValues(reader).getInnerArray());
- assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(reader).getInnerArray());
+ log(i + ". compare: " + innerArray + " to " + q.valSrc.getValues(leaf).getInnerArray());
+ assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(leaf).getInnerArray());
}
}
} catch (UnsupportedOperationException e) {
@@ -201,15 +202,15 @@ public class TestOrdValues extends Funct
q = new ValueSourceQuery(vs);
h = s.search(q, null, 1000).scoreDocs;
assertEquals("All docs should be matched!", N_DOCS, h.length);
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
- for (IndexReader reader : readers) {
+ for (AtomicReaderContext leaf : leaves) {
try {
log("compare (should differ): " + innerArray + " to "
- + q.valSrc.getValues(reader).getInnerArray());
+ + q.valSrc.getValues(leaf).getInnerArray());
assertNotSame(
"different values should be loaded for a different field!",
- innerArray, q.valSrc.getValues(reader).getInnerArray());
+ innerArray, q.valSrc.getValues(leaf).getInnerArray());
} catch (UnsupportedOperationException e) {
if (!warned) {
System.err.println("WARNING: " + testName()
@@ -229,15 +230,15 @@ public class TestOrdValues extends Funct
q = new ValueSourceQuery(vs);
h = s.search(q, null, 1000).scoreDocs;
assertEquals("All docs should be matched!", N_DOCS, h.length);
- readers = s.getIndexReader().getSequentialSubReaders();
+ leaves = ReaderUtil.leaves(s.getTopReaderContext());
- for (IndexReader reader : readers) {
+ for (AtomicReaderContext leaf : leaves) {
try {
log("compare (should differ): " + innerArray + " to "
- + q.valSrc.getValues(reader).getInnerArray());
+ + q.valSrc.getValues(leaf).getInnerArray());
assertNotSame(
"cached field values should not be reused if reader as changed!",
- innerArray, q.valSrc.getValues(reader).getInnerArray());
+ innerArray, q.valSrc.getValues(leaf).getInnerArray());
} catch (UnsupportedOperationException e) {
if (!warned) {
System.err.println("WARNING: " + testName()
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java Thu Jan 13 19:53:21 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.store.*;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.document.*;
public class TestValueSource extends LuceneTestCase {
@@ -45,11 +46,17 @@ public class TestValueSource extends Luc
assertTrue(r.getSequentialSubReaders().length > 1);
ValueSource s1 = new IntFieldSource("field");
- DocValues v1 = s1.getValues(r);
- DocValues v2 = new MultiValueSource(s1).getValues(r);
-
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext());
+ DocValues v1 = null;
+ DocValues v2 = new MultiValueSource(s1).getValues(r.getTopReaderContext());
+ int leafOrd = -1;
for(int i=0;i<r.maxDoc();i++) {
- assertEquals(v1.intVal(i), i);
+ int subIndex = ReaderUtil.subIndex(i, leaves);
+ if (subIndex != leafOrd) {
+ leafOrd = subIndex;
+ v1 = s1.getValues(leaves[leafOrd]);
+ }
+ assertEquals(v1.intVal(i - leaves[leafOrd].docBase), i);
assertEquals(v2.intVal(i), i);
}
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Thu Jan 13 19:53:21 2011
@@ -26,16 +26,16 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -306,8 +306,8 @@ public class TestPayloadNearQuery extend
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//Make everything else 1 so we see the effect of the payload
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- @Override public float lengthNorm(String fieldName, int numTerms) {
- return 1.0f;
+ @Override public float computeNorm(String fieldName, FieldInvertState state) {
+ return state.getBoost();
}
@Override public float queryNorm(float sumOfSquaredWeights) {
@@ -325,7 +325,7 @@ public class TestPayloadNearQuery extend
return 1.0f;
}
// idf used for phrase queries
- @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+ @Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
return new IDFExplanation() {
@Override
public float getIdf() {
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java Thu Jan 13 19:53:21 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
@@ -299,8 +300,8 @@ public class TestPayloadTermQuery extend
//Make everything else 1 so we see the effect of the payload
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@Override
- public float lengthNorm(String fieldName, int numTerms) {
- return 1;
+ public float computeNorm(String fieldName, FieldInvertState state) {
+ return state.getBoost();
}
@Override
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java Thu Jan 13 19:53:21 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -30,6 +31,7 @@ import org.apache.lucene.search.Explanat
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight.ScorerContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -168,7 +170,8 @@ public class TestNearSpansOrdered extend
public void testSpanNearScorerSkipTo1() throws Exception {
SpanNearQuery q = makeQuery();
Weight w = q.weight(searcher);
- Scorer s = w.scorer(searcher.getIndexReader(), true, false);
+ assertTrue(searcher.getTopReaderContext().isAtomic);
+ Scorer s = w.scorer((AtomicReaderContext) searcher.getTopReaderContext(), ScorerContext.def());
assertEquals(1, s.advance(1));
}
/**
@@ -177,7 +180,8 @@ public class TestNearSpansOrdered extend
*/
public void testSpanNearScorerExplain() throws Exception {
SpanNearQuery q = makeQuery();
- Explanation e = q.weight(searcher).explain(searcher.getIndexReader(), 1);
+ assertTrue(searcher.getTopReaderContext().isAtomic);
+ Explanation e = q.weight(searcher).explain((AtomicReaderContext) searcher.getTopReaderContext(), 1);
assertTrue("Scorer explanation value for doc#1 isn't positive: "
+ e.toString(),
0.0f < e.getValue());
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java Thu Jan 13 19:53:21 2011
@@ -24,7 +24,6 @@ import org.apache.lucene.index.RandomInd
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -35,7 +34,7 @@ import org.apache.lucene.util.LuceneTest
public class TestSpanMultiTermQueryWrapper extends LuceneTestCase {
private Directory directory;
private IndexReader reader;
- private Searcher searcher;
+ private IndexSearcher searcher;
@Override
public void setUp() throws Exception {
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java Thu Jan 13 19:53:21 2011
@@ -18,18 +18,19 @@ package org.apache.lucene.search.spans;
*/
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Weight.ScorerContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
@@ -416,12 +417,12 @@ public class TestSpans extends LuceneTes
slop,
ordered) {
@Override
- public Similarity getSimilarity(Searcher s) {
+ public Similarity getSimilarity(IndexSearcher s) {
return sim;
}
- };
+ };
- Scorer spanScorer = snq.weight(searcher).scorer(new SlowMultiReaderWrapper(searcher.getIndexReader()), true, false);
+ Scorer spanScorer = snq.weight(searcher).scorer(new AtomicReaderContext(new SlowMultiReaderWrapper(searcher.getIndexReader())), ScorerContext.def());
assertTrue("first doc", spanScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals("first doc number", spanScorer.docID(), 11);
@@ -439,7 +440,7 @@ public class TestSpans extends LuceneTes
}
// LUCENE-1404
- private int hitCount(Searcher searcher, String word) throws Throwable {
+ private int hitCount(IndexSearcher searcher, String word) throws Throwable {
return searcher.search(new TermQuery(new Term("text", word)), 10).totalHits;
}
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java Thu Jan 13 19:53:21 2011
@@ -134,7 +134,7 @@ public class TestSpansAdvanced extends L
*
* @throws IOException
*/
- protected static void assertHits(Searcher s, Query query,
+ protected static void assertHits(IndexSearcher s, Query query,
final String description, final String[] expectedIds,
final float[] expectedScores) throws IOException {
QueryUtils.check(random, query, s);
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java Thu Jan 13 19:53:21 2011
@@ -180,4 +180,22 @@ public class TestRAMDirectory extends Lu
}
dir.delete();
}
+
+ // LUCENE-2852
+ public void testSeekToEOFThenBack() throws Exception {
+ RAMDirectory dir = new RAMDirectory();
+
+ IndexOutput o = dir.createOutput("out");
+ byte[] bytes = new byte[3*RAMInputStream.BUFFER_SIZE];
+ o.writeBytes(bytes, 0, bytes.length);
+ o.close();
+
+ IndexInput i = dir.openInput("out");
+ i.seek(2*RAMInputStream.BUFFER_SIZE-1);
+ i.seek(3*RAMInputStream.BUFFER_SIZE);
+ i.seek(RAMInputStream.BUFFER_SIZE);
+ i.readBytes(bytes, 0, 2*RAMInputStream.BUFFER_SIZE);
+ i.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LineFileDocs.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LineFileDocs.java Thu Jan 13 19:53:21 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
import java.io.Closeable;
+import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.BufferedReader;
@@ -26,6 +27,7 @@ import java.io.InputStream;
import java.io.BufferedInputStream;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.GZIPInputStream;
+import java.util.Random;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -36,21 +38,19 @@ import org.apache.lucene.document.Field;
public class LineFileDocs implements Closeable {
private BufferedReader reader;
- private final boolean forever;
private final static int BUFFER_SIZE = 1 << 16; // 64K
private final AtomicInteger id = new AtomicInteger();
private final String path;
// At EOF we always rewind the file (repeat
// the docs over and over)
- public LineFileDocs(String path, boolean forever) throws IOException {
+ public LineFileDocs(Random random, String path) throws IOException {
this.path = path;
- this.forever = forever;
- open();
+ open(random);
}
- public LineFileDocs(boolean forever) throws IOException {
- this(LuceneTestCase.TEST_LINE_DOCS_FILE, forever);
+ public LineFileDocs(Random random) throws IOException {
+ this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
}
public synchronized void close() throws IOException {
@@ -60,22 +60,49 @@ public class LineFileDocs implements Clo
}
}
- private synchronized void open() throws IOException {
+ private synchronized void open(Random random) throws IOException {
InputStream is = getClass().getResourceAsStream(path);
if (is == null) {
// if its not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir)
is = new FileInputStream(path);
}
+ File file = new File(path);
+ long size;
+ if (file.exists()) {
+ size = file.length();
+ } else {
+ size = is.available();
+ }
if (path.endsWith(".gz")) {
is = new GZIPInputStream(is);
+ // guesstimate:
+ size *= 2.8;
}
+
final InputStream in = new BufferedInputStream(is, BUFFER_SIZE);
reader = new BufferedReader(new InputStreamReader(in, "UTF-8"), BUFFER_SIZE);
+
+ // Override sizes for currently "known" line files:
+ if (path.equals("europarl.lines.txt.gz")) {
+ size = 15129506L;
+ } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) {
+ size = 3038178822L;
+ }
+
+ // Randomly seek to starting point:
+ if (random != null && size > 3) {
+ final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3);
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
+ }
+ reader.skip(seekTo);
+ reader.readLine();
+ }
}
- public synchronized void reset() throws IOException {
+ public synchronized void reset(Random random) throws IOException {
close();
- open();
+ open(random);
id.set(0);
}
@@ -117,15 +144,13 @@ public class LineFileDocs implements Clo
synchronized(this) {
line = reader.readLine();
if (line == null) {
- if (forever) {
- if (LuceneTestCase.VERBOSE) {
- System.out.println("TEST: LineFileDocs: now rewind file...");
- }
- close();
- open();
- line = reader.readLine();
+ // Always rewind at end:
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println("TEST: LineFileDocs: now rewind file...");
}
- return null;
+ close();
+ open(null);
+ line = reader.readLine();
}
}
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java Thu Jan 13 19:53:21 2011
@@ -61,6 +61,7 @@ import org.apache.lucene.index.codecs.Co
import org.apache.lucene.index.codecs.mockintblock.MockFixedIntBlockCodec;
import org.apache.lucene.index.codecs.mockintblock.MockVariableIntBlockCodec;
import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
+import org.apache.lucene.index.codecs.mockrandom.MockRandomCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
@@ -226,7 +227,7 @@ public abstract class LuceneTestCase ext
private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
- private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock"};
+ private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"};
private static void swapCodec(Codec c, CodecProvider cp) {
Codec prior = null;
@@ -279,6 +280,7 @@ public abstract class LuceneTestCase ext
swapCodec(new MockFixedIntBlockCodec(codecHasParam && "MockFixedIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 2000)), cp);
// baseBlockSize cannot be over 127:
swapCodec(new MockVariableIntBlockCodec(codecHasParam && "MockVariableIntBlock".equals(codec) ? codecParam : _TestUtil.nextInt(random, 1, 127)), cp);
+ swapCodec(new MockRandomCodec(random), cp);
return cp.lookup(codec);
}
@@ -295,9 +297,9 @@ public abstract class LuceneTestCase ext
cp.unregister(cp.lookup("MockSep"));
cp.unregister(cp.lookup("MockFixedIntBlock"));
cp.unregister(cp.lookup("MockVariableIntBlock"));
+ cp.unregister(cp.lookup("MockRandom"));
swapCodec(new PulsingCodec(1), cp);
cp.setDefaultFieldCodec(savedDefaultCodec);
-
}
// randomly picks from core and test codecs
@@ -415,6 +417,16 @@ public abstract class LuceneTestCase ext
if (testsFailed) {
System.err.println("NOTE: all tests run in this JVM:");
System.err.println(Arrays.toString(testClassesRun.toArray()));
+ System.err.println("NOTE: " + System.getProperty("os.name") + " "
+ + System.getProperty("os.version") + " "
+ + System.getProperty("os.arch") + "/"
+ + System.getProperty("java.vendor") + " "
+ + System.getProperty("java.version") + " "
+ + (Constants.JRE_IS_64BIT ? "(64-bit)" : "(32-bit)") + "/"
+ + "cpus=" + Runtime.getRuntime().availableProcessors() + ","
+ + "threads=" + Thread.activeCount() + ","
+ + "free=" + Runtime.getRuntime().freeMemory() + ","
+ + "total=" + Runtime.getRuntime().totalMemory());
}
}
@@ -1134,8 +1146,15 @@ public abstract class LuceneTestCase ext
@Override
protected void runChild(FrameworkMethod arg0, RunNotifier arg1) {
- for (int i = 0; i < TEST_ITER; i++)
+ if (VERBOSE) {
+ System.out.println("\nNOTE: running test " + arg0.getName());
+ }
+ for (int i = 0; i < TEST_ITER; i++) {
+ if (VERBOSE && TEST_ITER > 1) {
+ System.out.println("\nNOTE: running iter=" + (1+i) + " of " + TEST_ITER);
+ }
super.runChild(arg0, arg1);
+ }
}
public LuceneTestCaseRunner(Class<?> clazz) throws InitializationError {
Modified: lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Thu Jan 13 19:53:21 2011
@@ -20,13 +20,13 @@ package org.apache.lucene.util.automaton
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
-import java.io.InputStreamReader;
import java.io.IOException;
+import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -40,6 +40,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -122,8 +123,10 @@ public class TestFSTs extends LuceneTest
}
public void testBasicFSA() throws IOException {
- String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
+ String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation", "stat"};
+ String[] strings2 = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
IntsRef[] terms = new IntsRef[strings.length];
+ IntsRef[] terms2 = new IntsRef[strings2.length];
for(int inputMode=0;inputMode<2;inputMode++) {
if (VERBOSE) {
System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
@@ -132,6 +135,10 @@ public class TestFSTs extends LuceneTest
for(int idx=0;idx<strings.length;idx++) {
terms[idx] = toIntsRef(strings[idx], inputMode);
}
+ for(int idx=0;idx<strings2.length;idx++) {
+ terms2[idx] = toIntsRef(strings2[idx], inputMode);
+ }
+ Arrays.sort(terms2);
doTest(inputMode, terms);
@@ -141,8 +148,8 @@ public class TestFSTs extends LuceneTest
{
final Outputs<Object> outputs = NoOutputs.getSingleton();
final Object NO_OUTPUT = outputs.getNoOutput();
- final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
- for(IntsRef term : terms) {
+ final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms2.length);
+ for(IntsRef term : terms2) {
pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
}
FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
@@ -154,9 +161,9 @@ public class TestFSTs extends LuceneTest
// FST ord pos int
{
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
- final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
- for(int idx=0;idx<terms.length;idx++) {
- pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
+ final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
+ for(int idx=0;idx<terms2.length;idx++) {
+ pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], outputs.get(idx)));
}
final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
assertNotNull(fst);
@@ -168,10 +175,10 @@ public class TestFSTs extends LuceneTest
{
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final BytesRef NO_OUTPUT = outputs.getNoOutput();
- final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
- for(int idx=0;idx<terms.length;idx++) {
+ final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms2.length);
+ for(int idx=0;idx<terms2.length;idx++) {
final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
- pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
+ pairs.add(new FSTTester.InputOutput<BytesRef>(terms2[idx], output));
}
final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
assertNotNull(fst);
@@ -322,19 +329,6 @@ public class TestFSTs extends LuceneTest
}
}
- private String getRandomString() {
- final String term;
- if (random.nextBoolean()) {
- term = _TestUtil.randomRealisticUnicodeString(random);
- } else {
- // we want to mix in limited-alphabet symbols so
- // we get more sharing of the nodes given how few
- // terms we are testing...
- term = simpleRandomString(random);
- }
- return term;
- }
-
public void doTest() throws IOException {
// no pruning
doTest(0, 0);
@@ -346,156 +340,83 @@ public class TestFSTs extends LuceneTest
doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
}
- // NOTE: only copies the stuff this test needs!!
- private FST.Arc<T> copyArc(FST.Arc<T> arc) {
- final FST.Arc<T> copy = new FST.Arc<T>();
- copy.label = arc.label;
- copy.target = arc.target;
- copy.output = arc.output;
- copy.nextFinalOutput = arc.nextFinalOutput;
- return arc;
- }
-
// runs the term, returning the output, or null if term
- // isn't accepted. if stopNode is non-null it must be
- // length 2 int array; stopNode[0] will be the last
- // matching node (-1 if the term is accepted)
- // and stopNode[1] will be the length of the
- // term prefix that matches
- private T run(FST<T> fst, IntsRef term, int[] stopNode) throws IOException {
- if (term.length == 0) {
- final T output = fst.getEmptyOutput();
- if (stopNode != null) {
- stopNode[1] = 0;
- if (output != null) {
- // accepted
- stopNode[0] = -1;
- } else {
- stopNode[0] = fst.getStartNode();
- }
+ // isn't accepted. If prefixLength is non-null it must be
+ // length 1 int array; prefixLength[0] is set to the length
+ // of the term prefix that matches
+ private T run(FST<T> fst, IntsRef term, int[] prefixLength) throws IOException {
+ assert prefixLength == null || prefixLength.length == 1;
+ final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
+ final T NO_OUTPUT = fst.outputs.getNoOutput();
+ T output = NO_OUTPUT;
+
+ for(int i=0;i<=term.length;i++) {
+ final int label;
+ if (i == term.length) {
+ label = FST.END_LABEL;
+ } else {
+ label = term.ints[term.offset+i];
}
- return output;
- }
-
- final FST.Arc<T> arc = new FST.Arc<T>();
- int node = fst.getStartNode();
- int lastNode = -1;
- T output = fst.outputs.getNoOutput();
- //System.out.println("match?");
- for(int i=0;i<term.length;i++) {
- //System.out.println(" int=" + term.ints[i]);
- if (!fst.hasArcs(node)) {
- //System.out.println(" no arcs!");
- // hit end node before term's end
- if (stopNode != null) {
- stopNode[0] = lastNode;
- stopNode[1] = i-1;
+ //System.out.println(" loop i=" + i + " label=" + label + " output=" + fst.outputs.outputToString(output) + " curArc: target=" + arc.target + " isFinal?=" + arc.isFinal());
+ if (fst.findTargetArc(label, arc, arc) == null) {
+ if (prefixLength != null) {
+ prefixLength[0] = i;
return output;
} else {
return null;
}
}
-
- if (fst.findArc(node, term.ints[term.offset + i], arc) != null) {
- node = arc.target;
- //System.out.println(" match final?=" + arc.isFinal());
- if (arc.output != fst.outputs.getNoOutput()) {
- output = fst.outputs.add(output, arc.output);
- }
- } else if (stopNode != null) {
- stopNode[0] = node;
- stopNode[1] = i;
- return output;
- } else {
- //System.out.println(" no match");
- return null;
- }
-
- lastNode = node;
- }
-
- if (!arc.isFinal()) {
- // hit term's end before end node
- if (stopNode != null) {
- stopNode[0] = node;
- stopNode[1] = term.length;
- return output;
- } else {
- return null;
- }
+ output = fst.outputs.add(output, arc.output);
}
- if (arc.nextFinalOutput != fst.outputs.getNoOutput()) {
- output = fst.outputs.add(output, arc.nextFinalOutput);
+ if (prefixLength != null) {
+ prefixLength[0] = term.length;
}
- if (stopNode != null) {
- stopNode[0] = -1;
- stopNode[1] = term.length;
- }
return output;
}
private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
- int node = fst.getStartNode();
+ FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
- if (fst.noNodes()) {
- // degenerate FST: only accepts the empty string
- assertTrue(fst.getEmptyOutput() != null);
- in.length = 0;
- return fst.getEmptyOutput();
- }
final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
in.length = 0;
in.offset = 0;
- T output = fst.outputs.getNoOutput();
- //System.out.println("get random");
+ final T NO_OUTPUT = fst.outputs.getNoOutput();
+ T output = NO_OUTPUT;
+
while(true) {
// read all arcs:
- //System.out.println(" n=" + node);
- int arcAddress = node;
- FST.Arc<T> arc = new FST.Arc<T>();
- fst.readFirstArc(arcAddress, arc);
- arcs.add(copyArc(arc));
+ fst.readFirstTargetArc(arc, arc);
+ arcs.add(new FST.Arc<T>().copyFrom(arc));
while(!arc.isLast()) {
fst.readNextArc(arc);
- arcs.add(copyArc(arc));
+ arcs.add(new FST.Arc<T>().copyFrom(arc));
}
// pick one
arc = arcs.get(random.nextInt(arcs.size()));
-
arcs.clear();
+ // accumulate output
+ output = fst.outputs.add(output, arc.output);
+
// append label
+ if (arc.label == FST.END_LABEL) {
+ break;
+ }
+
if (in.ints.length == in.length) {
in.grow(1+in.length);
}
in.ints[in.length++] = arc.label;
-
- output = fst.outputs.add(output, arc.output);
-
- // maybe stop
- if (arc.isFinal()) {
- if (fst.hasArcs(arc.target)) {
- // final state but it also has outgoing edges
- if (random.nextBoolean()) {
- output = fst.outputs.add(output, arc.nextFinalOutput);
- break;
- }
- } else {
- break;
- }
- }
-
- node = arc.target;
}
return output;
}
- private FST<T> doTest(int prune1, int prune2) throws IOException {
+ FST<T> doTest(int prune1, int prune2) throws IOException {
if (VERBOSE) {
System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
}
@@ -524,7 +445,7 @@ public class TestFSTs extends LuceneTest
if (VERBOSE && pairs.size() <= 20 && fst != null) {
PrintStream ps = new PrintStream("out.dot");
- fst.toDot(ps);
+ Util.toDot(fst, ps);
ps.close();
System.out.println("SAVED out.dot");
}
@@ -566,11 +487,19 @@ public class TestFSTs extends LuceneTest
assertNotNull(fst);
- // make sure all words are accepted
+ // visit valid pairs in order -- make sure all words
+ // are accepted, and FSTEnum's next() steps through
+ // them correctly
+ if (VERBOSE) {
+ System.out.println("TEST: check valid terms/next()");
+ }
{
IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
for(InputOutput<T> pair : pairs) {
IntsRef term = pair.input;
+ if (VERBOSE) {
+ System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
+ }
Object output = run(fst, term, null);
assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
@@ -578,8 +507,8 @@ public class TestFSTs extends LuceneTest
// verify enum's next
IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
-
- assertEquals(term, t.input);
+ assertNotNull(t);
+ assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
assertEquals(pair.output, t.output);
}
assertNull(fstEnum.next());
@@ -591,6 +520,9 @@ public class TestFSTs extends LuceneTest
}
// find random matching word and make sure it's valid
+ if (VERBOSE) {
+ System.out.println("TEST: verify random accepted terms");
+ }
final IntsRef scratch = new IntsRef(10);
for(int iter=0;iter<500*RANDOM_MULTIPLIER;iter++) {
T output = randomAcceptedWord(fst, scratch);
@@ -598,10 +530,15 @@ public class TestFSTs extends LuceneTest
assertEquals(termsMap.get(scratch), output);
}
- // test single IntsRefFSTEnum.advance:
- //System.out.println("TEST: verify advance");
+ // test IntsRefFSTEnum.seek:
+ if (VERBOSE) {
+ System.out.println("TEST: verify seek");
+ }
+ IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
- final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+ if (VERBOSE) {
+ System.out.println("TEST: iter=" + iter);
+ }
if (random.nextBoolean()) {
// seek to term that doesn't exist:
while(true) {
@@ -611,15 +548,35 @@ public class TestFSTs extends LuceneTest
pos = -(pos+1);
// ok doesn't exist
//System.out.println(" seek " + inputToString(inputMode, term));
- final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(term);
- if (pos < pairs.size()) {
+ final IntsRefFSTEnum.InputOutput<T> seekResult;
+ if (random.nextBoolean()) {
+ if (VERBOSE) {
+ System.out.println(" do non-exist seekFloor term=" + inputToString(inputMode, term));
+ }
+ seekResult = fstEnum.seekFloor(term);
+ pos--;
+ } else {
+ if (VERBOSE) {
+ System.out.println(" do non-exist seekCeil term=" + inputToString(inputMode, term));
+ }
+ seekResult = fstEnum.seekCeil(term);
+ }
+
+ if (pos != -1 && pos < pairs.size()) {
//System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
- assertEquals(pairs.get(pos).input, seekResult.input);
+ assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
+ if (VERBOSE) {
+ System.out.println(" got " + inputToString(inputMode, seekResult.input));
+ }
+ assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
assertEquals(pairs.get(pos).output, seekResult.output);
} else {
- // seeked beyond end
+ // seeked before start or beyond end
//System.out.println("seek=" + seekTerm);
assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
+ if (VERBOSE) {
+ System.out.println(" got null");
+ }
}
break;
@@ -627,24 +584,36 @@ public class TestFSTs extends LuceneTest
}
} else {
// seek to term that does exist:
- InputOutput pair = pairs.get(random.nextInt(pairs.size()));
- //System.out.println(" seek " + inputToString(inputMode, pair.input));
- final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(pair.input);
- assertEquals(pair.input, seekResult.input);
+ InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
+ final IntsRefFSTEnum.InputOutput<T> seekResult;
+ if (random.nextBoolean()) {
+ if (VERBOSE) {
+ System.out.println(" do exists seekFloor " + inputToString(inputMode, pair.input));
+ }
+ seekResult = fstEnum.seekFloor(pair.input);
+ } else {
+ if (VERBOSE) {
+ System.out.println(" do exists seekCeil " + inputToString(inputMode, pair.input));
+ }
+ seekResult = fstEnum.seekCeil(pair.input);
+ }
+ assertNotNull(seekResult);
+ assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
assertEquals(pair.output, seekResult.output);
}
}
if (VERBOSE) {
- System.out.println("TEST: mixed next/advance");
+ System.out.println("TEST: mixed next/seek");
}
- // test mixed next/advance
+ // test mixed next/seek
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
if (VERBOSE) {
System.out.println("TEST: iter " + iter);
}
- final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+ // reset:
+ fstEnum = new IntsRefFSTEnum<T>(fst);
int upto = -1;
while(true) {
boolean isDone = false;
@@ -660,13 +629,24 @@ public class TestFSTs extends LuceneTest
for(;attempt<10;attempt++) {
IntsRef term = toIntsRef(getRandomString(), inputMode);
if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
- if (VERBOSE) {
- System.out.println(" do non-exist advance(" + inputToString(inputMode, term) + "]");
- }
int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
assert pos < 0;
upto = -(pos+1);
- isDone = fstEnum.advance(term) == null;
+
+ if (random.nextBoolean()) {
+ upto--;
+ assertTrue(upto != -1);
+ if (VERBOSE) {
+ System.out.println(" do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
+ }
+ isDone = fstEnum.seekFloor(term) == null;
+ } else {
+ if (VERBOSE) {
+ System.out.println(" do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
+ }
+ isDone = fstEnum.seekCeil(term) == null;
+ }
+
break;
}
}
@@ -681,10 +661,17 @@ public class TestFSTs extends LuceneTest
upto = 0;
}
- if (VERBOSE) {
- System.out.println(" do advance(" + inputToString(inputMode, pairs.get(upto).input) + "]");
+ if (random.nextBoolean()) {
+ if (VERBOSE) {
+ System.out.println(" do advanceCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+ }
+ isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
+ } else {
+ if (VERBOSE) {
+ System.out.println(" do advanceFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
+ }
+ isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
}
- isDone = fstEnum.advance(pairs.get(upto).input) == null;
}
if (VERBOSE) {
if (!isDone) {
@@ -701,6 +688,24 @@ public class TestFSTs extends LuceneTest
assertFalse(isDone);
assertEquals(pairs.get(upto).input, fstEnum.current().input);
assertEquals(pairs.get(upto).output, fstEnum.current().output);
+
+ /*
+ if (upto < pairs.size()-1) {
+ int tryCount = 0;
+ while(tryCount < 10) {
+ final IntsRef t = toIntsRef(getRandomString(), inputMode);
+ if (pairs.get(upto).input.compareTo(t) < 0) {
+ final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
+ if (VERBOSE) {
+ System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
+ }
+ assertEquals(expected, fstEnum.beforeNext(t));
+ break;
+ }
+ tryCount++;
+ }
+ }
+ */
}
}
}
@@ -757,7 +762,9 @@ public class TestFSTs extends LuceneTest
}
}
- //System.out.println("TEST: now prune");
+ if (VERBOSE) {
+ System.out.println("TEST: now prune");
+ }
// prune 'em
final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
@@ -765,7 +772,9 @@ public class TestFSTs extends LuceneTest
Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
final IntsRef prefix = ent.getKey();
final CountMinOutput<T> cmo = ent.getValue();
- //System.out.println(" term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf);
+ if (VERBOSE) {
+ System.out.println(" term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf + " output=" + outputs.outputToString(cmo.output) + " isFinal=" + cmo.isFinal);
+ }
final boolean keep;
if (prune1 > 0) {
keep = cmo.count >= prune1;
@@ -824,14 +833,20 @@ public class TestFSTs extends LuceneTest
assertNotNull(fst);
// make sure FST only enums valid prefixes
+ if (VERBOSE) {
+ System.out.println("TEST: check pruned enum");
+ }
IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
- IntsRefFSTEnum.InputOutput current;
+ IntsRefFSTEnum.InputOutput<T> current;
while((current = fstEnum.next()) != null) {
- //System.out.println(" fst enum term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+ if (VERBOSE) {
+ System.out.println(" fstEnum.next term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+ }
final CountMinOutput cmo = prefixes.get(current.input);
assertNotNull(cmo);
assertTrue(cmo.isLeaf || cmo.isFinal);
- if (cmo.isFinal && !cmo.isLeaf) {
+ //if (cmo.isFinal && !cmo.isLeaf) {
+ if (cmo.isFinal) {
assertEquals(cmo.finalOutput, current.output);
} else {
assertEquals(cmo.output, current.output);
@@ -839,19 +854,24 @@ public class TestFSTs extends LuceneTest
}
// make sure all non-pruned prefixes are present in the FST
- final int[] stopNode = new int[2];
+ if (VERBOSE) {
+ System.out.println("TEST: verify all prefixes");
+ }
+ final int[] stopNode = new int[1];
for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
if (ent.getKey().length > 0) {
final CountMinOutput<T> cmo = ent.getValue();
final T output = run(fst, ent.getKey(), stopNode);
- //System.out.println(" term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+ if (VERBOSE) {
+ System.out.println("TEST: verify term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+ }
// if (cmo.isFinal && !cmo.isLeaf) {
if (cmo.isFinal) {
assertEquals(cmo.finalOutput, output);
} else {
assertEquals(cmo.output, output);
}
- assertEquals(ent.getKey().length, stopNode[1]);
+ assertEquals(ent.getKey().length, stopNode[0]);
}
}
}
@@ -859,7 +879,7 @@ public class TestFSTs extends LuceneTest
public void testRandomWords() throws IOException {
testRandomWords(1000, 5 * RANDOM_MULTIPLIER);
- //testRandomWords(10, 100);
+ //testRandomWords(20, 100);
}
private String inputModeToString(int mode) {
@@ -888,7 +908,7 @@ public class TestFSTs extends LuceneTest
}
}
- private String getRandomString() {
+ static String getRandomString() {
final String term;
if (random.nextBoolean()) {
term = _TestUtil.randomRealisticUnicodeString(random);
@@ -909,10 +929,10 @@ public class TestFSTs extends LuceneTest
private static String inputToString(int inputMode, IntsRef term) {
if (inputMode == 0) {
// utf8
- return toBytesRef(term).utf8ToString();
+ return toBytesRef(term).utf8ToString() + " " + term;
} else {
// utf32
- return UnicodeUtil.newString(term.ints, term.offset, term.length);
+ return UnicodeUtil.newString(term.ints, term.offset, term.length) + " " + term;
}
}
@@ -925,12 +945,13 @@ public class TestFSTs extends LuceneTest
CodecProvider.getDefault().setDefaultFieldCodec("Standard");
}
- final LineFileDocs docs = new LineFileDocs(false);
+ final LineFileDocs docs = new LineFileDocs(random);
final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 100 : 1;
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = _TestUtil.getTempDir("fstlines");
final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
final IndexWriter writer = new IndexWriter(dir, conf);
+ writer.setInfoStream(VERBOSE ? System.out : null);
final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
Document doc;
int docCount = 0;
@@ -986,18 +1007,17 @@ public class TestFSTs extends LuceneTest
// same:
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
- fstEnum.reset();
final BytesRef randomTerm = new BytesRef(getRandomString());
- final TermsEnum.SeekStatus seekResult = termsEnum.seek(randomTerm);
- final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.advance(randomTerm);
-
if (VERBOSE) {
- System.out.println("TEST: seek " + randomTerm.utf8ToString());
+ System.out.println("TEST: seek " + randomTerm.utf8ToString() + " " + randomTerm);
}
+ final TermsEnum.SeekStatus seekResult = termsEnum.seek(randomTerm);
+ final BytesRefFSTEnum.InputOutput fstSeekResult = fstEnum.seekCeil(randomTerm);
+
if (seekResult == TermsEnum.SeekStatus.END) {
- assertNull(fstSeekResult);
+ assertNull("got " + (fstSeekResult == null ? "null" : fstSeekResult.input.utf8ToString()) + " but expected null", fstSeekResult);
} else {
assertSame(termsEnum, fstEnum, storeOrd);
for(int nextIter=0;nextIter<10;nextIter++) {
@@ -1011,6 +1031,9 @@ public class TestFSTs extends LuceneTest
assertNotNull(fstEnum.next());
assertSame(termsEnum, fstEnum, storeOrd);
} else {
+ if (VERBOSE) {
+ System.out.println(" end!");
+ }
BytesRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
if (nextResult != null) {
System.out.println("expected null but got: input=" + nextResult.input.utf8ToString() + " output=" + outputs.outputToString(nextResult.output));
@@ -1032,7 +1055,8 @@ public class TestFSTs extends LuceneTest
if (termsEnum.term() == null) {
assertNull(fstEnum.current());
} else {
- assertEquals(termsEnum.term(), fstEnum.current().input);
+ assertNotNull(fstEnum.current());
+ assertEquals(termsEnum.term().utf8ToString() + " != " + fstEnum.current().input.utf8ToString(), termsEnum.term(), fstEnum.current().input);
if (storeOrd) {
// fst stored the ord
assertEquals(termsEnum.ord(), ((Long) fstEnum.current().output).longValue());
@@ -1095,7 +1119,7 @@ public class TestFSTs extends LuceneTest
System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
if (fst.getNodeCount() < 100) {
PrintStream ps = new PrintStream("out.dot");
- fst.toDot(ps);
+ Util.toDot(fst, ps);
ps.close();
System.out.println("Wrote FST to out.dot");
}
@@ -1121,7 +1145,7 @@ public class TestFSTs extends LuceneTest
}
toIntsRef(w, inputMode, intsRef);
T expected = getOutput(intsRef, ord);
- T actual = fst.get(intsRef);
+ T actual = Util.get(fst, intsRef);
if (actual == null) {
throw new RuntimeException("unexpected null output on input=" + w);
}
@@ -1233,4 +1257,57 @@ public class TestFSTs extends LuceneTest
}.run(limit);
}
}
+
+ public void testSingleString() throws Exception {
+ final Outputs<Object> outputs = NoOutputs.getSingleton();
+ final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+ b.add(new BytesRef("foobar"), outputs.getNoOutput());
+ final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
+ assertNull(fstEnum.seekFloor(new BytesRef("foo")));
+ assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
+ }
+
+ public void testSimple() throws Exception {
+
+ // Get outputs -- passing true means FST will share
+ // (delta code) the outputs. This should result in
+ // smaller FST if the outputs grow monotonically. But
+ // if numbers are "random", false should give smaller
+ // final size:
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+
+ // Build an FST mapping BytesRef -> Long
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+ final BytesRef a = new BytesRef("a");
+ final BytesRef b = new BytesRef("b");
+ final BytesRef c = new BytesRef("c");
+
+ builder.add(a, outputs.get(17));
+ builder.add(b, outputs.get(42));
+ builder.add(c, outputs.get(13824324872317238L));
+
+ final FST<Long> fst = builder.finish();
+
+ assertEquals(13824324872317238L, (long) Util.get(fst, c));
+ assertEquals(42, (long) Util.get(fst, b));
+ assertEquals(17, (long) Util.get(fst, a));
+
+ BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
+ BytesRefFSTEnum.InputOutput<Long> seekResult;
+ seekResult = fstEnum.seekFloor(a);
+ assertNotNull(seekResult);
+ assertEquals(17, (long) seekResult.output);
+
+ // goes to a
+ seekResult = fstEnum.seekFloor(new BytesRef("aa"));
+ assertNotNull(seekResult);
+ assertEquals(17, (long) seekResult.output);
+
+ // goes to b
+ seekResult = fstEnum.seekCeil(new BytesRef("aa"));
+ assertNotNull(seekResult);
+ assertEquals(b, seekResult.input);
+ assertEquals(42, (long) seekResult.output);
+ }
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/CHANGES.txt?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/CHANGES.txt Thu Jan 13 19:53:21 2011
@@ -9,12 +9,14 @@ API Changes
* LUCENE-2413: Removed the AnalyzerUtil in common/miscellaneous. (Robert Muir)
- * LUCENE-2167,LUCENE-2699,LUCENE-2763: StandardTokenizer/Analyzer in
- common/standard/ now implement the Word Break rules from the Unicode 6.0.0
- Text Segmentation algorithm (UAX#29).
+ * LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer
+ in common/standard/ now implement the Word Break rules from the Unicode 6.0.0
+ Text Segmentation algorithm (UAX#29), covering the full range of Unicode code
+ points, including values from U+FFFF to U+10FFFF
- ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer
- implementation and behavior.
+ ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/
+ Analyzer implementation and behavior. Only the Unicode Basic Multilingual
+ Plane (code points from U+0000 to U+FFFF) is covered.
UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the
relevant RFCs, in addition to implementing the UAX#29 Word Break rules.
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/build.xml?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/build.xml (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/build.xml Thu Jan 13 19:53:21 2011
@@ -38,8 +38,15 @@
<target name="compile-core" depends="jflex-notice, common.compile-core"/>
- <target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer,jflex-wiki-tokenizer"/>
+ <target name="jflex" depends="jflex-check,clean-jflex,gen-uax29-supp-macros,
+ jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer,jflex-wiki-tokenizer"/>
+ <target name="gen-uax29-supp-macros">
+ <subant target="gen-uax29-supp-macros">
+ <fileset dir="../icu" includes="build.xml"/>
+ </subant>
+ </target>
+
<target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java Thu Jan 13 19:53:21 2011
@@ -1,10 +1,5 @@
package org.apache.lucene.analysis.pt;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -31,89 +26,14 @@ import org.apache.lucene.util.Version;
* which is just the plural reduction step of the RSLP
* algorithm from <i>A Stemming Algorithm for the Portuguese Language</i>,
* Orengo et al.
+ * @see RSLPStemmerBase
*/
-public class PortugueseMinimalStemmer {
+public class PortugueseMinimalStemmer extends RSLPStemmerBase {
- private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
- Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois",
- "depois","dois","leis"),
- false);
-
- private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
- Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
- "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
- "através", "convés", "ês", "país", "após", "ambas", "ambos",
- "messias", "depois"),
- false);
+ private static final Step pluralStep =
+ parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural");
public int stem(char s[], int len) {
- if (len < 3 || s[len-1] != 's')
- return len;
-
- if (s[len-2] == 'n') {
- len--;
- s[len-1] = 'm';
- return len;
- }
-
- if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
- len--;
- s[len-2] = 'ã';
- s[len-1] = 'o';
- return len;
- }
-
- if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
- if (!(len == 4 && s[0] == 'm')) {
- len--;
- s[len-1] = 'o';
- return len;
- }
-
- if (len >= 4 && s[len-2] == 'i') {
- if (s[len-3] == 'a')
- if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
- len--;
- s[len-1] = 'l';
- return len;
- }
-
- if (len >= 5 && s[len-3] == 'é') {
- len--;
- s[len-2] = 'e';
- s[len-1] = 'l';
- return len;
- }
-
- if (len >= 5 && s[len-3] == 'e') {
- len--;
- s[len-1] = 'l';
- return len;
- }
-
- if (len >= 5 && s[len-3] == 'ó') {
- len--;
- s[len-2] = 'o';
- s[len-1] = 'l';
- return len;
- }
-
- if (!excIS.contains(s, 0, len)) {
- s[len-1] = 'l';
- return len;
- }
- }
-
- if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
- return len - 2;
-
- if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
- if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
- return len - 2;
-
- if (excS.contains(s, 0, len))
- return len;
- else
- return len-1;
+ return pluralStep.apply(s, len);
}
}
Modified: lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/realtime_search/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Thu Jan 13 19:53:21 2011
@@ -15,8 +15,8 @@
*/
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Saturday, December 4, 2010 12:34:19 PM UTC
-// generated on Sunday, December 5, 2010 12:24:12 AM UTC
+// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
+// generated on Thursday, January 6, 2011 5:09:41 AM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
ASCIITLD = "." (
@@ -306,6 +306,7 @@ ASCIITLD = "." (
| [xX][nN]--[pP]1[aA][iI]
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
| [xX][nN]--[wW][gG][bB][hH]1[cC]
+ | [xX][nN]--[wW][gG][bB][lL]6[aA]
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]