You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2014/06/19 18:25:36 UTC
svn commit: r1603938 [3/5] - in /lucene/dev/branches/solr-5473: ./
dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/
lucene/analysis/common/src/test/org/apache/lucene/analys...
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java Thu Jun 19 16:25:31 2014
@@ -33,9 +33,9 @@ import org.apache.lucene.util.LineFileDo
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
@@ -244,7 +244,7 @@ public class TestTermsEnum extends Lucen
if (VERBOSE) {
System.out.println("\nTEST: empty automaton");
}
- a = BasicAutomata.makeEmpty();
+ a = Automata.makeEmpty();
} else {
if (VERBOSE) {
System.out.println("\nTEST: keepPct=" + keepPct);
@@ -259,16 +259,9 @@ public class TestTermsEnum extends Lucen
acceptTerms.add(s2);
sortedAcceptTerms.add(new BytesRef(s2));
}
- a = BasicAutomata.makeStringUnion(sortedAcceptTerms);
+ a = Automata.makeStringUnion(sortedAcceptTerms);
}
- if (random().nextBoolean()) {
- if (VERBOSE) {
- System.out.println("TEST: reduce the automaton");
- }
- a.reduce();
- }
-
final CompiledAutomaton c = new CompiledAutomaton(a, true, false);
final BytesRef[] acceptTermsArray = new BytesRef[acceptTerms.size()];
@@ -745,7 +738,7 @@ public class TestTermsEnum extends Lucen
w.shutdown();
AtomicReader sub = getOnlySegmentReader(r);
Terms terms = sub.fields().terms("field");
- Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
+ Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
TermsEnum te = terms.intersect(ca, null);
assertEquals("aaa", te.next().utf8ToString());
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum2.java Thu Jun 19 16:25:31 2014
@@ -31,6 +31,7 @@ import org.apache.lucene.index.TermsEnum
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -68,7 +69,7 @@ public class TestTermsEnum2 extends Luce
writer.addDocument(doc);
}
- termsAutomaton = BasicAutomata.makeStringUnion(terms);
+ termsAutomaton = Automata.makeStringUnion(terms);
reader = writer.getReader();
searcher = newSearcher(reader);
@@ -84,23 +85,27 @@ public class TestTermsEnum2 extends Luce
/** tests a pre-intersected automaton against the original */
public void testFiniteVersusInfinite() throws Exception {
+
for (int i = 0; i < numIterations; i++) {
String reg = AutomatonTestUtil.randomRegexp(random());
- Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
+ Automaton automaton = Operations.determinize(new RegExp(reg, RegExp.NONE).toAutomaton());
final List<BytesRef> matchedTerms = new ArrayList<>();
for(BytesRef t : terms) {
- if (BasicOperations.run(automaton, t.utf8ToString())) {
+ if (Operations.run(automaton, t.utf8ToString())) {
matchedTerms.add(t);
}
}
- Automaton alternate = BasicAutomata.makeStringUnion(matchedTerms);
+ Automaton alternate = Automata.makeStringUnion(matchedTerms);
//System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
//AutomatonTestUtil.minimizeSimple(alternate);
//System.out.println("minmize done");
AutomatonQuery a1 = new AutomatonQuery(new Term("field", ""), automaton);
AutomatonQuery a2 = new AutomatonQuery(new Term("field", ""), alternate);
- CheckHits.checkEqual(a1, searcher.search(a1, 25).scoreDocs, searcher.search(a2, 25).scoreDocs);
+
+ ScoreDoc[] origHits = searcher.search(a1, 25).scoreDocs;
+ ScoreDoc[] newHits = searcher.search(a2, 25).scoreDocs;
+ CheckHits.checkEqual(a1, origHits, newHits);
}
}
@@ -108,13 +113,13 @@ public class TestTermsEnum2 extends Luce
public void testSeeking() throws Exception {
for (int i = 0; i < numIterations; i++) {
String reg = AutomatonTestUtil.randomRegexp(random());
- Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
+ Automaton automaton = Operations.determinize(new RegExp(reg, RegExp.NONE).toAutomaton());
TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null);
ArrayList<BytesRef> unsortedTerms = new ArrayList<>(terms);
Collections.shuffle(unsortedTerms, random());
for (BytesRef term : unsortedTerms) {
- if (BasicOperations.run(automaton, term.utf8ToString())) {
+ if (Operations.run(automaton, term.utf8ToString())) {
// term is accepted
if (random().nextBoolean()) {
// seek exact
@@ -153,16 +158,16 @@ public class TestTermsEnum2 extends Luce
for (int i = 0; i < numIterations; i++) {
String reg = AutomatonTestUtil.randomRegexp(random());
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
- CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false);
+ CompiledAutomaton ca = new CompiledAutomaton(automaton, Operations.isFinite(automaton), false);
TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null);
- Automaton expected = BasicOperations.intersection(termsAutomaton, automaton);
+ Automaton expected = Operations.determinize(Operations.intersection(termsAutomaton, automaton));
TreeSet<BytesRef> found = new TreeSet<>();
while (te.next() != null) {
found.add(BytesRef.deepCopyOf(te.term()));
}
- Automaton actual = BasicAutomata.makeStringUnion(found);
- assertTrue(BasicOperations.sameLanguage(expected, actual));
+ Automaton actual = Operations.determinize(Automata.makeStringUnion(found));
+ assertTrue(Operations.sameLanguage(expected, actual));
}
}
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java Thu Jun 19 16:25:31 2014
@@ -33,10 +33,10 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.Automaton;
public class TestAutomatonQuery extends LuceneTestCase {
private Directory directory;
@@ -106,24 +106,24 @@ public class TestAutomatonQuery extends
/**
* Test some very simple automata.
*/
- public void testBasicAutomata() throws IOException {
- assertAutomatonHits(0, BasicAutomata.makeEmpty());
- assertAutomatonHits(0, BasicAutomata.makeEmptyString());
- assertAutomatonHits(2, BasicAutomata.makeAnyChar());
- assertAutomatonHits(3, BasicAutomata.makeAnyString());
- assertAutomatonHits(2, BasicAutomata.makeString("doc"));
- assertAutomatonHits(1, BasicAutomata.makeChar('a'));
- assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
- assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
- assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));
- assertAutomatonHits(2, BasicOperations.union(BasicAutomata.makeChar('a'),
- BasicAutomata.makeChar('b')));
- assertAutomatonHits(0, BasicOperations.intersection(BasicAutomata
- .makeChar('a'), BasicAutomata.makeChar('b')));
- assertAutomatonHits(1, BasicOperations.minus(BasicAutomata.makeCharRange('a', 'b'),
- BasicAutomata.makeChar('a')));
+ public void testAutomata() throws IOException {
+ assertAutomatonHits(0, Automata.makeEmpty());
+ assertAutomatonHits(0, Automata.makeEmptyString());
+ assertAutomatonHits(2, Automata.makeAnyChar());
+ assertAutomatonHits(3, Automata.makeAnyString());
+ assertAutomatonHits(2, Automata.makeString("doc"));
+ assertAutomatonHits(1, Automata.makeChar('a'));
+ assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
+ assertAutomatonHits(2, Automata.makeInterval(1233, 2346, 0));
+ assertAutomatonHits(1, Automata.makeInterval(0, 2000, 0));
+ assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
+ Automata.makeChar('b')));
+ assertAutomatonHits(0, Operations.intersection(Automata
+ .makeChar('a'), Automata.makeChar('b')));
+ assertAutomatonHits(1, Operations.minus(Automata.makeCharRange('a', 'b'),
+ Automata.makeChar('a')));
}
-
+
/**
* Test that a nondeterministic automaton works correctly. (It should be
* determinized)
@@ -131,26 +131,27 @@ public class TestAutomatonQuery extends
public void testNFA() throws IOException {
// accept this or three, the union is an NFA (two transitions for 't' from
// initial state)
- Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
- BasicAutomata.makeString("three"));
+ Automaton nfa = Operations.union(Automata.makeString("this"),
+ Automata.makeString("three"));
assertAutomatonHits(2, nfa);
}
public void testEquals() {
- AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
+ AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata
.makeString("foobar"));
// reference to a1
AutomatonQuery a2 = a1;
// same as a1 (accepts the same language, same term)
- AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"), BasicOperations
- .concatenate(BasicAutomata.makeString("foo"), BasicAutomata
- .makeString("bar")));
+ AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
+ Operations.concatenate(
+ Automata.makeString("foo"),
+ Automata.makeString("bar")));
// different than a1 (same term, but different language)
- AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
- .makeString("different"));
+ AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
+ Automata.makeString("different"));
// different than a1 (different term, same language)
- AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"), BasicAutomata
- .makeString("foobar"));
+ AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
+ Automata.makeString("foobar"));
assertEquals(a1.hashCode(), a2.hashCode());
assertEquals(a1, a2);
@@ -176,8 +177,7 @@ public class TestAutomatonQuery extends
* MultiTermQuery semantics.
*/
public void testRewriteSingleTerm() throws IOException {
- AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
- .makeString("piece"));
+ AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeString("piece"));
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
assertEquals(1, automatonQueryNrHits(aq));
@@ -188,10 +188,8 @@ public class TestAutomatonQuery extends
* MultiTermQuery semantics.
*/
public void testRewritePrefix() throws IOException {
- Automaton pfx = BasicAutomata.makeString("do");
- pfx.expandSingleton(); // expand singleton representation for testing
- Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
- .makeAnyString());
+ Automaton pfx = Automata.makeString("do");
+ Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
@@ -202,8 +200,7 @@ public class TestAutomatonQuery extends
* Test handling of the empty language
*/
public void testEmptyOptimization() throws IOException {
- AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
- .makeEmpty());
+ AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeEmpty());
// not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
// instanceof EmptyTermEnum);
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestDocTermOrdsRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestDocTermOrdsRewriteMethod.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestDocTermOrdsRewriteMethod.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestDocTermOrdsRewriteMethod.java Thu Jun 19 16:25:31 2014
@@ -78,7 +78,7 @@ public class TestDocTermOrdsRewriteMetho
Collections.sort(terms);
System.out.println("UTF16 order:");
for(String s : terms) {
- System.out.println(" " + UnicodeUtil.toHexString(s));
+ System.out.println(" " + UnicodeUtil.toHexString(s) + " " + s);
}
}
@@ -115,7 +115,7 @@ public class TestDocTermOrdsRewriteMetho
/** check that the # of hits is the same as if the query
* is run against the inverted index
*/
- protected void assertSame(String regexp) throws IOException {
+ protected void assertSame(String regexp) throws IOException {
RegexpQuery docValues = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
docValues.setRewriteMethod(new DocTermOrdsRewriteMethod());
RegexpQuery inverted = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java Thu Jun 19 16:25:31 2014
@@ -38,6 +38,21 @@ import org.apache.lucene.util.LuceneTest
*/
public class TestFuzzyQuery extends LuceneTestCase {
+ public void testBasicPrefix() throws Exception {
+ Directory directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
+ addDoc("abc", writer);
+ IndexReader reader = writer.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ writer.shutdown();
+
+ FuzzyQuery query = new FuzzyQuery(new Term("field", "abc"), FuzzyQuery.defaultMaxEdits, 1);
+ ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+ assertEquals(1, hits.length);
+ reader.close();
+ directory.close();
+ }
+
public void testFuzziness() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpQuery.java Thu Jun 19 16:25:31 2014
@@ -27,10 +27,10 @@ import org.apache.lucene.index.RandomInd
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonProvider;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.RegExp;
/**
@@ -97,10 +97,10 @@ public class TestRegexpQuery extends Luc
public void testCustomProvider() throws IOException {
AutomatonProvider myProvider = new AutomatonProvider() {
// automaton that matches quick or brown
- private Automaton quickBrownAutomaton = BasicOperations.union(Arrays
- .asList(BasicAutomata.makeString("quick"),
- BasicAutomata.makeString("brown"),
- BasicAutomata.makeString("bob")));
+ private Automaton quickBrownAutomaton = Operations.union(Arrays
+ .asList(Automata.makeString("quick"),
+ Automata.makeString("brown"),
+ Automata.makeString("bob")));
@Override
public Automaton getAutomaton(String name) {
@@ -108,8 +108,7 @@ public class TestRegexpQuery extends Luc
else return null;
}
};
- RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
- myProvider);
+ RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, myProvider);
assertEquals(1, searcher.search(query, 5).totalHits);
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom2.java Thu Jun 19 16:25:31 2014
@@ -40,9 +40,9 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;
/**
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java Thu Jun 19 16:25:31 2014
@@ -268,7 +268,7 @@ public class TestWildcard
* Test that wild card queries are parsed to the correct type and are searched correctly.
* This test looks at both parsing and execution of wildcard queries.
* Although placed here, it also tests prefix queries, verifying that
- * prefix queries are not parsed into wild card queries, and viceversa.
+ * prefix queries are not parsed into wild card queries, and vice-versa.
*/
public void testParsingAndSearching() throws Exception {
String field = "content";
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/TestPagedBytes.java Thu Jun 19 16:25:31 2014
@@ -185,4 +185,18 @@ public class TestPagedBytes extends Luce
dir.close();
}
+ public void testRamBytesUsed() {
+ final int blockBits = TestUtil.nextInt(random(), 4, 22);
+ PagedBytes b = new PagedBytes(blockBits);
+ final int totalBytes = random().nextInt(10000);
+ for (long pointer = 0; pointer < totalBytes; ) {
+ BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random(), 10));
+ pointer = b.copyUsingLengthPrefix(bytes);
+ }
+ assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
+ final PagedBytes.Reader reader = b.freeze(random().nextBoolean());
+ assertEquals(RamUsageTester.sizeOf(b), b.ramBytesUsed());
+ assertEquals(RamUsageTester.sizeOf(reader), reader.ramBytesUsed());
+ }
+
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminism.java Thu Jun 19 16:25:31 2014
@@ -28,8 +28,9 @@ public class TestDeterminism extends Luc
/** test a bunch of random regular expressions */
public void testRegexps() throws Exception {
int num = atLeast(500);
- for (int i = 0; i < num; i++)
+ for (int i = 0; i < num; i++) {
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton());
+ }
}
/** test against a simple, unoptimized det */
@@ -37,42 +38,41 @@ public class TestDeterminism extends Luc
int num = atLeast(200);
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
- Automaton b = a.clone();
- AutomatonTestUtil.determinizeSimple(a);
- b.deterministic = false; // force det
- b.determinize();
+ a = AutomatonTestUtil.determinizeSimple(a);
+ Automaton b = Operations.determinize(a);
// TODO: more verifications possible?
- assertTrue(BasicOperations.sameLanguage(a, b));
+ assertTrue(Operations.sameLanguage(a, b));
}
}
private static void assertAutomaton(Automaton a) {
- Automaton clone = a.clone();
+ a = Operations.determinize(Operations.removeDeadStates(a));
+
// complement(complement(a)) = a
- Automaton equivalent = BasicOperations.complement(BasicOperations.complement(a));
- assertTrue(BasicOperations.sameLanguage(a, equivalent));
+ Automaton equivalent = Operations.complement(Operations.complement(a));
+ assertTrue(Operations.sameLanguage(a, equivalent));
// a union a = a
- equivalent = BasicOperations.union(a, clone);
- assertTrue(BasicOperations.sameLanguage(a, equivalent));
+ equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)));
+ assertTrue(Operations.sameLanguage(a, equivalent));
// a intersect a = a
- equivalent = BasicOperations.intersection(a, clone);
- assertTrue(BasicOperations.sameLanguage(a, equivalent));
+ equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)));
+ assertTrue(Operations.sameLanguage(a, equivalent));
// a minus a = empty
- Automaton empty = BasicOperations.minus(a, clone);
- assertTrue(BasicOperations.isEmpty(empty));
+ Automaton empty = Operations.minus(a, a);
+ assertTrue(Operations.isEmpty(empty));
// as long as a doesn't accept the empty string,
// then optional(a) - empty = a
- if (!BasicOperations.run(a, "")) {
+ if (!Operations.run(a, "")) {
//System.out.println("test " + a);
- Automaton optional = BasicOperations.optional(a);
+ Automaton optional = Operations.optional(a);
//System.out.println("optional " + optional);
- equivalent = BasicOperations.minus(optional, BasicAutomata.makeEmptyString());
+ equivalent = Operations.minus(optional, Automata.makeEmptyString());
//System.out.println("equiv " + equivalent);
- assertTrue(BasicOperations.sameLanguage(a, equivalent));
+ assertTrue(Operations.sameLanguage(a, equivalent));
}
}
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestDeterminizeLexicon.java Thu Jun 19 16:25:31 2014
@@ -41,7 +41,7 @@ public class TestDeterminizeLexicon exte
for (int j = 0; j < 5000; j++) {
String randomString = TestUtil.randomUnicodeString(random());
terms.add(randomString);
- automata.add(BasicAutomata.makeString(randomString));
+ automata.add(Automata.makeString(randomString));
}
assertLexicon();
}
@@ -49,11 +49,11 @@ public class TestDeterminizeLexicon exte
public void assertLexicon() throws Exception {
Collections.shuffle(automata, random());
- final Automaton lex = BasicOperations.union(automata);
- lex.determinize();
- assertTrue(SpecialOperations.isFinite(lex));
+ Automaton lex = Operations.union(automata);
+ lex = Operations.determinize(lex);
+ assertTrue(Operations.isFinite(lex));
for (String s : terms) {
- assertTrue(BasicOperations.run(lex, s));
+ assertTrue(Operations.run(lex, s));
}
final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex);
for (String s : terms) {
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestLevenshteinAutomata.java Thu Jun 19 16:25:31 2014
@@ -41,7 +41,7 @@ public class TestLevenshteinAutomata ext
// LUCENE-3094
public void testNoWastedStates() throws Exception {
- AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc", false).toAutomaton(1));
+ assertFalse(Operations.hasDeadStatesFromInitial(new LevenshteinAutomata("abc", false).toAutomaton(1)));
}
/**
@@ -75,31 +75,36 @@ public class TestLevenshteinAutomata ext
assertNotNull(tautomata[n]);
assertTrue(automata[n].isDeterministic());
assertTrue(tautomata[n].isDeterministic());
- assertTrue(SpecialOperations.isFinite(automata[n]));
- assertTrue(SpecialOperations.isFinite(tautomata[n]));
- AutomatonTestUtil.assertNoDetachedStates(automata[n]);
- AutomatonTestUtil.assertNoDetachedStates(tautomata[n]);
+ assertTrue(Operations.isFinite(automata[n]));
+ assertTrue(Operations.isFinite(tautomata[n]));
+ assertFalse(Operations.hasDeadStatesFromInitial(automata[n]));
+ assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n]));
// check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) {
- assertTrue(automata[n-1].subsetOf(automata[n]));
- assertTrue(automata[n-1].subsetOf(tautomata[n]));
- assertTrue(tautomata[n-1].subsetOf(automata[n]));
- assertTrue(tautomata[n-1].subsetOf(tautomata[n]));
+ assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n-1]),
+ Operations.removeDeadStates(automata[n])));
+ assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n-1]),
+ Operations.removeDeadStates(tautomata[n])));
+ assertTrue(Operations.subsetOf(Operations.removeDeadStates(tautomata[n-1]),
+ Operations.removeDeadStates(automata[n])));
+ assertTrue(Operations.subsetOf(Operations.removeDeadStates(tautomata[n-1]),
+ Operations.removeDeadStates(tautomata[n])));
assertNotSame(automata[n-1], automata[n]);
}
// check that Lev(N) is a subset of LevT(N)
- assertTrue(automata[n].subsetOf(tautomata[n]));
+ assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n]),
+ Operations.removeDeadStates(tautomata[n])));
// special checks for specific n
switch(n) {
case 0:
// easy, matches the string itself
- assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), automata[0]));
- assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeString(s), tautomata[0]));
+ assertTrue(Operations.sameLanguage(Automata.makeString(s), Operations.removeDeadStates(automata[0])));
+ assertTrue(Operations.sameLanguage(Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
break;
case 1:
// generate a lev1 naively, and check the accepted lang is the same.
- assertTrue(BasicOperations.sameLanguage(naiveLev1(s), automata[1]));
- assertTrue(BasicOperations.sameLanguage(naiveLev1T(s), tautomata[1]));
+ assertTrue(Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1])));
+ assertTrue(Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
break;
default:
assertBruteForce(s, automata[n], n);
@@ -114,13 +119,13 @@ public class TestLevenshteinAutomata ext
* substitutions of s.
*/
private Automaton naiveLev1(String s) {
- Automaton a = BasicAutomata.makeString(s);
- a = BasicOperations.union(a, insertionsOf(s));
- MinimizationOperations.minimize(a);
- a = BasicOperations.union(a, deletionsOf(s));
- MinimizationOperations.minimize(a);
- a = BasicOperations.union(a, substitutionsOf(s));
- MinimizationOperations.minimize(a);
+ Automaton a = Automata.makeString(s);
+ a = Operations.union(a, insertionsOf(s));
+ a = MinimizationOperations.minimize(a);
+ a = Operations.union(a, deletionsOf(s));
+ a = MinimizationOperations.minimize(a);
+ a = Operations.union(a, substitutionsOf(s));
+ a = MinimizationOperations.minimize(a);
return a;
}
@@ -131,8 +136,8 @@ public class TestLevenshteinAutomata ext
*/
private Automaton naiveLev1T(String s) {
Automaton a = naiveLev1(s);
- a = BasicOperations.union(a, transpositionsOf(s));
- MinimizationOperations.minimize(a);
+ a = Operations.union(a, transpositionsOf(s));
+ a = MinimizationOperations.minimize(a);
return a;
}
@@ -144,15 +149,14 @@ public class TestLevenshteinAutomata ext
List<Automaton> list = new ArrayList<>();
for (int i = 0; i <= s.length(); i++) {
- Automaton a = BasicAutomata.makeString(s.substring(0, i));
- a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
- a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
- .substring(i)));
+ Automaton a = Automata.makeString(s.substring(0, i));
+ a = Operations.concatenate(a, Automata.makeAnyChar());
+ a = Operations.concatenate(a, Automata.makeString(s.substring(i)));
list.add(a);
}
- Automaton a = BasicOperations.union(list);
- MinimizationOperations.minimize(a);
+ Automaton a = Operations.union(list);
+ a = MinimizationOperations.minimize(a);
return a;
}
@@ -164,15 +168,13 @@ public class TestLevenshteinAutomata ext
List<Automaton> list = new ArrayList<>();
for (int i = 0; i < s.length(); i++) {
- Automaton a = BasicAutomata.makeString(s.substring(0, i));
- a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
- .substring(i + 1)));
- a.expandSingleton();
+ Automaton a = Automata.makeString(s.substring(0, i));
+ a = Operations.concatenate(a, Automata.makeString(s.substring(i + 1)));
list.add(a);
}
- Automaton a = BasicOperations.union(list);
- MinimizationOperations.minimize(a);
+ Automaton a = Operations.union(list);
+ a = MinimizationOperations.minimize(a);
return a;
}
@@ -184,15 +186,14 @@ public class TestLevenshteinAutomata ext
List<Automaton> list = new ArrayList<>();
for (int i = 0; i < s.length(); i++) {
- Automaton a = BasicAutomata.makeString(s.substring(0, i));
- a = BasicOperations.concatenate(a, BasicAutomata.makeAnyChar());
- a = BasicOperations.concatenate(a, BasicAutomata.makeString(s
- .substring(i + 1)));
+ Automaton a = Automata.makeString(s.substring(0, i));
+ a = Operations.concatenate(a, Automata.makeAnyChar());
+ a = Operations.concatenate(a, Automata.makeString(s.substring(i + 1)));
list.add(a);
}
- Automaton a = BasicOperations.union(list);
- MinimizationOperations.minimize(a);
+ Automaton a = Operations.union(list);
+ a = MinimizationOperations.minimize(a);
return a;
}
@@ -201,8 +202,9 @@ public class TestLevenshteinAutomata ext
* (transposing two adjacent characters)
*/
private Automaton transpositionsOf(String s) {
- if (s.length() < 2)
- return BasicAutomata.makeEmpty();
+ if (s.length() < 2) {
+ return Automata.makeEmpty();
+ }
List<Automaton> list = new ArrayList<>();
for (int i = 0; i < s.length()-1; i++) {
StringBuilder sb = new StringBuilder();
@@ -211,11 +213,12 @@ public class TestLevenshteinAutomata ext
sb.append(s.charAt(i));
sb.append(s.substring(i+2, s.length()));
String st = sb.toString();
- if (!st.equals(s))
- list.add(BasicAutomata.makeString(st));
+ if (!st.equals(s)) {
+ list.add(Automata.makeString(st));
+ }
}
- Automaton a = BasicOperations.union(list);
- MinimizationOperations.minimize(a);
+ Automaton a = Operations.union(list);
+ a = MinimizationOperations.minimize(a);
return a;
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java Thu Jun 19 16:25:31 2014
@@ -24,13 +24,13 @@ import org.apache.lucene.util.LuceneTest
*/
public class TestMinimize extends LuceneTestCase {
/** the minimal and non-minimal are compared to ensure they are the same. */
- public void test() {
+ public void testBasic() {
int num = atLeast(200);
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
- Automaton b = a.clone();
- MinimizationOperations.minimize(b);
- assertTrue(BasicOperations.sameLanguage(a, b));
+ Automaton la = Operations.determinize(Operations.removeDeadStates(a));
+ Automaton lb = MinimizationOperations.minimize(a);
+ assertTrue(Operations.sameLanguage(la, lb));
}
}
@@ -41,12 +41,22 @@ public class TestMinimize extends Lucene
int num = atLeast(200);
for (int i = 0; i < num; i++) {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
- AutomatonTestUtil.minimizeSimple(a);
- Automaton b = a.clone();
- MinimizationOperations.minimize(b);
- assertTrue(BasicOperations.sameLanguage(a, b));
- assertEquals(a.getNumberOfStates(), b.getNumberOfStates());
- assertEquals(a.getNumberOfTransitions(), b.getNumberOfTransitions());
+ a = AutomatonTestUtil.minimizeSimple(a);
+ Automaton b = MinimizationOperations.minimize(a);
+ assertTrue(Operations.sameLanguage(a, b));
+ assertEquals(a.getNumStates(), b.getNumStates());
+ int numStates = a.getNumStates();
+
+ int sum1 = 0;
+ for(int s=0;s<numStates;s++) {
+ sum1 += a.getNumTransitions(s);
+ }
+ int sum2 = 0;
+ for(int s=0;s<numStates;s++) {
+ sum2 += b.getNumTransitions(s);
+ }
+
+ assertEquals(sum1, sum2);
}
}
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java Thu Jun 19 16:25:31 2014
@@ -17,13 +17,17 @@ package org.apache.lucene.util.automaton
* limitations under the License.
*/
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Random;
+import org.apache.lucene.util.fst.Util;
public class TestUTF32ToUTF8 extends LuceneTestCase {
@@ -151,12 +155,7 @@ public class TestUTF32ToUTF8 extends Luc
continue;
}
- final Automaton a = new Automaton();
- final State end = new State();
- end.setAccept(true);
- a.getInitialState().addTransition(new Transition(startCode, endCode, end));
- a.setDeterministic(true);
-
+ Automaton a = Automata.makeCharRange(startCode, endCode);
testOne(r, new ByteRunAutomaton(a), startCode, endCode, ITERS_PER_DFA);
}
}
@@ -208,6 +207,20 @@ public class TestUTF32ToUTF8 extends Luc
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton());
}
}
+
+ public void testSingleton() throws Exception {
+ int iters = atLeast(100);
+ for(int iter=0;iter<iters;iter++) {
+ String s = TestUtil.randomRealisticUnicodeString(random());
+ Automaton a = Automata.makeString(s);
+ Automaton utf8 = new UTF32ToUTF8().convert(a);
+ IntsRef ints = new IntsRef();
+ Util.toIntsRef(new BytesRef(s), ints);
+ Set<IntsRef> set = new HashSet<>();
+ set.add(ints);
+ assertEquals(set, Operations.getFiniteStrings(utf8, -1));
+ }
+ }
private void assertAutomaton(Automaton automaton) throws Exception {
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
Modified: lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/solr-5473/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Thu Jun 19 16:25:31 2014
@@ -17,6 +17,30 @@ package org.apache.lucene.util.fst;
* limitations under the License.
*/
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.concurrent.atomic.AtomicInteger;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -40,12 +64,12 @@ import org.apache.lucene.store.MockDirec
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LineFileDocs;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST.Arc;
@@ -54,30 +78,6 @@ import org.apache.lucene.util.fst.PairOu
import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.packed.PackedInts;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.concurrent.atomic.AtomicInteger;
-
import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
import static org.apache.lucene.util.fst.FSTTester.toIntsRef;
@@ -346,7 +346,7 @@ public class TestFSTs extends LuceneTest
BytesRef term;
int ord = 0;
- Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
+ Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);
while((term = termsEnum.next()) != null) {
Modified: lucene/dev/branches/solr-5473/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java (original)
+++ lucene/dev/branches/solr-5473/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java Thu Jun 19 16:25:31 2014
@@ -46,11 +46,11 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.apache.lucene.util.automaton.Automaton;
/**
* Support for highlighting multiterm queries in PostingsHighlighter.
@@ -106,8 +106,8 @@ class MultiTermHighlighting {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
if (prefix.field().equals(field)) {
- list.add(new CharacterRunAutomaton(BasicOperations.concatenate(BasicAutomata.makeString(prefix.text()),
- BasicAutomata.makeAnyString())) {
+ list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
+ Automata.makeAnyString())) {
@Override
public String toString() {
return pq.toString();
@@ -126,11 +126,8 @@ class MultiTermHighlighting {
int prefixLength = Math.min(fq.getPrefixLength(), termLength);
String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
- Automaton automaton = builder.toAutomaton(fq.getMaxEdits());
- if (prefixLength > 0) {
- Automaton prefix = BasicAutomata.makeString(UnicodeUtil.newString(termText, 0, prefixLength));
- automaton = BasicOperations.concatenate(prefix, automaton);
- }
+ String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
+ Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
list.add(new CharacterRunAutomaton(automaton) {
@Override
public String toString() {
@@ -161,7 +158,7 @@ class MultiTermHighlighting {
final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
// this is *not* an automaton, but its very simple
- list.add(new CharacterRunAutomaton(BasicAutomata.makeEmpty()) {
+ list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
@Override
public boolean run(char[] s, int offset, int length) {
scratch.chars = s;
Modified: lucene/dev/branches/solr-5473/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/solr-5473/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Thu Jun 19 16:25:31 2014
@@ -55,7 +55,7 @@ import org.apache.lucene.search.spans.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.w3c.dom.Element;
@@ -1340,7 +1340,7 @@ public class HighlighterTest extends Bas
@Override
public void run() throws Exception {
String goodWord = "goodtoken";
- CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
+ CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("stoppedtoken"));
// we disable MockTokenizer checks because we will forcefully limit the
// tokenstream and call end() before incrementToken() returns false.
final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
Modified: lucene/dev/branches/solr-5473/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/branches/solr-5473/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Thu Jun 19 16:25:31 2014
@@ -629,7 +629,7 @@ public class TestBlockJoin extends Lucen
}
DocsEnum parents = MultiFields.getTermDocsEnum(joinR, null, "isParent", new BytesRef("x"));
System.out.println("parent docIDs:");
- while (parents.nextDoc() != parents.NO_MORE_DOCS) {
+ while (parents.nextDoc() != DocsEnum.NO_MORE_DOCS) {
System.out.println(" " + parents.docID());
}
}
Modified: lucene/dev/branches/solr-5473/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (original)
+++ lucene/dev/branches/solr-5473/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java Thu Jun 19 16:25:31 2014
@@ -30,10 +30,12 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.sorter.Sorter.DocMap;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.IndexInput;
@@ -222,6 +224,32 @@ public class SortingAtomicReader extends
}
}
+ private static class SortingSortedNumericDocValues extends SortedNumericDocValues {
+
+ private final SortedNumericDocValues in;
+ private final Sorter.DocMap docMap;
+
+ SortingSortedNumericDocValues(SortedNumericDocValues in, DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public int count() {
+ return in.count();
+ }
+
+ @Override
+ public void setDocument(int doc) {
+ in.setDocument(docMap.newToOld(doc));
+ }
+
+ @Override
+ public long valueAt(int index) {
+ return in.valueAt(index);
+ }
+ }
+
private static class SortingBits implements Bits {
private final Bits in;
@@ -786,6 +814,17 @@ public class SortingAtomicReader extends
if (oldDocValues == null) return null;
return new SortingNumericDocValues(oldDocValues, docMap);
}
+
+ @Override
+ public SortedNumericDocValues getSortedNumericDocValues(String field)
+ throws IOException {
+ final SortedNumericDocValues oldDocValues = in.getSortedNumericDocValues(field);
+ if (oldDocValues == null) {
+ return null;
+ } else {
+ return new SortingSortedNumericDocValues(oldDocValues, docMap);
+ }
+ }
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
Modified: lucene/dev/branches/solr-5473/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (original)
+++ lucene/dev/branches/solr-5473/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java Thu Jun 19 16:25:31 2014
@@ -36,6 +36,7 @@ import org.apache.lucene.document.Field.
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@@ -52,6 +53,7 @@ import org.apache.lucene.index.NumericDo
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
@@ -145,6 +147,7 @@ public abstract class SorterTestBase ext
protected static final String DOC_POSITIONS_FIELD = "positions";
protected static final String DOC_POSITIONS_TERM = "$all$";
protected static final String NUMERIC_DV_FIELD = "numeric";
+ protected static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric";
protected static final String NORMS_FIELD = "norm";
protected static final String BINARY_DV_FIELD = "binary";
protected static final String SORTED_DV_FIELD = "sorted";
@@ -183,6 +186,10 @@ public abstract class SorterTestBase ext
doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id))));
doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1))));
}
+ if (defaultCodecSupportsSortedNumeric()) {
+ doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id));
+ doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1));
+ }
doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE));
return doc;
}
@@ -389,6 +396,20 @@ public abstract class SorterTestBase ext
}
@Test
+ public void testSortedNumericDocValuesField() throws Exception {
+ assumeTrue("default codec does not support SORTED_NUMERIC", defaultCodecSupportsSortedNumeric());
+ SortedNumericDocValues dv = reader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD);
+ int maxDoc = reader.maxDoc();
+ for (int i = 0; i < maxDoc; i++) {
+ dv.setDocument(i);
+ assertEquals(2, dv.count());
+ int value = sortedValues[i].intValue();
+ assertEquals("incorrect sorted-numeric DocValues for doc " + i, value, dv.valueAt(0));
+ assertEquals("incorrect sorted-numeric DocValues for doc " + i, value + 1, dv.valueAt(1));
+ }
+ }
+
+ @Test
public void testTermVectors() throws Exception {
int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
Modified: lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java Thu Jun 19 16:25:31 2014
@@ -46,7 +46,7 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -557,7 +557,7 @@ public class TestPrecedenceQueryParser e
}
public void testBoost() throws Exception {
- CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
+ CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
PrecedenceQueryParser qp = new PrecedenceQueryParser();
Modified: lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (original)
+++ lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java Thu Jun 19 16:25:31 2014
@@ -67,7 +67,7 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.AfterClass;
@@ -957,7 +957,7 @@ public class TestQPHelper extends Lucene
}
public void testBoost() throws Exception {
- CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
+ CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
StandardQueryParser qp = new StandardQueryParser();
qp.setAnalyzer(oneStopAnalyzer);
Modified: lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (original)
+++ lucene/dev/branches/solr-5473/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java Thu Jun 19 16:25:31 2014
@@ -47,7 +47,7 @@ import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.AfterClass;
@@ -868,7 +868,7 @@ public abstract class QueryParserTestBas
public void testBoost()
throws Exception {
- CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
+ CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
Query q = getQuery("on^1.0",qp);
Modified: lucene/dev/branches/solr-5473/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java (original)
+++ lucene/dev/branches/solr-5473/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java Thu Jun 19 16:25:31 2014
@@ -18,45 +18,25 @@ package org.apache.lucene.codecs.idversi
*/
import java.io.IOException;
-import java.io.PrintStream;
import java.util.Collections;
import java.util.Iterator;
import java.util.TreeMap;
-import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.automaton.RunAutomaton;
-import org.apache.lucene.util.automaton.Transition;
-import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.PairOutputs.Pair;
-import org.apache.lucene.util.fst.PairOutputs;
-import org.apache.lucene.util.fst.Util;
/**
* See {@link VersionBlockTreeTermsWriter}.
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Thu Jun 19 16:25:31 2014
@@ -17,6 +17,16 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
@@ -33,30 +43,19 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicOperations;
-import org.apache.lucene.util.automaton.SpecialOperations;
-import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
-import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.fst.Util.TopResults;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import org.apache.lucene.util.fst.Util;
/**
* Suggester that first analyzes the surface form, adds the
@@ -255,37 +254,64 @@ public class AnalyzingSuggester extends
return fst == null ? 0 : fst.ramBytesUsed();
}
- private void copyDestTransitions(State from, State to, List<Transition> transitions) {
- if (to.isAccept()) {
- from.setAccept(true);
- }
- for(Transition t : to.getTransitions()) {
- transitions.add(t);
+ private int[] topoSortStates(Automaton a) {
+ int[] states = new int[a.getNumStates()];
+ final Set<Integer> visited = new HashSet<>();
+ final LinkedList<Integer> worklist = new LinkedList<>();
+ worklist.add(0);
+ visited.add(0);
+ int upto = 0;
+ states[upto] = 0;
+ upto++;
+ Transition t = new Transition();
+ while (worklist.size() > 0) {
+ int s = worklist.removeFirst();
+ int count = a.initTransition(s, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (!visited.contains(t.dest)) {
+ visited.add(t.dest);
+ worklist.add(t.dest);
+ states[upto++] = t.dest;
+ }
+ }
}
+ return states;
}
+
// Replaces SEP with epsilon or remaps them if
// we were asked to preserve them:
- private void replaceSep(Automaton a) {
+ private Automaton replaceSep(Automaton a) {
- State[] states = a.getNumberedStates();
+ Automaton result = new Automaton();
+
+ // Copy all states over
+ int numStates = a.getNumStates();
+ for(int s=0;s<numStates;s++) {
+ result.createState();
+ result.setAccept(s, a.isAccept(s));
+ }
// Go in reverse topo sort so we know we only have to
// make one pass:
- for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) {
- final State state = states[stateNumber];
- List<Transition> newTransitions = new ArrayList<>();
- for(Transition t : state.getTransitions()) {
- assert t.getMin() == t.getMax();
- if (t.getMin() == TokenStreamToAutomaton.POS_SEP) {
+ Transition t = new Transition();
+ int[] topoSortStates = topoSortStates(a);
+ for(int i=0;i<topoSortStates.length;i++) {
+ int state = topoSortStates[topoSortStates.length-1-i];
+ int count = a.initTransition(state, t);
+ for(int j=0;j<count;j++) {
+ a.getNextTransition(t);
+ if (t.min == TokenStreamToAutomaton.POS_SEP) {
+ assert t.max == TokenStreamToAutomaton.POS_SEP;
if (preserveSep) {
// Remap to SEP_LABEL:
- newTransitions.add(new Transition(SEP_LABEL, t.getDest()));
+ result.addTransition(state, t.dest, SEP_LABEL);
} else {
- copyDestTransitions(state, t.getDest(), newTransitions);
- a.setDeterministic(false);
+ result.addEpsilon(state, t.dest);
}
- } else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
+ } else if (t.min == TokenStreamToAutomaton.HOLE) {
+ assert t.max == TokenStreamToAutomaton.HOLE;
// Just remove the hole: there will then be two
// SEP tokens next to each other, which will only
@@ -294,14 +320,16 @@ public class AnalyzingSuggester extends
// that's somehow a problem we can always map HOLE
// to a dedicated byte (and escape it in the
// input).
- copyDestTransitions(state, t.getDest(), newTransitions);
- a.setDeterministic(false);
+ result.addEpsilon(state, t.dest);
} else {
- newTransitions.add(t);
+ result.addTransition(state, t.dest, t.min, t.max);
}
}
- state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()]));
}
+
+ result.finishState();
+
+ return result;
}
/** Used by subclass to change the lookup automaton, if
@@ -665,7 +693,6 @@ public class AnalyzingSuggester extends
}
final BytesRef utf8Key = new BytesRef(key);
try {
-
Automaton lookupAutomaton = toLookupAutomaton(key);
final CharsRef spare = new CharsRef();
@@ -835,7 +862,7 @@ public class AnalyzingSuggester extends
automaton = ts2a.toAutomaton(ts);
}
- replaceSep(automaton);
+ automaton = replaceSep(automaton);
automaton = convertAutomaton(automaton);
// TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
@@ -848,7 +875,8 @@ public class AnalyzingSuggester extends
// TODO: we could walk & add simultaneously, so we
// don't have to alloc [possibly biggish]
// intermediate HashSet in RAM:
- return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
+
+ return Operations.getFiniteStrings(automaton, maxGraphExpansions);
}
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
@@ -856,24 +884,16 @@ public class AnalyzingSuggester extends
// Turn tokenstream into automaton:
Automaton automaton = null;
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
- automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
+ automaton = getTokenStreamToAutomaton().toAutomaton(ts);
}
- // TODO: we could use the end offset to "guess"
- // whether the final token was a partial token; this
- // would only be a heuristic ... but maybe an OK one.
- // This way we could eg differentiate "net" from "net ",
- // which we can't today...
-
- replaceSep(automaton);
+ automaton = replaceSep(automaton);
// TODO: we can optimize this somewhat by determinizing
// while we convert
- BasicOperations.determinize(automaton);
+ automaton = Operations.determinize(automaton);
return automaton;
}
-
-
/**
* Returns the weight associated with an input string,
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java Thu Jun 19 16:25:31 2014
@@ -17,13 +17,12 @@ package org.apache.lucene.search.suggest
* limitations under the License.
*/
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.io.IOException;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
@@ -43,7 +42,7 @@ public class FSTUtil {
public static final class Path<T> {
/** Node in the automaton where path ends: */
- public final State state;
+ public final int state;
/** Node in the FST where path ends: */
public final FST.Arc<T> fstNode;
@@ -55,7 +54,7 @@ public class FSTUtil {
public final IntsRef input;
/** Sole constructor. */
- public Path(State state, FST.Arc<T> fstNode, T output, IntsRef input) {
+ public Path(int state, FST.Arc<T> fstNode, T output, IntsRef input) {
this.state = state;
this.fstNode = fstNode;
this.output = output;
@@ -72,16 +71,22 @@ public class FSTUtil {
assert a.isDeterministic();
final List<Path<T>> queue = new ArrayList<>();
final List<Path<T>> endNodes = new ArrayList<>();
- queue.add(new Path<>(a.getInitialState(), fst
+ if (a.getNumStates() == 0) {
+ return endNodes;
+ }
+
+ queue.add(new Path<>(0, fst
.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
new IntsRef()));
final FST.Arc<T> scratchArc = new FST.Arc<>();
final FST.BytesReader fstReader = fst.getBytesReader();
-
+
+ Transition t = new Transition();
+
while (queue.size() != 0) {
final Path<T> path = queue.remove(queue.size() - 1);
- if (path.state.isAccept()) {
+ if (a.isAccept(path.state)) {
endNodes.add(path);
// we can stop here: if we accept this path,
// we accept all further paths too
@@ -89,18 +94,20 @@ public class FSTUtil {
}
IntsRef currentInput = path.input;
- for (Transition t : path.state.getTransitions()) {
- final int min = t.getMin();
- final int max = t.getMax();
+ int count = a.initTransition(path.state, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ final int min = t.min;
+ final int max = t.max;
if (min == max) {
- final FST.Arc<T> nextArc = fst.findTargetArc(t.getMin(),
+ final FST.Arc<T> nextArc = fst.findTargetArc(t.min,
path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
final IntsRef newInput = new IntsRef(currentInput.length + 1);
newInput.copyInts(currentInput);
- newInput.ints[currentInput.length] = t.getMin();
+ newInput.ints[currentInput.length] = t.min;
newInput.length = currentInput.length + 1;
- queue.add(new Path<>(t.getDest(), new FST.Arc<T>()
+ queue.add(new Path<>(t.dest, new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output), newInput));
}
@@ -122,7 +129,7 @@ public class FSTUtil {
newInput.copyInts(currentInput);
newInput.ints[currentInput.length] = nextArc.label;
newInput.length = currentInput.length + 1;
- queue.add(new Path<>(t.getDest(), new FST.Arc<T>()
+ queue.add(new Path<>(t.dest, new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output), newInput));
final int label = nextArc.label; // used in assert
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java Thu Jun 19 16:25:31 2014
@@ -28,11 +28,11 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // javadocs
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
-import org.apache.lucene.util.automaton.SpecialOperations;
+import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.UTF32ToUTF8;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
@@ -205,7 +205,7 @@ public final class FuzzySuggester extend
protected Automaton convertAutomaton(Automaton a) {
if (unicodeAware) {
Automaton utf8automaton = new UTF32ToUTF8().convert(a);
- BasicOperations.determinize(utf8automaton);
+ utf8automaton = Operations.determinize(utf8automaton);
return utf8automaton;
} else {
return a;
@@ -220,15 +220,14 @@ public final class FuzzySuggester extend
}
Automaton toLevenshteinAutomata(Automaton automaton) {
- final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
+ final Set<IntsRef> ref = Operations.getFiniteStrings(automaton, -1);
Automaton subs[] = new Automaton[ref.size()];
int upto = 0;
for (IntsRef path : ref) {
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
- subs[upto] = BasicAutomata.makeString(path.ints, path.offset, path.length);
+ subs[upto] = Automata.makeString(path.ints, path.offset, path.length);
upto++;
} else {
- Automaton prefix = BasicAutomata.makeString(path.ints, path.offset, nonFuzzyPrefix);
int ints[] = new int[path.length-nonFuzzyPrefix];
System.arraycopy(path.ints, path.offset+nonFuzzyPrefix, ints, 0, ints.length);
// TODO: maybe add alphaMin to LevenshteinAutomata,
@@ -237,29 +236,24 @@ public final class FuzzySuggester extend
// edited... but then 0 byte is "in general" allowed
// on input (but not in UTF8).
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
- Automaton levAutomaton = lev.toAutomaton(maxEdits);
- Automaton combined = BasicOperations.concatenate(Arrays.asList(prefix, levAutomaton));
- combined.setDeterministic(true); // its like the special case in concatenate itself, except we cloneExpanded already
- subs[upto] = combined;
+ subs[upto] = lev.toAutomaton(maxEdits, UnicodeUtil.newString(path.ints, path.offset, nonFuzzyPrefix));
upto++;
}
}
if (subs.length == 0) {
// automaton is empty: there are no accepted paths through it
- return BasicAutomata.makeEmpty(); // matches nothing
+ return Automata.makeEmpty(); // matches nothing
} else if (subs.length == 1) {
// no synonyms or anything: just a single path through the tokenstream
return subs[0];
} else {
// multiple paths: this is really scary! is it slow?
// maybe we should not do this and throw UnsupportedOperationException (UOE) instead?
- Automaton a = BasicOperations.union(Arrays.asList(subs));
+ Automaton a = Operations.union(Arrays.asList(subs));
// TODO: we could call toLevenshteinAutomata() before det?
// this only happens if you have multiple paths anyway (e.g. synonyms)
- BasicOperations.determinize(a);
-
- return a;
+ return Operations.determinize(a);
}
}
}
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Thu Jun 19 16:25:31 2014
@@ -37,7 +37,9 @@ import org.apache.lucene.util.UnicodeUti
* <a href="http://jaspell.sourceforge.net/">JaSpell</a>.
*
* @see JaspellTernarySearchTrie
+ * @deprecated Migrate to one of the newer suggesters which are much more RAM efficient.
*/
+@Deprecated
public class JaspellLookup extends Lookup implements Accountable {
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
private boolean usePrefix = true;
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java Thu Jun 19 16:25:31 2014
@@ -62,7 +62,10 @@ import org.apache.lucene.util.RamUsageEs
* Algorithms, January 1997). Algorithms in C, Third Edition, by Robert
* Sedgewick (Addison-Wesley, 1998) provides yet another view of ternary search
* trees.
+ *
+ * @deprecated Migrate to one of the newer suggesters which are much more RAM efficient.
*/
+@Deprecated
public class JaspellTernarySearchTrie implements Accountable {
/**
@@ -98,7 +101,9 @@ public class JaspellTernarySearchTrie im
@Override
public long ramBytesUsed() {
long mem = RamUsageEstimator.shallowSizeOf(this) + RamUsageEstimator.shallowSizeOf(relatives);
- for (TSTNode node : relatives) {
+ // We don't need to add parent since our parent added itself:
+ for (int i=1;i<4;i++) {
+ TSTNode node = relatives[i];
if (node != null) {
mem += node.ramBytesUsed();
}
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java Thu Jun 19 16:25:31 2014
@@ -92,4 +92,4 @@ public final class InputArrayIterator im
public boolean hasContexts() {
return hasContexts;
}
-}
\ No newline at end of file
+}
Modified: lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1603938&r1=1603937&r2=1603938&view=diff
==============================================================================
--- lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/solr-5473/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Thu Jun 19 16:25:31 2014
@@ -40,15 +40,14 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.fst.Util;
public class FuzzySuggesterTest extends LuceneTestCase {
@@ -754,27 +753,28 @@ public class FuzzySuggesterTest extends
// this:
Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
assertTrue(automaton.isDeterministic());
+
// TODO: could be faster... but it's slowCompletor for a reason
BytesRef spare = new BytesRef();
for (TermFreqPayload2 e : slowCompletor) {
spare.copyChars(e.analyzedForm);
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
for (IntsRef intsRef : finiteStrings) {
- State p = automaton.getInitialState();
+ int p = 0;
BytesRef ref = Util.toBytesRef(intsRef, spare);
boolean added = false;
for (int i = ref.offset; i < ref.length; i++) {
- State q = p.step(ref.bytes[i] & 0xff);
- if (q == null) {
+ int q = automaton.step(p, ref.bytes[i] & 0xff);
+ if (q == -1) {
break;
- } else if (q.isAccept()) {
+ } else if (automaton.isAccept(q)) {
matches.add(new LookupResult(e.surfaceForm, e.weight));
added = true;
break;
}
p = q;
}
- if (!added && p.isAccept()) {
+ if (!added && automaton.isAccept(p)) {
matches.add(new LookupResult(e.surfaceForm, e.weight));
}
}