You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/19 10:22:11 UTC
svn commit: r1603752 [3/3] - in /lucene/dev/trunk: ./ dev-tools/ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/test/org/apache/lucene/analysis/core/
lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/memory/ lu...
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java Thu Jun 19 08:22:08 2014
@@ -92,4 +92,4 @@ public final class InputArrayIterator im
public boolean hasContexts() {
return hasContexts;
}
-}
\ No newline at end of file
+}
Modified: lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Thu Jun 19 08:22:08 2014
@@ -40,15 +40,14 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.fst.Util;
public class FuzzySuggesterTest extends LuceneTestCase {
@@ -754,27 +753,28 @@ public class FuzzySuggesterTest extends
// this:
Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
assertTrue(automaton.isDeterministic());
+
// TODO: could be faster... but its slowCompletor for a reason
BytesRef spare = new BytesRef();
for (TermFreqPayload2 e : slowCompletor) {
spare.copyChars(e.analyzedForm);
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
for (IntsRef intsRef : finiteStrings) {
- State p = automaton.getInitialState();
+ int p = 0;
BytesRef ref = Util.toBytesRef(intsRef, spare);
boolean added = false;
for (int i = ref.offset; i < ref.length; i++) {
- State q = p.step(ref.bytes[i] & 0xff);
- if (q == null) {
+ int q = automaton.step(p, ref.bytes[i] & 0xff);
+ if (q == -1) {
break;
- } else if (q.isAccept()) {
+ } else if (automaton.isAccept(q)) {
matches.add(new LookupResult(e.surfaceForm, e.weight));
added = true;
break;
}
p = q;
}
- if (!added && p.isAccept()) {
+ if (!added && automaton.isAccept(p)) {
matches.add(new LookupResult(e.surfaceForm, e.weight));
}
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Thu Jun 19 08:22:08 2014
@@ -17,15 +17,15 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
-import static org.apache.lucene.util.automaton.BasicAutomata.makeEmpty;
-import static org.apache.lucene.util.automaton.BasicAutomata.makeString;
+import static org.apache.lucene.util.automaton.Automata.makeEmpty;
+import static org.apache.lucene.util.automaton.Automata.makeString;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**
@@ -43,7 +43,7 @@ public final class MockTokenFilter exten
/** Set of common english stopwords */
public static final CharacterRunAutomaton ENGLISH_STOPSET =
- new CharacterRunAutomaton(BasicOperations.union(Arrays.asList(
+ new CharacterRunAutomaton(Operations.union(Arrays.asList(
makeString("a"), makeString("an"), makeString("and"), makeString("are"),
makeString("as"), makeString("at"), makeString("be"), makeString("but"),
makeString("by"), makeString("for"), makeString("if"), makeString("in"),
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/SearchEquivalenceTestBase.java Thu Jun 19 08:22:08 2014
@@ -33,7 +33,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -57,7 +57,7 @@ public abstract class SearchEquivalenceT
Random random = random();
directory = newDirectory();
stopword = "" + randomChar();
- CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
+ CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
Document doc = new Document();
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Thu Jun 19 08:22:08 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.util.automaton
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -31,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.fst.Util;
/**
* Utilities for testing automata.
@@ -92,40 +90,40 @@ public class AutomatonTestUtil {
/** picks a random int code point, avoiding surrogates;
* throws IllegalArgumentException if this transition only
* accepts surrogates */
- private static int getRandomCodePoint(final Random r, final Transition t) {
+ private static int getRandomCodePoint(final Random r, int min, int max) {
final int code;
- if (t.max < UnicodeUtil.UNI_SUR_HIGH_START ||
- t.min > UnicodeUtil.UNI_SUR_HIGH_END) {
+ if (max < UnicodeUtil.UNI_SUR_HIGH_START ||
+ min > UnicodeUtil.UNI_SUR_HIGH_END) {
// easy: entire range is before or after surrogates
- code = t.min+r.nextInt(t.max-t.min+1);
- } else if (t.min >= UnicodeUtil.UNI_SUR_HIGH_START) {
- if (t.max > UnicodeUtil.UNI_SUR_LOW_END) {
+ code = min+r.nextInt(max-min+1);
+ } else if (min >= UnicodeUtil.UNI_SUR_HIGH_START) {
+ if (max > UnicodeUtil.UNI_SUR_LOW_END) {
// after surrogates
- code = 1+UnicodeUtil.UNI_SUR_LOW_END+r.nextInt(t.max-UnicodeUtil.UNI_SUR_LOW_END);
+ code = 1+UnicodeUtil.UNI_SUR_LOW_END+r.nextInt(max-UnicodeUtil.UNI_SUR_LOW_END);
} else {
- throw new IllegalArgumentException("transition accepts only surrogates: " + t);
+ throw new IllegalArgumentException("transition accepts only surrogates: min=" + min + " max=" + max);
}
- } else if (t.max <= UnicodeUtil.UNI_SUR_LOW_END) {
- if (t.min < UnicodeUtil.UNI_SUR_HIGH_START) {
+ } else if (max <= UnicodeUtil.UNI_SUR_LOW_END) {
+ if (min < UnicodeUtil.UNI_SUR_HIGH_START) {
// before surrogates
- code = t.min + r.nextInt(UnicodeUtil.UNI_SUR_HIGH_START - t.min);
+ code = min + r.nextInt(UnicodeUtil.UNI_SUR_HIGH_START - min);
} else {
- throw new IllegalArgumentException("transition accepts only surrogates: " + t);
+ throw new IllegalArgumentException("transition accepts only surrogates: min=" + min + " max=" + max);
}
} else {
// range includes all surrogates
- int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.min;
- int gap2 = t.max - UnicodeUtil.UNI_SUR_LOW_END;
+ int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - min;
+ int gap2 = max - UnicodeUtil.UNI_SUR_LOW_END;
int c = r.nextInt(gap1+gap2);
if (c < gap1) {
- code = t.min + c;
+ code = min + c;
} else {
code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1;
}
}
- assert code >= t.min && code <= t.max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END):
- "code=" + code + " min=" + t.min + " max=" + t.max;
+ assert code >= min && code <= max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END):
+ "code=" + code + " min=" + min + " max=" + max;
return code;
}
@@ -140,11 +138,13 @@ public class AutomatonTestUtil {
private final Map<Transition,Boolean> leadsToAccept;
private final Automaton a;
+ private final Transition[][] transitions;
private static class ArrivingTransition {
- final State from;
+ final int from;
final Transition t;
- public ArrivingTransition(State from, Transition t) {
+
+ public ArrivingTransition(int from, Transition t) {
this.from = from;
this.t = t;
}
@@ -152,32 +152,30 @@ public class AutomatonTestUtil {
public RandomAcceptedStrings(Automaton a) {
this.a = a;
- if (a.isSingleton()) {
- leadsToAccept = null;
- return;
+ if (a.getNumStates() == 0) {
+ throw new IllegalArgumentException("this automaton accepts nothing");
}
+ this.transitions = a.getSortedTransitions();
- // must use IdentityHashmap because two Transitions w/
- // different start nodes can be considered the same
- leadsToAccept = new IdentityHashMap<>();
- final Map<State,List<ArrivingTransition>> allArriving = new HashMap<>();
+ leadsToAccept = new HashMap<>();
+ final Map<Integer,List<ArrivingTransition>> allArriving = new HashMap<>();
- final LinkedList<State> q = new LinkedList<>();
- final Set<State> seen = new HashSet<>();
+ final LinkedList<Integer> q = new LinkedList<>();
+ final Set<Integer> seen = new HashSet<>();
// reverse map the transitions, so we can quickly look
// up all arriving transitions to a given state
- for(State s: a.getNumberedStates()) {
- for(int i=0;i<s.numTransitions;i++) {
- final Transition t = s.transitionsArray[i];
- List<ArrivingTransition> tl = allArriving.get(t.to);
+ int numStates = a.getNumStates();
+ for(int s=0;s<numStates;s++) {
+ for(Transition t : transitions[s]) {
+ List<ArrivingTransition> tl = allArriving.get(t.dest);
if (tl == null) {
tl = new ArrayList<>();
- allArriving.put(t.to, tl);
+ allArriving.put(t.dest, tl);
}
tl.add(new ArrivingTransition(s, t));
}
- if (s.accept) {
+ if (a.isAccept(s)) {
q.add(s);
seen.add(s);
}
@@ -185,12 +183,12 @@ public class AutomatonTestUtil {
// Breadth-first search, from accept states,
// backwards:
- while(!q.isEmpty()) {
- final State s = q.removeFirst();
+ while (q.isEmpty() == false) {
+ final int s = q.removeFirst();
List<ArrivingTransition> arriving = allArriving.get(s);
if (arriving != null) {
for(ArrivingTransition at : arriving) {
- final State from = at.from;
+ final int from = at.from;
if (!seen.contains(from)) {
q.add(from);
seen.add(from);
@@ -204,62 +202,49 @@ public class AutomatonTestUtil {
public int[] getRandomAcceptedString(Random r) {
final List<Integer> soFar = new ArrayList<>();
- if (a.isSingleton()) {
- // accepts only one
- final String s = a.singleton;
-
- int charUpto = 0;
- while(charUpto < s.length()) {
- final int cp = s.codePointAt(charUpto);
- charUpto += Character.charCount(cp);
- soFar.add(cp);
- }
- } else {
- State s = a.initial;
+ int s = 0;
- while(true) {
+ while(true) {
- if (s.accept) {
- if (s.numTransitions == 0) {
- // stop now
+ if (a.isAccept(s)) {
+ if (a.getNumTransitions(s) == 0) {
+ // stop now
+ break;
+ } else {
+ if (r.nextBoolean()) {
break;
- } else {
- if (r.nextBoolean()) {
- break;
- }
}
}
+ }
- if (s.numTransitions == 0) {
- throw new RuntimeException("this automaton has dead states");
- }
+ if (a.getNumTransitions(s) == 0) {
+ throw new RuntimeException("this automaton has dead states");
+ }
- boolean cheat = r.nextBoolean();
+ boolean cheat = r.nextBoolean();
- final Transition t;
- if (cheat) {
- // pick a transition that we know is the fastest
- // path to an accept state
- List<Transition> toAccept = new ArrayList<>();
- for(int i=0;i<s.numTransitions;i++) {
- final Transition t0 = s.transitionsArray[i];
- if (leadsToAccept.containsKey(t0)) {
- toAccept.add(t0);
- }
- }
- if (toAccept.size() == 0) {
- // this is OK -- it means we jumped into a cycle
- t = s.transitionsArray[r.nextInt(s.numTransitions)];
- } else {
- t = toAccept.get(r.nextInt(toAccept.size()));
+ final Transition t;
+ if (cheat) {
+ // pick a transition that we know is the fastest
+ // path to an accept state
+ List<Transition> toAccept = new ArrayList<>();
+ for(Transition t0 : transitions[s]) {
+ if (leadsToAccept.containsKey(t0)) {
+ toAccept.add(t0);
}
+ }
+ if (toAccept.size() == 0) {
+ // this is OK -- it means we jumped into a cycle
+ t = transitions[s][r.nextInt(transitions[s].length)];
} else {
- t = s.transitionsArray[r.nextInt(s.numTransitions)];
+ t = toAccept.get(r.nextInt(toAccept.size()));
}
- soFar.add(getRandomCodePoint(r, t));
- s = t.to;
+ } else {
+ t = transitions[s][r.nextInt(transitions[s].length)];
}
+ soFar.add(getRandomCodePoint(r, t.min, t.max));
+ s = t.dest;
}
return ArrayUtil.toIntArray(soFar);
@@ -270,19 +255,21 @@ public class AutomatonTestUtil {
public static Automaton randomAutomaton(Random random) {
// get two random Automata from regexps
Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
- if (random.nextBoolean())
- a1 = BasicOperations.complement(a1);
+ if (random.nextBoolean()) {
+ a1 = Operations.complement(a1);
+ }
Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
- if (random.nextBoolean())
- a2 = BasicOperations.complement(a2);
-
+ if (random.nextBoolean()) {
+ a2 = Operations.complement(a2);
+ }
+
// combine them in random ways
- switch(random.nextInt(4)) {
- case 0: return BasicOperations.concatenate(a1, a2);
- case 1: return BasicOperations.union(a1, a2);
- case 2: return BasicOperations.intersection(a1, a2);
- default: return BasicOperations.minus(a1, a2);
+ switch (random.nextInt(4)) {
+ case 0: return Operations.concatenate(a1, a2);
+ case 1: return Operations.union(a1, a2);
+ case 2: return Operations.intersection(a1, a2);
+ default: return Operations.minus(a1, a2);
}
}
@@ -323,70 +310,81 @@ public class AutomatonTestUtil {
/**
* Simple, original brics implementation of Brzozowski minimize()
*/
- public static void minimizeSimple(Automaton a) {
- if (a.isSingleton())
- return;
- determinizeSimple(a, SpecialOperations.reverse(a));
- determinizeSimple(a, SpecialOperations.reverse(a));
+ public static Automaton minimizeSimple(Automaton a) {
+ Set<Integer> initialSet = new HashSet<Integer>();
+ a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
+ initialSet.clear();
+ a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
+ return a;
}
/**
* Simple, original brics implementation of determinize()
*/
- public static void determinizeSimple(Automaton a) {
- if (a.deterministic || a.isSingleton())
- return;
- Set<State> initialset = new HashSet<>();
- initialset.add(a.initial);
- determinizeSimple(a, initialset);
+ public static Automaton determinizeSimple(Automaton a) {
+ Set<Integer> initialset = new HashSet<>();
+ initialset.add(0);
+ return determinizeSimple(a, initialset);
}
-
+
/**
* Simple, original brics implementation of determinize()
* Determinizes the given automaton using the given set of initial states.
*/
- public static void determinizeSimple(Automaton a, Set<State> initialset) {
+ public static Automaton determinizeSimple(Automaton a, Set<Integer> initialset) {
+ if (a.getNumStates() == 0) {
+ return a;
+ }
int[] points = a.getStartPoints();
// subset construction
- Map<Set<State>, Set<State>> sets = new HashMap<>();
- LinkedList<Set<State>> worklist = new LinkedList<>();
- Map<Set<State>, State> newstate = new HashMap<>();
+ Map<Set<Integer>, Set<Integer>> sets = new HashMap<>();
+ LinkedList<Set<Integer>> worklist = new LinkedList<>();
+ Map<Set<Integer>, Integer> newstate = new HashMap<>();
sets.put(initialset, initialset);
worklist.add(initialset);
- a.initial = new State();
- newstate.put(initialset, a.initial);
+ Automaton.Builder result = new Automaton.Builder();
+ result.createState();
+ newstate.put(initialset, 0);
+ Transition t = new Transition();
while (worklist.size() > 0) {
- Set<State> s = worklist.removeFirst();
- State r = newstate.get(s);
- for (State q : s)
- if (q.accept) {
- r.accept = true;
+ Set<Integer> s = worklist.removeFirst();
+ int r = newstate.get(s);
+ for (int q : s) {
+ if (a.isAccept(q)) {
+ result.setAccept(r, true);
break;
}
+ }
for (int n = 0; n < points.length; n++) {
- Set<State> p = new HashSet<>();
- for (State q : s)
- for (Transition t : q.getTransitions())
- if (t.min <= points[n] && points[n] <= t.max)
- p.add(t.to);
+ Set<Integer> p = new HashSet<>();
+ for (int q : s) {
+ int count = a.initTransition(q, t);
+ for(int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (t.min <= points[n] && points[n] <= t.max) {
+ p.add(t.dest);
+ }
+ }
+ }
+
if (!sets.containsKey(p)) {
sets.put(p, p);
worklist.add(p);
- newstate.put(p, new State());
+ newstate.put(p, result.createState());
}
- State q = newstate.get(p);
+ int q = newstate.get(p);
int min = points[n];
int max;
- if (n + 1 < points.length)
+ if (n + 1 < points.length) {
max = points[n + 1] - 1;
- else
+ } else {
max = Character.MAX_CODE_POINT;
- r.addTransition(new Transition(min, max, q));
+ }
+ result.addTransition(r, q, min, max);
}
}
- a.deterministic = true;
- a.clearNumberedStates();
- a.removeDeadTransitions();
+
+ return Operations.removeDeadStates(result.finish());
}
/**
@@ -403,11 +401,7 @@ public class AutomatonTestUtil {
*/
public static Set<IntsRef> getFiniteStringsRecursive(Automaton a, int limit) {
HashSet<IntsRef> strings = new HashSet<>();
- if (a.isSingleton()) {
- if (limit > 0) {
- strings.add(Util.toUTF32(a.singleton, new IntsRef()));
- }
- } else if (!getFiniteStrings(a.initial, new HashSet<State>(), strings, new IntsRef(), limit)) {
+ if (!getFiniteStrings(a, 0, new HashSet<Integer>(), strings, new IntsRef(), limit)) {
return strings;
}
return strings;
@@ -418,24 +412,27 @@ public class AutomatonTestUtil {
* false if more than <code>limit</code> strings are found.
* <code>limit</code><0 means "infinite".
*/
- private static boolean getFiniteStrings(State s, HashSet<State> pathstates,
+ private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates,
HashSet<IntsRef> strings, IntsRef path, int limit) {
pathstates.add(s);
- for (Transition t : s.getTransitions()) {
- if (pathstates.contains(t.to)) {
+ Transition t = new Transition();
+ int count = a.initTransition(s, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (pathstates.contains(t.dest)) {
return false;
}
for (int n = t.min; n <= t.max; n++) {
path.grow(path.length+1);
path.ints[path.length] = n;
path.length++;
- if (t.to.accept) {
+ if (a.isAccept(t.dest)) {
strings.add(IntsRef.deepCopyOf(path));
if (limit >= 0 && strings.size() > limit) {
return false;
}
}
- if (!getFiniteStrings(t.to, pathstates, strings, path, limit)) {
+ if (!getFiniteStrings(a, t.dest, pathstates, strings, path, limit)) {
return false;
}
path.length--;
@@ -452,8 +449,10 @@ public class AutomatonTestUtil {
* this is only used to test the correctness of our faster implementation.
*/
public static boolean isFiniteSlow(Automaton a) {
- if (a.isSingleton()) return true;
- return isFiniteSlow(a.initial, new HashSet<State>());
+ if (a.getNumStates() == 0) {
+ return true;
+ }
+ return isFiniteSlow(a, 0, new HashSet<Integer>());
}
/**
@@ -462,22 +461,48 @@ public class AutomatonTestUtil {
*/
// TODO: not great that this is recursive... in theory a
// large automata could exceed java's stack
- private static boolean isFiniteSlow(State s, HashSet<State> path) {
+ private static boolean isFiniteSlow(Automaton a, int s, HashSet<Integer> path) {
path.add(s);
- for (Transition t : s.getTransitions())
- if (path.contains(t.to) || !isFiniteSlow(t.to, path)) return false;
+ Transition t = new Transition();
+ int count = a.initTransition(s, t);
+ for (int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (path.contains(t.dest) || !isFiniteSlow(a, t.dest, path)) {
+ return false;
+ }
+ }
path.remove(s);
return true;
}
-
/**
* Checks that an automaton has no detached states that are unreachable
* from the initial state.
*/
public static void assertNoDetachedStates(Automaton a) {
- int numStates = a.getNumberOfStates();
- a.clearNumberedStates(); // force recomputation of cached numbered states
- assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
+ Automaton a2 = Operations.removeDeadStates(a);
+ assert a.getNumStates() == a2.getNumStates() : "automaton has " + (a.getNumStates() - a2.getNumStates()) + " detached states";
}
+
+ /** Returns true if the automaton is deterministic. */
+ public static boolean isDeterministicSlow(Automaton a) {
+ Transition t = new Transition();
+ int numStates = a.getNumStates();
+ for(int s=0;s<numStates;s++) {
+ int count = a.initTransition(s, t);
+ int lastMax = -1;
+ for(int i=0;i<count;i++) {
+ a.getNextTransition(t);
+ if (t.min <= lastMax) {
+ assert a.isDeterministic() == false;
+ return false;
+ }
+ lastMax = t.max;
+ }
+ }
+
+ assert a.isDeterministic() == true;
+ return true;
+ }
+
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java Thu Jun 19 08:22:08 2014
@@ -17,6 +17,12 @@
package org.apache.solr.parser;
+import java.io.StringReader;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
@@ -34,10 +40,9 @@ import org.apache.lucene.search.Wildcard
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
-import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
@@ -49,12 +54,6 @@ import org.apache.solr.schema.TextField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;
-import java.io.StringReader;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
/** This class is overridden by QueryParser in QueryParser.jj
* and acts to separate the majority of the Java code from the .jj grammar file.
*/
@@ -780,16 +779,16 @@ public abstract class SolrQueryParserBas
Automaton automaton = WildcardQuery.toAutomaton(term);
// TODO: we should likely use the automaton to calculate shouldReverse, too.
if (factory.shouldReverse(termStr)) {
- automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
- SpecialOperations.reverse(automaton);
+ automaton = Operations.concatenate(automaton, Automata.makeChar(factory.getMarkerChar()));
+ automaton = Operations.reverse(automaton);
} else {
// reverse wildcardfilter is active: remove false positives
// fsa representing false positives (markerChar*)
- Automaton falsePositives = BasicOperations.concatenate(
- BasicAutomata.makeChar(factory.getMarkerChar()),
- BasicAutomata.makeAnyString());
+ Automaton falsePositives = Operations.concatenate(
+ Automata.makeChar(factory.getMarkerChar()),
+ Automata.makeAnyString());
// subtract these away
- automaton = BasicOperations.minus(automaton, falsePositives);
+ automaton = Operations.minus(automaton, falsePositives);
}
return new AutomatonQuery(term, automaton) {
// override toString so its completely transparent
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java Thu Jun 19 08:22:08 2014
@@ -25,10 +25,7 @@ import org.apache.lucene.analysis.util.T
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.BasicAutomata;
-import org.apache.lucene.util.automaton.BasicOperations;
-import org.apache.lucene.util.automaton.SpecialOperations;
+import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java Thu Jun 19 08:22:08 2014
@@ -19,7 +19,6 @@ package org.apache.solr.analysis;
import java.io.IOException;
import java.lang.reflect.Field;
-
import java.util.HashMap;
import java.util.Map;
@@ -27,8 +26,8 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
@@ -158,14 +157,11 @@ public class TestReversedWildcardFilterF
/** fragile assert: depends on our implementation, but cleanest way to check for now */
private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
Query q = qp.parse(query);
- if (!(q instanceof AutomatonQuery))
+ if (!(q instanceof AutomatonQuery)) {
return false;
- // this is a hack to get the protected Automaton field in AutomatonQuery,
- // may break in later lucene versions - we have no getter... for good reasons.
- final Field automatonField = AutomatonQuery.class.getDeclaredField("automaton");
- automatonField.setAccessible(true);
- Automaton automaton = (Automaton) automatonField.get(q);
- String prefix = SpecialOperations.getCommonPrefix(automaton);
+ }
+ Automaton automaton = ((AutomatonQuery) q).getAutomaton();
+ String prefix = Operations.getCommonPrefix(Operations.determinize(automaton));
return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
}
Modified: lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenFilterFactory.java?rev=1603752&r1=1603751&r2=1603752&view=diff
==============================================================================
--- lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenFilterFactory.java Thu Jun 19 08:22:08 2014
@@ -62,4 +62,4 @@ public class MockTokenFilterFactory exte
public MockTokenFilter create(TokenStream stream) {
return new MockTokenFilter(stream, filter);
}
-}
\ No newline at end of file
+}