You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/13 18:46:41 UTC
svn commit: r1602472 - in /lucene/dev/branches/lucene5752/lucene:
codecs/src/java/org/apache/lucene/codecs/blockterms/
codecs/src/java/org/apache/lucene/codecs/memory/
core/src/java/org/apache/lucene/codecs/blocktree/
core/src/java/org/apache/lucene/in...
Author: mikemccand
Date: Fri Jun 13 16:46:40 2014
New Revision: 1602472
URL: http://svn.apache.org/r1602472
Log:
LUCENE-5752: move Transition back out; improve tests
Modified:
lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperationsLight.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java
lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java
lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
Modified: lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Fri Jun 13 16:46:40 2014
@@ -106,8 +106,7 @@ public class BlockTermsReader extends Fi
}
}
- // nocommit
- private String segment;
+ // private String segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context,
String segmentSuffix)
@@ -115,7 +114,7 @@ public class BlockTermsReader extends Fi
this.postingsReader = postingsReader;
- this.segment = segment;
+ // this.segment = segment;
in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
context);
Modified: lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/lucene5752/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Fri Jun 13 16:46:40 2014
@@ -29,8 +29,8 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.SegmentReadState;
@@ -48,6 +48,7 @@ import org.apache.lucene.util.RamUsageEs
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LightAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
// TODO:
// - build depth-N prefix hash?
@@ -931,7 +932,7 @@ public final class DirectPostingsFormat
int transitionCount;
int transitionMax;
int transitionMin;
- final LightAutomaton.Transition transition = new LightAutomaton.Transition();
+ final Transition transition = new Transition();
}
private State[] states;
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java Fri Jun 13 16:46:40 2014
@@ -25,6 +25,7 @@ import org.apache.lucene.store.ByteArray
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.LightAutomaton;
+import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
// TODO: can we share this with the frame in STE?
@@ -68,7 +69,7 @@ final class IntersectTermsEnumFrame {
int numFollowFloorBlocks;
int nextFloorLabel;
- LightAutomaton.Transition transition = new LightAutomaton.Transition();
+ Transition transition = new Transition();
int curTransitionMax;
int transitionIndex;
int transitionCount;
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java Fri Jun 13 16:46:40 2014
@@ -25,6 +25,7 @@ import org.apache.lucene.util.StringHelp
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LightAutomaton;
+import org.apache.lucene.util.automaton.Transition;
/**
* A FilteredTermsEnum that enumerates terms based upon what is accepted by a
@@ -124,7 +125,7 @@ class AutomatonTermsEnum extends Filtere
}
}
- private LightAutomaton.Transition transition = new LightAutomaton.Transition();
+ private Transition transition = new Transition();
/**
* Sets the enum to operate in linear fashion, as we have found
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicAutomata.java Fri Jun 13 16:46:40 2014
@@ -73,6 +73,13 @@ final public class BasicAutomata {
return a;
}
+ public static int appendAnyString(LightAutomaton a, int state) {
+ int newState = a.createState();
+ a.addTransition(state, newState, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ a.addTransition(newState, newState, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ return newState;
+ }
+
/**
* Returns a new (deterministic) automaton that accepts any single codepoint.
*/
@@ -80,6 +87,12 @@ final public class BasicAutomata {
return makeCharRangeLight(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
}
+ public static int appendAnyChar(LightAutomaton a, int state) {
+ int newState = a.createState();
+ a.addTransition(state, newState, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+ return newState;
+ }
+
/**
* Returns a new (deterministic) automaton that accepts a single codepoint of
* the given value.
@@ -88,6 +101,12 @@ final public class BasicAutomata {
return makeCharRangeLight(c, c);
}
+ public static int appendChar(LightAutomaton a, int state, int c) {
+ int newState = a.createState();
+ a.addTransition(state, newState, c, c);
+ return newState;
+ }
+
/**
* Returns a new (deterministic) automaton that accepts a single codepoint whose
* value is in the given interval (including both end points).
@@ -189,7 +208,7 @@ final public class BasicAutomata {
return s;
}
-
+
/**
* Returns a new automaton that accepts strings representing decimal
* non-negative integers in the given interval.
@@ -229,6 +248,11 @@ final public class BasicAutomata {
LightAutomaton.Builder builder = new LightAutomaton.Builder();
+ if (digits <= 0) {
+ // Reserve the "real" initial state:
+ builder.createState();
+ }
+
Collection<Integer> initials = new ArrayList<>();
betweenLight(builder, x, y, 0, initials, digits <= 0);
@@ -236,20 +260,13 @@ final public class BasicAutomata {
LightAutomaton a1 = builder.finish();
if (digits <= 0) {
- LightAutomaton a2 = new LightAutomaton();
- a2.createState();
- // TODO: can we somehow do this w/o a full copy here?
- a2.copy(a1);
-
for (int p : initials) {
- a2.addEpsilon(0, p+1);
+ a1.addEpsilon(0, p);
}
-
- a2.finish();
- return a2;
- } else {
- return a1;
+ a1.finish();
}
+
+ return a1;
}
/**
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/BasicOperations.java Fri Jun 13 16:46:40 2014
@@ -72,7 +72,6 @@ final public class BasicOperations {
*/
static public LightAutomaton concatenateLight(List<LightAutomaton> l) {
LightAutomaton result = new LightAutomaton();
- LightAutomaton.Transition scratch = new LightAutomaton.Transition();
// First pass: create all states
for(LightAutomaton a : l) {
@@ -85,6 +84,7 @@ final public class BasicOperations {
// Second pass: add transitions, carefully linking accept
// states of A to init state of next A:
int stateOffset = 0;
+ Transition t = new Transition();
for(int i=0;i<l.size();i++) {
LightAutomaton a = l.get(i);
int numStates = a.getNumStates();
@@ -92,10 +92,10 @@ final public class BasicOperations {
LightAutomaton nextA = (i == l.size()-1) ? null : l.get(i+1);
for(int s=0;s<numStates;s++) {
- int numTransitions = a.initTransition(s, scratch);
- for(int t=0;t<numTransitions;t++) {
- a.getNextTransition(scratch);
- result.addTransition(stateOffset + s, stateOffset + scratch.dest, scratch.min, scratch.max);
+ int numTransitions = a.initTransition(s, t);
+ for(int j=0;j<numTransitions;j++) {
+ a.getNextTransition(t);
+ result.addTransition(stateOffset + s, stateOffset + t.dest, t.min, t.max);
}
if (a.isAccept(s)) {
@@ -105,10 +105,10 @@ final public class BasicOperations {
while (true) {
if (followA != null) {
// Adds a "virtual" epsilon transition:
- numTransitions = followA.initTransition(0, scratch);
- for(int t=0;t<numTransitions;t++) {
- followA.getNextTransition(scratch);
- result.addTransition(stateOffset + s, followOffset + numStates + scratch.dest, scratch.min, scratch.max);
+ numTransitions = followA.initTransition(0, t);
+ for(int j=0;j<numTransitions;j++) {
+ followA.getNextTransition(t);
+ result.addTransition(stateOffset + s, followOffset + numStates + t.dest, t.min, t.max);
}
if (followA.isAccept(0)) {
// Keep chaining if followA accepts empty string
@@ -154,7 +154,7 @@ final public class BasicOperations {
result.setAccept(i+1, a.isAccept(i));
}
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
int count = a.initTransition(0, t);
for(int i=0;i<count;i++) {
a.getNextTransition(t);
@@ -186,7 +186,7 @@ final public class BasicOperations {
builder.setAccept(0, true);
builder.copy(a);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
int count = a.initTransition(0, t);
for(int i=0;i<count;i++) {
a.getNextTransition(t);
@@ -215,7 +215,7 @@ final public class BasicOperations {
List<Integer> queue = new ArrayList<>();
queue.add(0);
done.set(0);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
while (queue.isEmpty() == false) {
int state = queue.remove(queue.size()-1);
@@ -344,8 +344,8 @@ final public class BasicOperations {
if (a1 == a2) {
return a1;
}
- LightAutomaton.Transition[][] transitions1 = a1.getSortedTransitions();
- LightAutomaton.Transition[][] transitions2 = a2.getSortedTransitions();
+ Transition[][] transitions1 = a1.getSortedTransitions();
+ Transition[][] transitions2 = a2.getSortedTransitions();
LightAutomaton c = new LightAutomaton();
c.createState();
LinkedList<LightStatePair> worklist = new LinkedList<>();
@@ -356,8 +356,8 @@ final public class BasicOperations {
while (worklist.size() > 0) {
p = worklist.removeFirst();
c.setAccept(p.s, a1.isAccept(p.s1) && a2.isAccept(p.s2));
- LightAutomaton.Transition[] t1 = transitions1[p.s1];
- LightAutomaton.Transition[] t2 = transitions2[p.s2];
+ Transition[] t1 = transitions1[p.s1];
+ Transition[] t2 = transitions2[p.s2];
for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
while (b2 < t2.length && t2[b2].max < t1[n1].min)
b2++;
@@ -477,8 +477,8 @@ final public class BasicOperations {
assert isDeterministic(a1);
assert isDeterministic(a2);
// TODO: cutover to iterators instead
- LightAutomaton.Transition[][] transitions1 = a1.getSortedTransitions();
- LightAutomaton.Transition[][] transitions2 = a2.getSortedTransitions();
+ Transition[][] transitions1 = a1.getSortedTransitions();
+ Transition[][] transitions2 = a2.getSortedTransitions();
LinkedList<LightStatePair> worklist = new LinkedList<>();
HashSet<LightStatePair> visited = new HashSet<>();
LightStatePair p = new LightStatePair(0, 0);
@@ -489,8 +489,8 @@ final public class BasicOperations {
if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
return false;
}
- LightAutomaton.Transition[] t1 = transitions1[p.s1];
- LightAutomaton.Transition[] t2 = transitions2[p.s2];
+ Transition[] t1 = transitions1[p.s1];
+ Transition[] t2 = transitions2[p.s2];
for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
while (b2 < t2.length && t2[b2].max < t1[n1].min) {
b2++;
@@ -665,7 +665,7 @@ final public class BasicOperations {
result.createState();
// Copy over all automata
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
for(LightAutomaton a : l) {
result.copy(a);
}
@@ -691,7 +691,7 @@ final public class BasicOperations {
int[] transitions = new int[3];
int next;
- public void add(LightAutomaton.Transition t) {
+ public void add(Transition t) {
if (transitions.length < next+3) {
transitions = ArrayUtil.grow(transitions, next+3);
}
@@ -797,7 +797,7 @@ final public class BasicOperations {
if (count > 1) ArrayUtil.timSort(points, 0, count);
}
- public void add(LightAutomaton.Transition t) {
+ public void add(Transition t) {
find(t.min).starts.add(t);
find(1+t.max).ends.add(t);
}
@@ -855,7 +855,7 @@ final public class BasicOperations {
// like SortedMap<Integer,Integer>
final SortedIntSetLight statesSet = new SortedIntSetLight(5);
- LightAutomaton.Transition scratch = new LightAutomaton.Transition();
+ Transition t = new Transition();
while (worklist.size() > 0) {
SortedIntSetLight.FrozenIntSetLight s = worklist.removeFirst();
@@ -865,10 +865,10 @@ final public class BasicOperations {
for(int i=0;i<s.values.length;i++) {
final int s0 = s.values[i];
int numTransitions = a.getNumTransitions(s0);
- a.initTransition(s0, scratch);
+ a.initTransition(s0, t);
for(int j=0;j<numTransitions;j++) {
- a.getNextTransition(scratch);
- points.add(scratch);
+ a.getNextTransition(t);
+ points.add(t);
}
}
@@ -953,7 +953,7 @@ final public class BasicOperations {
*/
public static boolean isTotal(LightAutomaton a) {
if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
a.getTransition(0, 0, t);
return t.dest == 0 && t.min == Character.MIN_CODE_POINT
&& t.max == Character.MAX_CODE_POINT;
@@ -1021,7 +1021,7 @@ final public class BasicOperations {
for (int i = 0; i < numStates; i++) {
map[i] = new HashSet<>();
}
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
for (int s=0;s<numStates;s++) {
int numTransitions = a.initTransition(s, t);
for(int i=0;i<numTransitions;i++) {
@@ -1063,7 +1063,7 @@ final public class BasicOperations {
}
}
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
for (int i=0;i<numStates;i++) {
if (liveSet.get(i)) {
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java Fri Jun 13 16:46:40 2014
@@ -173,7 +173,7 @@ public class CompiledAutomaton {
lightAutomaton = runAutomaton.a;
}
- private LightAutomaton.Transition scratch = new LightAutomaton.Transition();
+ private Transition scratch = new Transition();
//private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/LightAutomaton.java Fri Jun 13 16:46:40 2014
@@ -420,27 +420,6 @@ public class LightAutomaton {
}
};
- /** Just used temporarily to return the transition from
- * {@link getTransition} and {@link #getNextTransition}. */
- public static class Transition {
- // used only for assert:
- public int source;
- public int dest;
- public int min;
- public int max;
-
- /** Remembers where we are in the iteration; init to -1 to provoke
- * exception if nextTransition is called without first initTransition. */
- private int transitionUpto = -1;
-
- @Override
- public String toString() {
- return source + " --> " + dest + " " + (char) min + "-" + (char) max;
- }
-
- // nocommit equals? hashCode? don't want to encourage putting these into a Map...?
- }
-
// nocommit createStates(int count)?
// nocommit kinda awkward iterator api...
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperationsLight.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperationsLight.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperationsLight.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperationsLight.java Fri Jun 13 16:46:40 2014
@@ -63,7 +63,7 @@ final public class MinimizationOperation
a = BasicOperations.determinize(a);
//a.writeDot("adet");
if (a.getNumTransitions(0) == 1) {
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
a.getTransition(0, 0, t);
if (t.dest == 0 && t.min == Character.MIN_CODE_POINT
&& t.max == Character.MAX_CODE_POINT) {
@@ -201,7 +201,7 @@ final public class MinimizationOperation
LightAutomaton result = new LightAutomaton();
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
//System.out.println(" k=" + k);
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Fri Jun 13 16:46:40 2014
@@ -71,7 +71,7 @@ final public class SpecialOperations {
* Returns true if the language of this automaton is finite.
*/
public static boolean isFinite(LightAutomaton a) {
- return isFinite(new LightAutomaton.Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
+ return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
}
/**
@@ -80,7 +80,7 @@ final public class SpecialOperations {
*/
// TODO: not great that this is recursive... in theory a
// large automata could exceed java's stack
- private static boolean isFinite(LightAutomaton.Transition scratch, LightAutomaton a, int state, BitSet path, BitSet visited) {
+ private static boolean isFinite(Transition scratch, LightAutomaton a, int state, BitSet path, BitSet visited) {
path.set(state);
int numTransitions = a.initTransition(state, scratch);
for(int t=0;t<numTransitions;t++) {
@@ -106,7 +106,7 @@ final public class SpecialOperations {
HashSet<Integer> visited = new HashSet<>();
int s = 0;
boolean done;
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
do {
done = true;
visited.add(s);
@@ -128,7 +128,7 @@ final public class SpecialOperations {
HashSet<Integer> visited = new HashSet<>();
int s = 0;
boolean done;
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
do {
done = true;
visited.add(s);
@@ -191,7 +191,7 @@ final public class SpecialOperations {
// Old initial state becomes new accept state:
builder.setAccept(1, true);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
for (int s=0;s<numStates;s++) {
int numTransitions = a.getNumTransitions(s);
a.initTransition(s, t);
@@ -231,7 +231,7 @@ final public class SpecialOperations {
* current Transition */
public int label;
- private final LightAutomaton.Transition t = new LightAutomaton.Transition();
+ private final Transition t = new Transition();
public void resetState(LightAutomaton a, int state) {
assert a.getNumTransitions(state) != 0;
Modified: lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8Light.java Fri Jun 13 16:46:40 2014
@@ -295,7 +295,7 @@ public final class UTF32ToUTF8Light {
map[utf32State] = utf8State;
- LightAutomaton.Transition scratch = new LightAutomaton.Transition();
+ Transition scratch = new Transition();
while (pending.size() != 0) {
utf32State = pending.remove(pending.size()-1);
Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java Fri Jun 13 16:46:40 2014
@@ -20,13 +20,17 @@ package org.apache.lucene.util.automaton
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStringsLight;
+import org.apache.lucene.util.fst.Util;
public class TestLightAutomaton extends LuceneTestCase {
@@ -60,7 +64,7 @@ public class TestLightAutomaton extends
a.finish();
assertEquals(3, a.getNumTransitions(start));
- LightAutomaton.Transition scratch = new LightAutomaton.Transition();
+ Transition scratch = new Transition();
a.initTransition(start, scratch);
a.getNextTransition(scratch);
assertEquals('a', scratch.min);
@@ -107,6 +111,7 @@ public class TestLightAutomaton extends
assertTrue(BasicOperations.run(a, "mn"));
assertTrue(BasicOperations.run(a, "mone"));
assertFalse(BasicOperations.run(a, "m"));
+ assertFalse(SpecialOperations.isFinite(a));
}
public void testUnion1() throws Exception {
@@ -117,7 +122,7 @@ public class TestLightAutomaton extends
assertTrue(BasicOperations.run(a, "foobar"));
assertTrue(BasicOperations.run(a, "barbaz"));
- // nocommit test getFinitStrings count == 2
+ assertMatches(a, "foobar", "barbaz");
}
public void testUnion2() throws Exception {
@@ -130,14 +135,12 @@ public class TestLightAutomaton extends
assertTrue(BasicOperations.run(a, "barbaz"));
assertTrue(BasicOperations.run(a, ""));
- // nocommit test getFinitStrings count == 3
+ assertMatches(a, "", "foobar", "barbaz");
}
public void testMinimizeSimple() throws Exception {
LightAutomaton a = BasicAutomata.makeStringLight("foobar");
- //a.writeDot("a");
LightAutomaton aMin = MinimizationOperationsLight.minimize(a);
- //aMin.writeDot("aMin");
assertTrue(BasicOperations.sameLanguage(a, aMin));
}
@@ -311,12 +314,12 @@ public class TestLightAutomaton extends
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
// Just get all transitions, shuffle, and build a new automaton with the same transitions:
- List<LightAutomaton.Transition> allTrans = new ArrayList<>();
+ List<Transition> allTrans = new ArrayList<>();
int numStates = a.getNumStates();
for(int s=0;s<numStates;s++) {
int count = a.getNumTransitions(s);
for(int i=0;i<count;i++) {
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
a.getTransition(s, i, t);
allTrans.add(t);
}
@@ -329,7 +332,7 @@ public class TestLightAutomaton extends
}
Collections.shuffle(allTrans, random());
- for(LightAutomaton.Transition t : allTrans) {
+ for(Transition t : allTrans) {
builder.addTransition(t.source, t.dest, t.min, t.max);
}
@@ -351,32 +354,81 @@ public class TestLightAutomaton extends
} else if (random().nextBoolean()) {
a = MinimizationOperationsLight.minimize(a);
}
+ assertMatches(a, "foobar", "beebar", "boobar");
LightAutomaton a4 = BasicOperations.determinize(BasicOperations.minusLight(a, a2));
assertTrue(BasicOperations.run(a4, "foobar"));
assertFalse(BasicOperations.run(a4, "boobar"));
assertTrue(BasicOperations.run(a4, "beebar"));
-
- // nocommit test getFinitStrings count == 2
+ assertMatches(a4, "foobar", "beebar");
a4 = BasicOperations.determinize(BasicOperations.minusLight(a4, a1));
assertFalse(BasicOperations.run(a4, "foobar"));
assertFalse(BasicOperations.run(a4, "boobar"));
assertTrue(BasicOperations.run(a4, "beebar"));
+ assertMatches(a4, "beebar");
a4 = BasicOperations.determinize(BasicOperations.minusLight(a4, a3));
assertFalse(BasicOperations.run(a4, "foobar"));
assertFalse(BasicOperations.run(a4, "boobar"));
assertFalse(BasicOperations.run(a4, "beebar"));
+ assertMatches(a4);
}
- // nocommit
- //public void testWildcard() throws Exception {
- //WildcardQuery.toAutomaton(new Term("foo", "bar*")).writeDot("wq");
- //}
+ public void testIntervalRandom() throws Exception {
+ int ITERS = atLeast(100);
+ for(int iter=0;iter<ITERS;iter++) {
+ int min = TestUtil.nextInt(random(), 0, 100000);
+ int max = TestUtil.nextInt(random(), min, min+100000);
+ int digits;
+ if (random().nextBoolean()) {
+ digits = 0;
+ } else {
+ String s = Integer.toString(max);
+ digits = TestUtil.nextInt(random(), s.length(), 2*s.length());
+ }
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<digits;i++) {
+ b.append('0');
+ }
+ String prefix = b.toString();
+
+ LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits ));
+ if (random().nextBoolean()) {
+ a = MinimizationOperationsLight.minimize(a);
+ }
+ String mins = Integer.toString(min);
+ String maxs = Integer.toString(max);
+ if (digits > 0) {
+ mins = prefix.substring(mins.length()) + mins;
+ maxs = prefix.substring(maxs.length()) + maxs;
+ }
+ assertTrue(BasicOperations.run(a, mins));
+ assertTrue(BasicOperations.run(a, maxs));
+
+ for(int iter2=0;iter2<100;iter2++) {
+ int x = random().nextInt(2*max);
+ boolean expected = x >= min && x <= max;
+ String sx = Integer.toString(x);
+ if (digits > 0 && sx.length() < digits) {
+ // Left-fill with 0s
+ sx = b.substring(sx.length()) + sx;
+ }
+ assertEquals(expected, BasicOperations.run(a, sx));
+ }
+ }
+ }
// nocommit more tests ... it's an algebra
- // nocommit random test for testInterval if we don't have one already
+ private void assertMatches(LightAutomaton a, String... strings) {
+ Set<IntsRef> expected = new HashSet<>();
+ for(String s : strings) {
+ IntsRef ints = new IntsRef();
+ expected.add(Util.toUTF32(s, ints));
+ }
+
+ assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1));
+ }
}
Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Fri Jun 13 16:46:40 2014
@@ -46,6 +46,7 @@ import org.apache.lucene.util.UnicodeUti
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.LightAutomaton;
import org.apache.lucene.util.automaton.SpecialOperations;
+import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST.BytesReader;
@@ -263,7 +264,7 @@ public class AnalyzingSuggester extends
int upto = 0;
states[upto] = 0;
upto++;
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
while (worklist.size() > 0) {
int s = worklist.removeFirst();
int count = a.initTransition(s, t);
@@ -295,7 +296,7 @@ public class AnalyzingSuggester extends
// Go in reverse topo sort so we know we only have to
// make one pass:
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
int[] topoSortStates = topoSortStates(a);
for(int i=0;i<topoSortStates.length;i++) {
int state = topoSortStates[topoSortStates.length-1-i];
Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java Fri Jun 13 16:46:40 2014
@@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.LightAutomaton;
+import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
@@ -78,7 +79,7 @@ public class FSTUtil {
final FST.Arc<T> scratchArc = new FST.Arc<>();
final FST.BytesReader fstReader = fst.getBytesReader();
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
while (queue.size() != 0) {
final Path<T> path = queue.remove(queue.size() - 1);
Modified: lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1602472&r1=1602471&r2=1602472&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Fri Jun 13 16:46:40 2014
@@ -138,15 +138,15 @@ public class AutomatonTestUtil {
*/
public static class RandomAcceptedStringsLight {
- private final Map<LightAutomaton.Transition,Boolean> leadsToAccept;
+ private final Map<Transition,Boolean> leadsToAccept;
private final LightAutomaton a;
- private final LightAutomaton.Transition[][] transitions;
+ private final Transition[][] transitions;
private static class ArrivingTransition {
final int from;
- final LightAutomaton.Transition t;
+ final Transition t;
- public ArrivingTransition(int from, LightAutomaton.Transition t) {
+ public ArrivingTransition(int from, Transition t) {
this.from = from;
this.t = t;
}
@@ -169,7 +169,7 @@ public class AutomatonTestUtil {
// up all arriving transitions to a given state
int numStates = a.getNumStates();
for(int s=0;s<numStates;s++) {
- for(LightAutomaton.Transition t : transitions[s]) {
+ for(Transition t : transitions[s]) {
List<ArrivingTransition> tl = allArriving.get(t.dest);
if (tl == null) {
tl = new ArrayList<>();
@@ -226,12 +226,12 @@ public class AutomatonTestUtil {
boolean cheat = r.nextBoolean();
- final LightAutomaton.Transition t;
+ final Transition t;
if (cheat) {
// pick a transition that we know is the fastest
// path to an accept state
- List<LightAutomaton.Transition> toAccept = new ArrayList<>();
- for(LightAutomaton.Transition t0 : transitions[s]) {
+ List<Transition> toAccept = new ArrayList<>();
+ for(Transition t0 : transitions[s]) {
if (leadsToAccept.containsKey(t0)) {
toAccept.add(t0);
}
@@ -344,7 +344,7 @@ public class AutomatonTestUtil {
LightAutomaton.Builder result = new LightAutomaton.Builder();
result.createState();
newstate.put(initialset, 0);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
while (worklist.size() > 0) {
Set<Integer> s = worklist.removeFirst();
int r = newstate.get(s);
@@ -414,7 +414,7 @@ public class AutomatonTestUtil {
private static boolean getFiniteStringsLight(LightAutomaton a, int s, HashSet<Integer> pathstates,
HashSet<IntsRef> strings, IntsRef path, int limit) {
pathstates.add(s);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
int count = a.initTransition(s, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);
@@ -459,7 +459,7 @@ public class AutomatonTestUtil {
// large automata could exceed java's stack
private static boolean isFiniteSlow(LightAutomaton a, int s, HashSet<Integer> path) {
path.add(s);
- LightAutomaton.Transition t = new LightAutomaton.Transition();
+ Transition t = new Transition();
int count = a.initTransition(s, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);