You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/06/16 21:22:03 UTC

svn commit: r1602966 [2/2] - in /lucene/dev/branches/lucene5752/lucene: core/src/java/org/apache/lucene/search/ core/src/java/org/apache/lucene/util/automaton/ core/src/test/org/apache/lucene/analysis/ core/src/test/org/apache/lucene/index/ core/src/te...

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestLightAutomaton.java Mon Jun 16 19:22:02 2014
@@ -19,17 +19,19 @@ package org.apache.lucene.util.automaton
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStringsLight;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
 import org.apache.lucene.util.fst.Util;
 
 public class TestLightAutomaton extends LuceneTestCase {
@@ -46,7 +48,7 @@ public class TestLightAutomaton extends 
     a.addTransition(start, end, 'd', 'd');
     a.addTransition(x, y, 'b', 'b');
     a.addTransition(y, end, 'c', 'c');
-    a.finish();
+    a.finishState();
   }
 
   public void testReduceBasic() throws Exception {
@@ -62,7 +64,7 @@ public class TestLightAutomaton extends 
     a.addTransition(start, end, 'x', 'x');
     a.addTransition(start, end, 'y', 'y');
 
-    a.finish();
+    a.finishState();
     assertEquals(3, a.getNumTransitions(start));
     Transition scratch = new Transition();
     a.initTransition(start, scratch);
@@ -79,9 +81,9 @@ public class TestLightAutomaton extends 
 
   public void testSameLanguage() throws Exception {
     LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
-    LightAutomaton a2 = BasicOperations.concatenateLight(
+    LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.concatenateLight(
                             BasicAutomata.makeStringLight("foo"),
-                            BasicAutomata.makeStringLight("bar"));
+                            BasicAutomata.makeStringLight("bar")));
     assertTrue(BasicOperations.sameLanguage(a1, a2));
   }
 
@@ -149,7 +151,7 @@ public class TestLightAutomaton extends 
     LightAutomaton a = BasicOperations.unionLight(Arrays.asList(BasicAutomata.makeStringLight("foobar"),
                                                                 BasicAutomata.makeStringLight("boobar")));
     LightAutomaton aMin = MinimizationOperationsLight.minimize(a);
-    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(a), aMin));
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)), aMin));
   }
 
   public void testReverse() throws Exception {
@@ -234,7 +236,7 @@ public class TestLightAutomaton extends 
     a.setAccept(fini, true);
     a.addTransition(init, fini, 'm');
     a.addTransition(fini, fini, 'm');
-    a.finish();
+    a.finishState();
     assertEquals(0, SpecialOperations.getCommonSuffixBytesRef(a).length);
   }
 
@@ -244,8 +246,8 @@ public class TestLightAutomaton extends 
       LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
       LightAutomaton ra = SpecialOperations.reverse(a);
       LightAutomaton rra = SpecialOperations.reverse(ra);
-      assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(a),
-                                              BasicOperations.determinize(rra)));
+      assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
+                                              BasicOperations.determinize(BasicOperations.removeDeadStates(rra))));
     }
   }
 
@@ -260,16 +262,16 @@ public class TestLightAutomaton extends 
       LightAutomaton ra = SpecialOperations.reverse(a);
       LightAutomaton rda = BasicOperations.determinize(ra);
 
-      if (a.isEmpty()) {
-        assertTrue(rda.isEmpty());
+      if (BasicOperations.isEmpty(a)) {
+        assertTrue(BasicOperations.isEmpty(rda));
         continue;
       }
 
-      RandomAcceptedStringsLight rasl = new RandomAcceptedStringsLight(a);
+      RandomAcceptedStrings ras = new RandomAcceptedStrings(a);
 
       for(int iter2=0;iter2<20;iter2++) {
         // Find string accepted by original automaton
-        int[] s = rasl.getRandomAcceptedString(random());
+        int[] s = ras.getRandomAcceptedString(random());
 
         // Reverse it
         for(int j=0;j<s.length/2;j++) {
@@ -290,11 +292,16 @@ public class TestLightAutomaton extends 
     assertTrue(BasicOperations.run(a, ""));
   }
 
+  public void testBasicIsEmpty() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    a.createState();
+    assertTrue(BasicOperations.isEmpty(a));
+  }
 
   public void testRemoveDeadTransitionsEmpty() throws Exception {
     LightAutomaton a = BasicAutomata.makeEmptyLight();
     LightAutomaton a2 = BasicOperations.removeDeadStates(a);
-    assertTrue(a2.isEmpty());
+    assertTrue(BasicOperations.isEmpty(a2));
   }
 
   public void testInvalidAddTransition() throws Exception {
@@ -340,13 +347,38 @@ public class TestLightAutomaton extends 
       }
 
       assertTrue(BasicOperations.sameLanguage(
-                    BasicOperations.determinize(a),
-                    BasicOperations.determinize(builder.finish())));
+                    BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
+                    BasicOperations.determinize(BasicOperations.removeDeadStates(builder.finish()))));
       
     }
   }
 
-  // nocommit testMinus
+  public void testIsTotal() throws Exception {
+    assertFalse(BasicOperations.isTotal(new LightAutomaton()));
+    LightAutomaton a = new LightAutomaton();
+    int init = a.createState();
+    int fini = a.createState();
+    a.setAccept(fini, true);
+    a.addTransition(init, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+    a.finishState();
+    assertFalse(BasicOperations.isTotal(a));
+    a.addTransition(fini, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+    a.finishState();
+    assertFalse(BasicOperations.isTotal(a));
+    a.setAccept(init, true);
+    assertTrue(BasicOperations.isTotal(MinimizationOperationsLight.minimize(a)));
+  }
+
+  public void testMinimizeEmpty() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    int init = a.createState();
+    int fini = a.createState();
+    a.addTransition(init, fini, 'a');
+    a.finishState();
+    a = MinimizationOperationsLight.minimize(a);
+    assertEquals(0, a.getNumStates());
+  }
+
   public void testMinus() throws Exception {
     LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
     LightAutomaton a2 = BasicAutomata.makeStringLight("boobar");
@@ -379,6 +411,20 @@ public class TestLightAutomaton extends 
     assertMatches(a4);
   }
 
+  public void testOneInterval() throws Exception {
+    LightAutomaton a = BasicAutomata.makeIntervalLight(999, 1032, 0);
+    a = BasicOperations.determinize(a);
+    assertTrue(BasicOperations.run(a, "0999"));
+    assertTrue(BasicOperations.run(a, "00999"));
+    assertTrue(BasicOperations.run(a, "000999"));
+  }
+
+  public void testAnotherInterval() throws Exception {
+    LightAutomaton a = BasicAutomata.makeIntervalLight(1, 2, 0);
+    a = BasicOperations.determinize(a);
+    assertTrue(BasicOperations.run(a, "01"));
+  }
+
   public void testIntervalRandom() throws Exception {
     int ITERS = atLeast(100);
     for(int iter=0;iter<ITERS;iter++) {
@@ -397,7 +443,7 @@ public class TestLightAutomaton extends 
       }
       String prefix = b.toString();
 
-      LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits   ));
+      LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits));
       if (random().nextBoolean()) {
         a = MinimizationOperationsLight.minimize(a);
       }
@@ -414,18 +460,43 @@ public class TestLightAutomaton extends 
         int x = random().nextInt(2*max);
         boolean expected = x >= min && x <= max;
         String sx = Integer.toString(x);
-        if (digits > 0 && sx.length() < digits) {
+        if (sx.length() < digits) {
           // Left-fill with 0s
           sx = b.substring(sx.length()) + sx;
+        } else if (digits == 0) {
+          // Left-fill with random number of 0s:
+          int numZeros = random().nextInt(10);
+          StringBuilder sb = new StringBuilder();
+          for(int i=0;i<numZeros;i++) {
+            sb.append('0');
+          }
+          sb.append(sx);
+          sx = sb.toString();
         }
         assertEquals(expected, BasicOperations.run(a, sx));
       }
     }
   }
 
-  // nocommit testRemoveDead of an A acceptint nothing should go to emptye A (0 states)
+  private void assertMatches(LightAutomaton a, String... strings) {
+    Set<IntsRef> expected = new HashSet<>();
+    for(String s : strings) {
+      IntsRef ints = new IntsRef();
+      expected.add(Util.toUTF32(s, ints));
+    }
+
+    assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1)); 
+  }
+
+  public void testConcatenatePreservesDet() throws Exception {
+    LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
+    assertTrue(a1.isDeterministic());
+    LightAutomaton a2 = BasicAutomata.makeStringLight("baz");
+    assertTrue(a2.isDeterministic());
+    assertTrue((BasicOperations.concatenateLight(Arrays.asList(a1, a2)).isDeterministic()));
+  }
 
-  public void testRemoveDead() throws Exception {
+  public void testRemoveDeadStates() throws Exception {
     LightAutomaton a = BasicOperations.concatenateLight(Arrays.asList(BasicAutomata.makeStringLight("x"),
                                                                       BasicAutomata.makeStringLight("y")));
     assertEquals(4, a.getNumStates());
@@ -433,15 +504,510 @@ public class TestLightAutomaton extends 
     assertEquals(3, a.getNumStates());
   }
 
-  // nocommit more tests ... it's an algebra
+  public void testRemoveDeadStatesEmpty1() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    a.finishState();
+    assertTrue(BasicOperations.isEmpty(a));
+    assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
+  }
 
-  private void assertMatches(LightAutomaton a, String... strings) {
-    Set<IntsRef> expected = new HashSet<>();
-    for(String s : strings) {
-      IntsRef ints = new IntsRef();
-      expected.add(Util.toUTF32(s, ints));
+  public void testRemoveDeadStatesEmpty2() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    a.finishState();
+    assertTrue(BasicOperations.isEmpty(a));
+    assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
+  }
+
+  public void testRemoveDeadStatesEmpty3() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    int init = a.createState();
+    int fini = a.createState();
+    a.addTransition(init, fini, 'a');
+    LightAutomaton a2 = BasicOperations.removeDeadStates(a);
+    assertEquals(0, a2.getNumStates());
+  }
+
+  public void testConcatEmpty() throws Exception {
+    // If you concat empty automaton to anything the result should still be empty:
+    LightAutomaton a = BasicOperations.concatenateLight(BasicAutomata.makeEmptyLight(),
+                                                        BasicAutomata.makeStringLight("foo"));
+    assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
+
+    a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight("foo"),
+                                         BasicAutomata.makeEmptyLight());
+    assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
+  }
+
+  public void testSeemsNonEmptyButIsNot1() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    // Init state has a transition but doesn't lead to accept
+    int init = a.createState();
+    int s = a.createState();
+    a.addTransition(init, s, 'a');
+    a.finishState();
+    assertTrue(BasicOperations.isEmpty(a));
+  }
+
+  public void testSeemsNonEmptyButIsNot2() throws Exception {
+    LightAutomaton a = new LightAutomaton();
+    int init = a.createState();
+    int s = a.createState();
+    a.addTransition(init, s, 'a');
+    // An orphan'd accept state
+    s = a.createState();
+    a.setAccept(s, true);
+    a.finishState();
+    assertTrue(BasicOperations.isEmpty(a));
+  }
+
+  public void testSameLanguage1() throws Exception {
+    LightAutomaton a = BasicAutomata.makeEmptyStringLight();
+    LightAutomaton a2 = BasicAutomata.makeEmptyStringLight();
+    int state = a2.createState();
+    a2.addTransition(0, state, 'a');
+    a2.finishState();
+    assertTrue(BasicOperations.sameLanguage(BasicOperations.removeDeadStates(a),
+                                            BasicOperations.removeDeadStates(a2)));
+  }
+
+  private LightAutomaton randomNoOp(LightAutomaton a) {
+    switch (random().nextInt(5)) {
+    case 0:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: determinize");
+      }
+      return BasicOperations.determinize(a);
+    case 1:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: minimize");
+      }
+      return MinimizationOperationsLight.minimize(a);
+    case 2:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: removeDeadStates");
+      }
+      return BasicOperations.removeDeadStates(a);
+    case 3:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: reverse reverse");
+      }
+      a = SpecialOperations.reverse(a);
+      a = randomNoOp(a);
+      return SpecialOperations.reverse(a);
+    case 4:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: concat empty string");
+      }
+      return BasicOperations.concatenateLight(a, BasicAutomata.makeEmptyStringLight());
+    case 5:
+      if (VERBOSE) {
+        System.out.println("  randomNoOp: union empty automaton");
+      }
+      return BasicOperations.unionLight(a, BasicAutomata.makeEmptyLight());
     }
+    assert false;
+    return null;
+  }
 
-    assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1)); 
+  private LightAutomaton unionTerms(Collection<BytesRef> terms) {
+    LightAutomaton a;
+    if (random().nextBoolean()) {
+      if (VERBOSE) {
+        System.out.println("TEST: unionTerms: use union");
+      }
+      List<LightAutomaton> as = new ArrayList<>();
+      for(BytesRef term : terms) {
+        as.add(BasicAutomata.makeStringLight(term.utf8ToString()));
+      }
+      a = BasicOperations.unionLight(as);
+    } else {
+      if (VERBOSE) {
+        System.out.println("TEST: unionTerms: use makeStringUnion");
+      }
+      List<BytesRef> termsList = new ArrayList<>(terms);
+      Collections.sort(termsList);
+      a = BasicAutomata.makeStringUnionLight(termsList);
+    }
+
+    return randomNoOp(a);
+  }
+
+  private String getRandomString(boolean isAscii) {
+    if (isAscii) {
+      return TestUtil.randomSimpleString(random());
+    } else {
+      return TestUtil.randomRealisticUnicodeString(random());
+    }
+  }
+
+  public void testRandomFinite() throws Exception {
+
+    int numTerms = atLeast(10);
+    int iters = atLeast(100);
+
+    // Some of the ops we do (stripping random byte, reverse) turn valid UTF8 into invalid if we allow non-ascii:
+    boolean isAscii = random().nextBoolean();
+
+    if (VERBOSE) {
+      System.out.println("TEST: isAscii=" + isAscii + " numTerms" + numTerms + " iters=" + iters);
+    }
+
+    Set<BytesRef> terms = new HashSet<>();
+    while (terms.size() < numTerms) {
+      terms.add(new BytesRef(getRandomString(isAscii)));
+    }
+
+    LightAutomaton a = unionTerms(terms);
+    assertSame(terms, a);
+
+    for(int iter=0;iter<iters;iter++) {
+      if (VERBOSE) {
+        System.out.println("TEST: iter=" + iter + " numTerms=" + terms.size());
+        System.out.println("  terms:");
+        for(BytesRef term : terms) {
+          System.out.println("    " + term);
+        }
+      }
+      switch(random().nextInt(14)) {
+
+      case 0:
+        // concatenate prefix
+        {
+          if (VERBOSE) {
+            System.out.println("  op=concat prefix");
+          }
+          Set<BytesRef> newTerms = new HashSet<>();
+          BytesRef prefix = new BytesRef(getRandomString(isAscii));
+          for(BytesRef term : terms) {
+            BytesRef newTerm = BytesRef.deepCopyOf(prefix);
+            newTerm.append(term);
+            newTerms.add(newTerm);
+          }
+          terms = newTerms;
+          boolean wasDeterministic1 = a.isDeterministic();
+          a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight(prefix.utf8ToString()), a);
+          assertEquals(wasDeterministic1, a.isDeterministic());
+        }
+        break;
+
+      case 1:
+        // concatenate suffix
+        {
+          BytesRef suffix = new BytesRef(getRandomString(isAscii));
+          if (VERBOSE) {
+            System.out.println("  op=concat suffix " + suffix);
+          }
+          Set<BytesRef> newTerms = new HashSet<>();
+          for(BytesRef term : terms) {
+            BytesRef newTerm = BytesRef.deepCopyOf(term);
+            newTerm.append(suffix);
+            newTerms.add(newTerm);
+          }
+          terms = newTerms;
+          a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(suffix.utf8ToString()));
+        }
+        break;
+
+        // nocommit sometimes concat a suffix accepting more than 1 term, and sometimes non-det
+
+      case 2:
+        // determinize
+        if (VERBOSE) {
+          System.out.println("  op=determinize");
+        }
+        a = BasicOperations.determinize(a);
+        assertTrue(a.isDeterministic());
+        break;
+
+      case 3:
+        if (VERBOSE) {
+          System.out.println("  op=minimize");
+        }
+        // minimize
+        a = MinimizationOperationsLight.minimize(a);
+        break;
+
+      case 4:
+        // union
+        {
+          if (VERBOSE) {
+            System.out.println("  op=union");
+          }
+          Set<BytesRef> newTerms = new HashSet<>();
+          int numNewTerms = random().nextInt(5);
+          while (newTerms.size() < numNewTerms) {
+            newTerms.add(new BytesRef(getRandomString(isAscii)));
+          }
+          terms.addAll(newTerms);
+          LightAutomaton newA = unionTerms(newTerms);
+          a = BasicOperations.unionLight(a, newA);
+        }
+        break;
+
+      case 5:
+        // optional
+        {
+          if (VERBOSE) {
+            System.out.println("  op=optional");
+          }
+          a = BasicOperations.optionalLight(a);
+          terms.add(new BytesRef());
+        }
+        break;
+
+      case 6:
+        // minus finite 
+        {
+          if (VERBOSE) {
+            System.out.println("  op=minus finite");
+          }
+          if (terms.size() > 0) {
+            RandomAcceptedStrings rasl = new RandomAcceptedStrings(BasicOperations.removeDeadStates(a));
+            Set<BytesRef> toRemove = new HashSet<>();
+            int numToRemove = TestUtil.nextInt(random(), 1, (terms.size()+1)/2);
+            while (toRemove.size() < numToRemove) {
+              int[] ints = rasl.getRandomAcceptedString(random());
+              BytesRef term = new BytesRef(UnicodeUtil.newString(ints, 0, ints.length));
+              if (toRemove.contains(term) == false) {
+                toRemove.add(term);
+              }
+            }
+            for(BytesRef term : toRemove) {
+              boolean removed = terms.remove(term);
+              assertTrue(removed);
+            }
+            LightAutomaton a2 = unionTerms(toRemove);
+            a = BasicOperations.minusLight(a, a2);
+          }
+        }
+        break;
+
+      case 7:
+        {
+          // minus infinite
+          List<LightAutomaton> as = new ArrayList<>();
+          int count = TestUtil.nextInt(random(), 1, 5);
+          Set<Integer> prefixes = new HashSet<>();
+          while(prefixes.size() < count) {
+            // prefix is a leading ascii byte; we remove <prefix>* from a
+            int prefix = random().nextInt(128);
+            prefixes.add(prefix);
+          }
+
+          if (VERBOSE) {
+            System.out.println("  op=minus infinite prefixes=" + prefixes);
+          }
+
+          for(int prefix : prefixes) {
+            // prefix is a leading ascii byte; we remove <prefix>* from a
+            LightAutomaton a2 = new LightAutomaton();
+            int init = a2.createState();
+            int state = a2.createState();
+            a2.addTransition(init, state, prefix);
+            a2.setAccept(state, true);
+            a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+            a2.finishState();
+            as.add(a2);
+            Iterator<BytesRef> it = terms.iterator();
+            while (it.hasNext()) {
+              BytesRef term = it.next();
+              if (term.length > 0 && (term.bytes[term.offset] & 0xFF) == prefix) {
+                it.remove();
+              }
+            }
+          }
+          LightAutomaton a2 = randomNoOp(BasicOperations.unionLight(as));
+          a = BasicOperations.minusLight(a, a2);
+        }
+        break;
+
+      case 8:
+        {
+          int count = TestUtil.nextInt(random(), 10, 20);
+          if (VERBOSE) {
+            System.out.println("  op=intersect infinite count=" + count);
+          }
+          // intersect infinite
+          List<LightAutomaton> as = new ArrayList<>();
+
+          Set<Integer> prefixes = new HashSet<>();
+          while(prefixes.size() < count) {
+            int prefix = random().nextInt(128);
+            prefixes.add(prefix);
+          }
+          if (VERBOSE) {
+            System.out.println("  prefixes=" + prefixes);
+          }
+
+          for(int prefix : prefixes) {
+            // prefix is a leading ascii byte; we retain <prefix>* in a
+            LightAutomaton a2 = new LightAutomaton();
+            int init = a2.createState();
+            int state = a2.createState();
+            a2.addTransition(init, state, prefix);
+            a2.setAccept(state, true);
+            a2.addTransition(state, state, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
+            a2.finishState();
+            as.add(a2);
+            prefixes.add(prefix);
+          }
+
+          LightAutomaton a2 = BasicOperations.unionLight(as);
+          if (random().nextBoolean()) {
+            a2 = BasicOperations.determinize(a2);
+          } else if (random().nextBoolean()) {
+            a2 = MinimizationOperationsLight.minimize(a2);
+          }
+          a = BasicOperations.intersectionLight(a, a2);
+
+          Iterator<BytesRef> it = terms.iterator();
+          while (it.hasNext()) {
+            BytesRef term = it.next();
+            if (term.length == 0 || prefixes.contains(term.bytes[term.offset]&0xff) == false) {
+              if (VERBOSE) {
+                System.out.println("  drop term=" + term);
+              }
+              it.remove();
+            } else {
+              if (VERBOSE) {
+                System.out.println("  keep term=" + term);
+              }
+            }
+          }
+        }        
+        break;
+
+      case 9:
+        // reverse
+        if (VERBOSE) {
+          System.out.println("  op=reverse");
+        }
+        a = SpecialOperations.reverse(a);
+        Set<BytesRef> newTerms = new HashSet<>();
+        for(BytesRef term : terms) {
+          newTerms.add(new BytesRef(new StringBuilder(term.utf8ToString()).reverse().toString()));
+        }
+        terms = newTerms;
+        break;
+
+      case 10:
+        if (VERBOSE) {
+          System.out.println("  op=randomNoOp");
+        }
+        a = randomNoOp(a);
+        break;
+
+      case 11:
+        // interval
+        int min = random().nextInt(1000);
+        int max = min + random().nextInt(50);
+        // digits must be non-zero else we make cycle
+        int digits = Integer.toString(max).length();
+        if (VERBOSE) {
+          System.out.println("  op=union interval min=" + min + " max=" + max + " digits=" + digits);
+        }
+        a = BasicOperations.unionLight(a, BasicAutomata.makeIntervalLight(min, max, digits));
+        StringBuilder b = new StringBuilder();
+        for(int i=0;i<digits;i++) {
+          b.append('0');
+        }
+        String prefix = b.toString();
+        for(int i=min;i<=max;i++) {
+          String s = Integer.toString(i);
+          if (s.length() < digits) {
+            // Left-fill with 0s
+            s = prefix.substring(s.length()) + s;
+          }
+          terms.add(new BytesRef(s));
+        }
+        break;
+
+      case 12:
+        if (VERBOSE) {
+          System.out.println("  op=remove the empty string");
+        }
+        a = BasicOperations.minusLight(a, BasicAutomata.makeEmptyStringLight());
+        terms.remove(new BytesRef());
+        break;
+
+      case 13:
+        if (VERBOSE) {
+          System.out.println("  op=add the empty string");
+        }
+        a = BasicOperations.unionLight(a, BasicAutomata.makeEmptyStringLight());
+        terms.add(new BytesRef());
+        break;
+      }
+
+      assertSame(terms, a);
+    }
+
+    assertSame(terms, a);
+  }
+
+  private void assertSame(Collection<BytesRef> terms, LightAutomaton a) {
+
+    try {
+      assertTrue(SpecialOperations.isFinite(a));
+      assertFalse(BasicOperations.isTotal(a));
+
+      LightAutomaton detA = BasicOperations.determinize(a);
+
+      // Make sure all terms are accepted:
+      IntsRef scratch = new IntsRef();
+      for(BytesRef term : terms) {
+        Util.toIntsRef(term, scratch);
+        assertTrue("failed to accept term=" + term.utf8ToString(), BasicOperations.run(detA, term.utf8ToString()));
+      }
+
+      // Use getFiniteStrings:
+      Set<IntsRef> expected = new HashSet<>();
+      for(BytesRef term : terms) {
+        IntsRef intsRef = new IntsRef();
+        Util.toUTF32(term.utf8ToString(), intsRef);
+        expected.add(intsRef);
+      }
+      Set<IntsRef> actual = SpecialOperations.getFiniteStrings(a, -1);
+
+      if (expected.equals(actual) == false) {
+        System.out.println("FAILED:");
+        for(IntsRef term : expected) {
+          if (actual.contains(term) == false) {
+            System.out.println("  term=" + term + " should be accepted but isn't");
+          }
+        }
+        for(IntsRef term : actual) {
+          if (expected.contains(term) == false) {
+            System.out.println("  term=" + term + " is accepted but should not be");
+          }
+        }
+        throw new AssertionError("mismatch");
+      }
+
+      // Use sameLanguage:
+      LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.determinize(unionTerms(terms)));
+      assertTrue(BasicOperations.sameLanguage(a2, BasicOperations.removeDeadStates(BasicOperations.determinize(a))));
+
+      // Do same check, in UTF8 space
+      LightAutomaton utf8 = randomNoOp(new UTF32ToUTF8Light().convert(a));
+    
+      Set<IntsRef> expected2 = new HashSet<>();
+      for(BytesRef term : terms) {
+        IntsRef intsRef = new IntsRef();
+        Util.toIntsRef(term, intsRef);
+        expected2.add(intsRef);
+      }
+      assertEquals(expected2, SpecialOperations.getFiniteStrings(utf8, -1));
+    } catch (AssertionError ae) {
+      System.out.println("TEST: FAILED: not same");
+      System.out.println("  terms (count=" + terms.size() + "):");
+      for(BytesRef term : terms) {
+        System.out.println("    " + term);
+      }
+      System.out.println("  automaton:");
+      System.out.println(a.toDot());
+      //a.writeDot("fail");
+      throw ae;
+    }
   }
 }

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestMinimize.java Mon Jun 16 19:22:02 2014
@@ -28,8 +28,8 @@ public class TestMinimize extends Lucene
     int num = atLeast(200);
     for (int i = 0; i < num; i++) {
       LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
-      LightAutomaton la = BasicOperations.determinize(a);
-      LightAutomaton lb = BasicOperations.determinize(MinimizationOperationsLight.minimize(a));
+      LightAutomaton la = BasicOperations.determinize(BasicOperations.removeDeadStates(a));
+      LightAutomaton lb = MinimizationOperationsLight.minimize(a);
       assertTrue(BasicOperations.sameLanguage(la, lb));
     }
   }

Modified: lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java (original)
+++ lucene/dev/branches/lucene5752/lucene/core/src/test/org/apache/lucene/util/automaton/TestUTF32ToUTF8.java Mon Jun 16 19:22:02 2014
@@ -17,13 +17,17 @@ package org.apache.lucene.util.automaton
  * limitations under the License.
  */
 
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
-
-import java.nio.charset.StandardCharsets;
-import java.util.Random;
+import org.apache.lucene.util.fst.Util;
 
 public class TestUTF32ToUTF8 extends LuceneTestCase {
   
@@ -203,11 +207,25 @@ public class TestUTF32ToUTF8 extends Luc
       assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toLightAutomaton());
     }
   }
+
+  public void testSingleton() throws Exception {
+    int iters = atLeast(100);
+    for(int iter=0;iter<iters;iter++) {
+      String s = TestUtil.randomRealisticUnicodeString(random());
+      LightAutomaton a = BasicAutomata.makeStringLight(s);
+      LightAutomaton utf8 = new UTF32ToUTF8Light().convert(a);
+      IntsRef ints = new IntsRef();
+      Util.toIntsRef(new BytesRef(s), ints);
+      Set<IntsRef> set = new HashSet<>();
+      set.add(ints);
+      assertEquals(set, SpecialOperations.getFiniteStrings(utf8, -1));
+    }
+  }
   
   private void assertAutomaton(LightAutomaton automaton) throws Exception {
     CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
     ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
-    final AutomatonTestUtil.RandomAcceptedStringsLight ras = new AutomatonTestUtil.RandomAcceptedStringsLight(automaton);
+    final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);
     
     int num = atLeast(1000);
     for (int i = 0; i < num; i++) {

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Mon Jun 16 19:22:02 2014
@@ -328,7 +328,7 @@ public class AnalyzingSuggester extends 
       }
     }
 
-    result.finish();
+    result.finishState();
 
     return result;
   }

Modified: lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FSTUtil.java Mon Jun 16 19:22:02 2014
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.LightAutomaton;
 import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.FST;
@@ -72,6 +71,10 @@ public class FSTUtil {
     assert a.isDeterministic();
     final List<Path<T>> queue = new ArrayList<>();
     final List<Path<T>> endNodes = new ArrayList<>();
+    if (a.getNumStates() == 0) {
+      return endNodes;
+    }
+
     queue.add(new Path<>(0, fst
         .getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
         new IntsRef()));

Modified: lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java?rev=1602966&r1=1602965&r2=1602966&view=diff
==============================================================================
--- lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java (original)
+++ lucene/dev/branches/lucene5752/lucene/test-framework/src/java/org/apache/lucene/util/automaton/AutomatonTestUtil.java Mon Jun 16 19:22:02 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.util.automaton
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.IdentityHashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -31,7 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.fst.Util;
 
 /**
  * Utilities for testing automata.
@@ -136,7 +134,7 @@ public class AutomatonTestUtil {
    * Once created, call {@link #getRandomAcceptedString(Random)}
    * to get a new string (in UTF-32 codepoints).
    */
-  public static class RandomAcceptedStringsLight {
+  public static class RandomAcceptedStrings {
 
     private final Map<Transition,Boolean> leadsToAccept;
     private final LightAutomaton a;
@@ -152,7 +150,7 @@ public class AutomatonTestUtil {
       }
     }
 
-    public RandomAcceptedStringsLight(LightAutomaton a) {
+    public RandomAcceptedStrings(LightAutomaton a) {
       this.a = a;
       if (a.getNumStates() == 0) {
         throw new IllegalArgumentException("this automaton accepts nothing");
@@ -334,6 +332,9 @@ public class AutomatonTestUtil {
    * Determinizes the given automaton using the given set of initial states. 
    */
   public static LightAutomaton determinizeSimpleLight(LightAutomaton a, Set<Integer> initialset) {
+    if (a.getNumStates() == 0) {
+      return a;
+    }
     int[] points = a.getStartPoints();
     // subset construction
     Map<Set<Integer>, Set<Integer>> sets = new HashMap<>();
@@ -448,6 +449,9 @@ public class AutomatonTestUtil {
    * this is only used to test the correctness of our faster implementation.
    */
   public static boolean isFiniteSlow(LightAutomaton a) {
+    if (a.getNumStates() == 0) {
+      return true;
+    }
     return isFiniteSlow(a, 0, new HashSet<Integer>());
   }