You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2011/02/21 15:23:48 UTC

svn commit: r1072978 - in /lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst: FST.java Util.java

Author: dweiss
Date: Mon Feb 21 14:23:48 2011
New Revision: 1072978

URL: http://svn.apache.org/viewvc?rev=1072978&view=rev
Log:
Alternative depth-based DOT layout ordering in FST's Utils.
https://issues.apache.org/jira/browse/LUCENE-2934

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1072978&r1=1072977&r2=1072978&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Mon Feb 21 14:23:48 2011
@@ -467,9 +467,13 @@ public class FST<T> {
     return arc;
   }
 
-  /** Follow the follow arc and read the first arc of its
-   *  target; this changes the provide arc (2nd arg) in-place
-   *  and returns it. */
+  /**
+   * Follow the <code>follow</code> arc and read the first arc of its target;
+   * this changes the provided <code>arc</code> (2nd arg) in-place and returns
+   * it.
+   * 
+   * @returns Returns the second argument (<code>arc</code>).
+   */
   public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
     //int pos = address;
     //System.out.println("    readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java?rev=1072978&r1=1072977&r2=1072978&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/fst/Util.java Mon Feb 21 14:23:48 2011
@@ -17,12 +17,8 @@ package org.apache.lucene.util.automaton
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Set;
-import java.util.HashSet;
+import java.io.*;
+import java.util.*;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
@@ -160,90 +156,165 @@ public final class Util {
       return output;
     }
   }
+  
+  /**
+   * Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
+   * for visualization. Example of use:
+   * 
+   * <pre>
+   * PrintStream ps = new PrintStream(&quot;out.dot&quot;);
+   * fst.toDot(ps);
+   * ps.close();
+   * </pre>
+   * 
+   * and then, from command line:
+   * 
+   * <pre>
+   * dot -Tpng -o out.png out.dot
+   * </pre>
+   * 
+   * <p>
+   * Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
+   * 
+   * @param sameRank
+   *          If <code>true</code>, the resulting <code>dot</code> file will try
+   *          to order states in layers of breadth-first traversal. This may
+   *          mess up arcs, but makes the output FST's structure a bit clearer.
+   * 
+   * @param labelStates
+   *          If <code>true</code> states will have labels equal to their offsets in their
+   *          binary format. Expands the graph considerably. 
+   * 
+   * @see "http://www.graphviz.org/"
+   */
+  public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates) 
+    throws IOException {    
+    // This is the start arc in the automaton (from the epsilon state to the first state 
+    // with outgoing transitions.
+    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
 
+    // A queue of transitions to consider for the next level.
+    final List<FST.Arc<T>> thisLevelQueue = new ArrayList<FST.Arc<T>>();
 
-  // NOTE: this consumes alot of RAM!
-  // arcs w/ NEXT opto are in blue
-  /*
-    eg:
-      PrintStream ps = new PrintStream("out.dot");
-      fst.toDot(ps);
-      ps.close();
-      System.out.println("SAVED out.dot");
-      
-    then dot -Tpng out.dot > /x/tmp/out.png
-  */
-
-  public static<T> void toDot(FST<T> fst, PrintStream out) throws IOException {
+    // A queue of transitions to consider when processing the next level.
+    final List<FST.Arc<T>> nextLevelQueue = new ArrayList<FST.Arc<T>>();
+    nextLevelQueue.add(startArc);
     
-    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
+    // A list of states on the same level (for ranking).
+    final List<Integer> sameLevelStates = new ArrayList<Integer>();
 
-    final List<FST.Arc<T>> queue = new ArrayList<FST.Arc<T>>();
-    queue.add(startArc);
+    // A bitset of already seen states (target offset).
+    final BitSet seen = new BitSet();
+    seen.set(startArc.target);
+
+    // Shape for states.
+    final String stateShape = "circle";
+
+    // Emit DOT prologue.
+    out.write("digraph FST {\n");
+    out.write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
 
-    final Set<Integer> seen = new HashSet<Integer>();
-    seen.add(startArc.target);
-    
-    out.println("digraph FST {");
-    out.println("  rankdir = LR;");
-    //out.println("  " + startNode + " [shape=circle label=" + startNode + "];");
-    out.println("  " + startArc.target + " [label=\"\" shape=circle];");
-    out.println("  initial [shape=point color=white label=\"\"];");
-    out.println("  initial -> " + startArc.target);
+    if (!labelStates) {
+      out.write("  node [shape=circle, width=.2, height=.2, style=filled]\n");      
+    }
+
+    emitDotState(out, "initial", "point", "white", "");
+    emitDotState(out, Integer.toString(startArc.target), stateShape, null, "");
+    out.write("  initial -> " + startArc.target + "\n");
 
     final T NO_OUTPUT = fst.outputs.getNoOutput();
+    int level = 0;
 
-    while(queue.size() != 0) {
-      FST.Arc<T> arc = queue.get(queue.size()-1);
-      queue.remove(queue.size()-1);
-      //System.out.println("dot cycle target=" + arc.target);
-
-      if (fst.targetHasArcs(arc)) {
-
-        // scan all arcs
-        final int node = arc.target;
-        fst.readFirstTargetArc(arc, arc);
-        while(true) {
-
-          //System.out.println("  cycle label=" + arc.label + " (" + (char) arc.label + ") target=" + arc.target);
-          if (!seen.contains(arc.target)) {
-            final String shape;
-            if (arc.target == -1) {
-              shape = "doublecircle";
+    while (!nextLevelQueue.isEmpty()) {
+      // we could double buffer here, but it doesn't matter probably.
+      thisLevelQueue.addAll(nextLevelQueue);
+      nextLevelQueue.clear();
+
+      level++;
+      out.write("\n  // Transitions and states at level: " + level + "\n");
+      while (!thisLevelQueue.isEmpty()) {
+        final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
+        
+        if (fst.targetHasArcs(arc)) {
+          // scan all arcs
+          final int node = arc.target;
+          fst.readFirstTargetArc(arc, arc);
+          
+          while (true) {
+            // Emit the unseen state and add it to the queue for the next level.
+            if (arc.target >= 0 && !seen.get(arc.target)) {
+              emitDotState(out, Integer.toString(arc.target), stateShape, null, 
+                  labelStates ? Integer.toString(arc.target) : ""); 
+              seen.set(arc.target);
+              nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
+              sameLevelStates.add(arc.target);
+            }
+
+            String outs;
+            if (arc.output != NO_OUTPUT) {
+              outs = "/" + fst.outputs.outputToString(arc.output);
             } else {
-              shape = "circle";
+              outs = "";
+            }
+
+            final String cl;
+            if (arc.label == FST.END_LABEL) {
+              cl = "~";
+            } else {
+              cl = printableLabel(arc.label);
+            }
+
+            out.write("  " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]\n");
+            
+            // Break the loop if we're on the last arc of this state.
+            if (arc.isLast()) {
+              break;
             }
-            out.println("  " + arc.target + " [shape=" + shape + "];");
-            seen.add(arc.target);
-            queue.add(new FST.Arc<T>().copyFrom(arc));
-            //System.out.println("    new!");
-          }
-          String outs;
-          if (arc.output != NO_OUTPUT) {
-            outs = "/" + fst.outputs.outputToString(arc.output);
-          } else {
-            outs = "";
-          }
-          final char cl;
-          if (arc.label == FST.END_LABEL) {
-            cl = '~';
-          } else {
-            cl = (char) arc.label;
-          }
-          out.println("  " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]");
-          //if (arc.flag(FST.BIT_TARGET_NEXT)) {
-          //out.print(" color=blue");
-          //}
-          //out.println("];");
-        
-          if (arc.isLast()) {
-            break;
-          } else {
             fst.readNextArc(arc);
           }
         }
       }
+
+      // Emit state ranking information.
+      if (sameRank && sameLevelStates.size() > 1) {
+        out.write("  {rank=same; ");
+        for (int state : sameLevelStates) {
+          out.write(state + "; ");
+        }
+        out.write(" }\n");
+      }
+      sameLevelStates.clear();                
+    }
+    
+    // Emit terminating state (always there anyway).
+    out.write("  -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
+    out.write("  {rank=sink; -1 } ");
+    
+    out.write("}\n");
+    out.flush();
+  }
+
+  /**
+   * Emit a single state in the <code>dot</code> language. 
+   */
+  private static void emitDotState(Writer out, String name, String shape,
+      String color, String label) throws IOException {
+    out.write("  " + name 
+        + " [" 
+        + (shape != null ? "shape=" + shape : "") + " "
+        + (color != null ? "color=" + color : "") + " "
+        + (label != null ? "label=\"" + label + "\"" : "label=\"\"") + " "
+        + "]\n");
+  }
+
+  /**
+   * Ensures an arc's label is indeed printable (dot uses US-ASCII). 
+   */
+  private static String printableLabel(int label) {
+    if (label >= 0x20 && label <= 0x7d) {
+      return Character.toString((char) label);
+    } else {
+      return "0x" + Integer.toHexString(label);
     }
-    out.println("}");
   }
 }