You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2010/10/20 14:21:54 UTC

svn commit: r1025532 - /lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java

Author: uschindler
Date: Wed Oct 20 12:21:54 2010
New Revision: 1025532

URL: http://svn.apache.org/viewvc?rev=1025532&view=rev
Log:
LUCENE-2715: Use an internal attribute to cache DFAs in FuzzyTermsEnum during per-segment search

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1025532&r1=1025531&r2=1025532&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Wed Oct 20 12:21:54 2010
@@ -22,6 +22,8 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -34,7 +36,7 @@ import org.apache.lucene.util.automaton.
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
 import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 
@@ -53,6 +55,7 @@ public final class FuzzyTermsEnum extend
     attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
   
   private final MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt;
+  private final Priv.LevenshteinAutomataAttribute dfaAtt;
   
   private float bottom;
   private BytesRef bottomTerm;
@@ -67,8 +70,6 @@ public final class FuzzyTermsEnum extend
   private int maxEdits;
   private final boolean raw;
 
-  private List<ByteRunAutomaton> runAutomata;
-  
   private final IndexReader reader;
   private final Term term;
   private final int termText[];
@@ -83,6 +84,9 @@ public final class FuzzyTermsEnum extend
    * valid term if such a term exists. 
    * 
    * @param reader Delivers terms.
+   * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
+   * that contains information about competitive boosts during rewrite. It is also used
+   * to cache DFAs between segment transitions.
    * @param term Pattern term.
    * @param minSimilarity Minimum required similarity for terms from the reader.
    * @param prefixLength Length of required common prefix. Default value is 0.
@@ -105,6 +109,7 @@ public final class FuzzyTermsEnum extend
     for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
            termText[j++] = cp = utf16.codePointAt(i);
     this.termLength = termText.length;
+    this.dfaAtt = atts.addAttribute(Priv.LevenshteinAutomataAttribute.class);
 
     //The prefix could be longer than the word.
     //It's kind of silly though.  It means we must match the entire word.
@@ -134,35 +139,35 @@ public final class FuzzyTermsEnum extend
    */
   private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
       throws IOException {
-    initAutomata(editDistance);
-    if (runAutomata != null && editDistance < runAutomata.size()) {
+    final List<ByteRunAutomaton> runAutomata = initAutomata(editDistance);
+    if (editDistance < runAutomata.size()) {
       return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1)
-          .toArray(new ByteRunAutomaton[0]), lastTerm);
+          .toArray(new ByteRunAutomaton[editDistance + 1]), lastTerm);
     } else {
       return null;
     }
   }
 
   /** initialize levenshtein DFAs up to maxDistance, if possible */
-  private void initAutomata(int maxDistance) {
-    if (runAutomata == null && 
+  private List<ByteRunAutomaton> initAutomata(int maxDistance) {
+    final List<ByteRunAutomaton> runAutomata = dfaAtt.automata();
+    if (runAutomata.size() <= maxDistance && 
         maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
       LevenshteinAutomata builder = 
         new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength));
 
-      final ByteRunAutomaton[] ra = new ByteRunAutomaton[maxDistance + 1];
-      for (int i = 0; i <= maxDistance; i++) {
+      for (int i = runAutomata.size(); i <= maxDistance; i++) {
         Automaton a = builder.toAutomaton(i);
         // constant prefix
         if (realPrefixLength > 0) {
           Automaton prefix = BasicAutomata.makeString(
-              UnicodeUtil.newString(termText, 0, realPrefixLength));
+            UnicodeUtil.newString(termText, 0, realPrefixLength));
           a = BasicOperations.concatenate(prefix, a);
         }
-        ra[i] = new ByteRunAutomaton(a);
+        runAutomata.add(new ByteRunAutomaton(a));
       }
-      runAutomata = Arrays.asList(ra);
     }
+    return runAutomata;
   }
 
   /** swap in a new actual enum to proxy to */
@@ -545,4 +550,50 @@ public final class FuzzyTermsEnum extend
   public float getScaleFactor() {
     return scale_factor;
   }
+  
+  // Wrapper class to hide the attribute from outside!
+  private static final class Priv {
+  
+    /** @lucene.internal */
+    public static interface LevenshteinAutomataAttribute extends Attribute {
+      public List<ByteRunAutomaton> automata();
+    }
+    
+    /** @lucene.internal */
+    public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
+      private final List<ByteRunAutomaton> automata = new ArrayList<ByteRunAutomaton>();
+      
+      public List<ByteRunAutomaton> automata() {
+        return automata;
+      }
+
+      @Override
+      public void clear() {
+        automata.clear();
+      }
+
+      @Override
+      public int hashCode() {
+        return automata.hashCode();
+      }
+
+      @Override
+      public boolean equals(Object other) {
+        if (this == other)
+          return true;
+        if (!(other instanceof LevenshteinAutomataAttributeImpl))
+          return false;
+        return automata.equals(((LevenshteinAutomataAttributeImpl) other).automata);
+      }
+
+      @Override
+      public void copyTo(AttributeImpl target) {
+        final List<ByteRunAutomaton> targetAutomata =
+          ((LevenshteinAutomataAttribute) target).automata();
+        targetAutomata.clear();
+        targetAutomata.addAll(automata);
+      }
+    }
+    
+  }
 }