You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 16:07:19 UTC

svn commit: r1160700 [8/22] - in /lucene/dev/branches/flexscoring: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contrib/demo/ dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/contrib/q...

Copied: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (from r1160237, lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java?p2=lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java&r1=1160237&r2=1160700&rev=1160700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java Tue Aug 23 14:06:58 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
 import org.apache.lucene.util.ArrayUtil;
 import java.io.IOException;
 import java.util.Comparator;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java Tue Aug 23 14:06:58 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.cache.EntryCreator;
@@ -654,6 +655,18 @@ public interface FieldCache {
   throws IOException;
 
   /**
+   * Checks the internal cache for an appropriate entry, and if none is found, reads the term values
+   * in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
+   * the terms (as ords) per document.
+   *
+   * @param reader  Used to build a {@link DocTermOrds} instance
+   * @param field   Which field contains the strings.
+   * @return a {@link DocTermOrds} instance
+   * @throws IOException  If any error occurs.
+   */
+  public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException;
+
+  /**
    * EXPERT: A unique Identifier/Description for each item in the FieldCache. 
    * Can be useful for logging/debugging.
    * @lucene.experimental

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Aug 23 14:06:58 2011
@@ -17,32 +17,16 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.WeakHashMap;
-
+import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.cache.ByteValuesCreator;
-import org.apache.lucene.search.cache.DocTermsCreator;
-import org.apache.lucene.search.cache.DocTermsIndexCreator;
-import org.apache.lucene.search.cache.DoubleValuesCreator;
-import org.apache.lucene.search.cache.EntryCreator;
-import org.apache.lucene.search.cache.FloatValuesCreator;
-import org.apache.lucene.search.cache.IntValuesCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
-import org.apache.lucene.search.cache.ShortValuesCreator;
-import org.apache.lucene.search.cache.CachedArray.ByteValues;
-import org.apache.lucene.search.cache.CachedArray.DoubleValues;
-import org.apache.lucene.search.cache.CachedArray.FloatValues;
-import org.apache.lucene.search.cache.CachedArray.IntValues;
-import org.apache.lucene.search.cache.CachedArray.LongValues;
-import org.apache.lucene.search.cache.CachedArray.ShortValues;
+import org.apache.lucene.search.cache.*;
+import org.apache.lucene.search.cache.CachedArray.*;
 import org.apache.lucene.util.FieldCacheSanityChecker;
 
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.*;
+
 /**
  * Expert: The default cache implementation, storing all values in memory.
  * A WeakHashMap is used for storage.
@@ -61,7 +45,7 @@ public class FieldCacheImpl implements F
     init();
   }
   private synchronized void init() {
-    caches = new HashMap<Class<?>,Cache>(7);
+    caches = new HashMap<Class<?>,Cache>(9);
     caches.put(Byte.TYPE, new Cache<ByteValues>(this));
     caches.put(Short.TYPE, new Cache<ShortValues>(this));
     caches.put(Integer.TYPE, new Cache<IntValues>(this));
@@ -70,6 +54,7 @@ public class FieldCacheImpl implements F
     caches.put(Double.TYPE, new Cache<DoubleValues>(this));
     caches.put(DocTermsIndex.class, new Cache<DocTermsIndex>(this));
     caches.put(DocTerms.class, new Cache<DocTerms>(this));
+    caches.put(DocTermOrds.class, new Cache<DocTermOrds>(this));
   }
   
   public synchronized void purgeAllCaches() {
@@ -393,6 +378,11 @@ public class FieldCacheImpl implements F
     return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator));
   }
 
+  @SuppressWarnings("unchecked")
+  public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException {
+    return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, new DocTermOrdsCreator(field, 0)));
+  }
+
   private volatile PrintStream infoStream;
 
   public void setInfoStream(PrintStream stream) {

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -28,6 +28,8 @@ import org.apache.lucene.index.DocsAndPo
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 
+// TODO: move this class to oal.index
+
 /**
  * Abstract class for enumerating a subset of all terms. 
  * 
@@ -42,7 +44,7 @@ import org.apache.lucene.util.Bits;
 public abstract class FilteredTermsEnum extends TermsEnum {
 
   private BytesRef initialSeekTerm = null;
-  private boolean doSeek = true;        
+  private boolean doSeek;
   private BytesRef actualTerm = null;
 
   private final TermsEnum tenum;
@@ -64,8 +66,17 @@ public abstract class FilteredTermsEnum 
    * @param tenum the terms enumeration to filter.
    */
   public FilteredTermsEnum(final TermsEnum tenum) {
+    this(tenum, true);
+  }
+
+  /**
+   * Creates a filtered {@link TermsEnum} on a terms enum.
+   * @param tenum the terms enumeration to filter.
+   */
+  public FilteredTermsEnum(final TermsEnum tenum, final boolean startWithSeek) {
     assert tenum != null;
     this.tenum = tenum;
+    doSeek = startWithSeek;
   }
 
   /**
@@ -190,18 +201,23 @@ public abstract class FilteredTermsEnum 
   @SuppressWarnings("fallthrough")
   @Override
   public BytesRef next() throws IOException {
+    //System.out.println("FTE.next doSeek=" + doSeek);
+    //new Throwable().printStackTrace(System.out);
     for (;;) {
       // Seek or forward the iterator
       if (doSeek) {
         doSeek = false;
         final BytesRef t = nextSeekTerm(actualTerm);
+        //System.out.println("  seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
         // Make sure we always seek forward:
         assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
         if (t == null || tenum.seekCeil(t, false) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
+          //System.out.println("  return null");
           return null;
         }
         actualTerm = tenum.term();
+        //System.out.println("  got term=" + actualTerm.utf8ToString());
       } else {
         actualTerm = tenum.next();
         if (actualTerm == null) {

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java Tue Aug 23 14:06:58 2011
@@ -137,12 +137,10 @@ public class FuzzyQuery extends MultiTer
 
   @Override
   protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
-    TermsEnum tenum = terms.iterator();
-    
     if (!termLongEnough) {  // can only match if it's exact
-      return new SingleTermsEnum(tenum, term);
+      return new SingleTermsEnum(terms.iterator(), term.bytes());
     }
-    return new FuzzyTermsEnum(tenum, atts, getTerm(), minimumSimilarity, prefixLength);
+    return new FuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
   }
   
   /**

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -17,12 +17,17 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeSource;
@@ -34,13 +39,9 @@ import org.apache.lucene.util.automaton.
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
 /** Subclass of TermsEnum for enumerating all terms that are similar
  * to the specified filter term.
  *
@@ -72,7 +73,7 @@ public final class FuzzyTermsEnum extend
   private int maxEdits;
   private final boolean raw;
 
-  private final TermsEnum tenum;
+  private final Terms terms;
   private final Term term;
   private final int termText[];
   private final int realPrefixLength;
@@ -85,7 +86,7 @@ public final class FuzzyTermsEnum extend
    * After calling the constructor the enumeration is already pointing to the first 
    * valid term if such a term exists. 
    * 
-   * @param tenum Delivers terms.
+   * @param terms Delivers terms.
    * @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
    * thats contains information about competitive boosts during rewrite. It is also used
    * to cache DFAs between segment transitions.
@@ -94,7 +95,7 @@ public final class FuzzyTermsEnum extend
    * @param prefixLength Length of required common prefix. Default value is 0.
    * @throws IOException
    */
-  public FuzzyTermsEnum(TermsEnum tenum, AttributeSource atts, Term term, 
+  public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, 
       final float minSimilarity, final int prefixLength) throws IOException {
     if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
       throw new IllegalArgumentException("fractional edit distances are not allowed");
@@ -102,7 +103,7 @@ public final class FuzzyTermsEnum extend
       throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
     if(prefixLength < 0)
       throw new IllegalArgumentException("prefixLength cannot be less than 0");
-    this.tenum = tenum;
+    this.terms = terms;
     this.term = term;
 
     // convert the string into a utf32 int[] representation for fast comparisons
@@ -143,8 +144,10 @@ public final class FuzzyTermsEnum extend
       throws IOException {
     final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
     if (editDistance < runAutomata.size()) {
-      return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1)
-          .toArray(new CompiledAutomaton[editDistance + 1]), lastTerm);
+      //if (BlockTreeTermsWriter.DEBUG) System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
+      final CompiledAutomaton compiled = runAutomata.get(editDistance);
+      return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRef())),
+                                         runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
     } else {
       return null;
     }
@@ -153,6 +156,7 @@ public final class FuzzyTermsEnum extend
   /** initialize levenshtein DFAs up to maxDistance, if possible */
   private List<CompiledAutomaton> initAutomata(int maxDistance) {
     final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
+    //System.out.println("cached automata size: " + runAutomata.size());
     if (runAutomata.size() <= maxDistance && 
         maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
       LevenshteinAutomata builder = 
@@ -160,13 +164,14 @@ public final class FuzzyTermsEnum extend
 
       for (int i = runAutomata.size(); i <= maxDistance; i++) {
         Automaton a = builder.toAutomaton(i);
+        //System.out.println("compute automaton n=" + i);
         // constant prefix
         if (realPrefixLength > 0) {
           Automaton prefix = BasicAutomata.makeString(
             UnicodeUtil.newString(termText, 0, realPrefixLength));
           a = BasicOperations.concatenate(prefix, a);
         }
-        runAutomata.add(new CompiledAutomaton(a, true));
+        runAutomata.add(new CompiledAutomaton(a, true, false));
       }
     }
     return runAutomata;
@@ -301,65 +306,65 @@ public final class FuzzyTermsEnum extend
   public BytesRef term() throws IOException {
     return actualEnum.term();
   }
-  
+
   /**
-   * Implement fuzzy enumeration with automaton.
+   * Implement fuzzy enumeration with Terms.intersect.
    * <p>
    * This is the fastest method as opposed to LinearFuzzyTermsEnum:
    * as enumeration is logarithmic to the number of terms (instead of linear)
    * and comparison is linear to length of the term (rather than quadratic)
    */
-  private class AutomatonFuzzyTermsEnum extends AutomatonTermsEnum {
+  private class AutomatonFuzzyTermsEnum extends FilteredTermsEnum {
     private final ByteRunAutomaton matchers[];
     
     private final BytesRef termRef;
     
-    private final BytesRef lastTerm;
     private final BoostAttribute boostAtt =
       attributes().addAttribute(BoostAttribute.class);
     
-    public AutomatonFuzzyTermsEnum(CompiledAutomaton compiled[], 
-        BytesRef lastTerm) throws IOException {
-      super(tenum, compiled[compiled.length - 1]);
+    public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) 
+      throws IOException {
+      super(tenum, false);
       this.matchers = new ByteRunAutomaton[compiled.length];
       for (int i = 0; i < compiled.length; i++)
         this.matchers[i] = compiled[i].runAutomaton;
-      this.lastTerm = lastTerm;
       termRef = new BytesRef(term.text());
     }
-    
+
     /** finds the smallest Lev(n) DFA that accepts the term. */
     @Override
     protected AcceptStatus accept(BytesRef term) {    
+      //System.out.println("AFTE.accept term=" + term);
       int ed = matchers.length - 1;
       
-      if (matches(term, ed)) { // we match the outer dfa
-        // now compute exact edit distance
-        while (ed > 0) {
-          if (matches(term, ed - 1)) {
-            ed--;
-          } else {
-            break;
-          }
-        }
-        
-        // scale to a boost and return (if similarity > minSimilarity)
-        if (ed == 0) { // exact match
-          boostAtt.setBoost(1.0F);
-          return AcceptStatus.YES_AND_SEEK;
+      // we are wrapping either an intersect() TermsEnum or an AutomatonTermsENum,
+      // so we know the outer DFA always matches.
+      // now compute exact edit distance
+      while (ed > 0) {
+        if (matches(term, ed - 1)) {
+          ed--;
         } else {
-          final int codePointCount = UnicodeUtil.codePointCount(term);
-          final float similarity = 1.0f - ((float) ed / (float) 
-              (Math.min(codePointCount, termLength)));
-          if (similarity > minSimilarity) {
-            boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
-            return AcceptStatus.YES_AND_SEEK;
-          } else {
-            return AcceptStatus.NO_AND_SEEK;
-          }
+          break;
         }
+      }
+      //System.out.println("CHECK term=" + term.utf8ToString() + " ed=" + ed);
+      
+      // scale to a boost and return (if similarity > minSimilarity)
+      if (ed == 0) { // exact match
+        boostAtt.setBoost(1.0F);
+        //System.out.println("  yes");
+        return AcceptStatus.YES;
       } else {
-        return AcceptStatus.NO_AND_SEEK;
+        final int codePointCount = UnicodeUtil.codePointCount(term);
+        final float similarity = 1.0f - ((float) ed / (float) 
+            (Math.min(codePointCount, termLength)));
+        if (similarity > minSimilarity) {
+          boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
+          //System.out.println("  yes");
+          return AcceptStatus.YES;
+        } else {
+          return AcceptStatus.NO;
+        }
       }
     }
     
@@ -367,16 +372,8 @@ public final class FuzzyTermsEnum extend
     final boolean matches(BytesRef term, int k) {
       return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
     }
-    
-    /** defers to superclass, except can start at an arbitrary location */
-    @Override
-    protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
-      if (term == null)
-        term = lastTerm;
-      return super.nextSeekTerm(term);
-    }
   }
-  
+
   /**
    * Implement fuzzy enumeration with linear brute force.
    */
@@ -408,7 +405,7 @@ public final class FuzzyTermsEnum extend
      * @throws IOException
      */
     public LinearFuzzyTermsEnum() throws IOException {
-      super(tenum);
+      super(terms.iterator());
 
       this.text = new int[termLength - realPrefixLength];
       System.arraycopy(termText, realPrefixLength, text, 0, text.length);

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Aug 23 14:06:58 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
@@ -69,7 +70,7 @@ import org.apache.lucene.util.ThreadInte
  * synchronize on the <code>IndexSearcher</code> instance;
  * use your own (non-Lucene) objects instead.</p>
  */
-public class IndexSearcher {
+public class IndexSearcher implements Closeable {
   final IndexReader reader; // package private for testing!
   private boolean closeReader;
   
@@ -267,6 +268,7 @@ public class IndexSearcher {
    * If the IndexReader was supplied implicitly by specifying a directory, then
    * the IndexReader is closed.
    */
+  @Override
   public void close() throws IOException {
     if (closeReader) {
       reader.close();
@@ -882,6 +884,6 @@ public class IndexSearcher {
 
   @Override
   public String toString() {
-    return "IndexSearcher(" + reader + ")";
+    return "IndexSearcher(" + reader + "; executor=" + executor + ")";
   }
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java Tue Aug 23 14:06:58 2011
@@ -51,7 +51,7 @@ public class PrefixQuery extends MultiTe
       // no prefix -- match all terms for this field:
       return tenum;
     }
-    return new PrefixTermsEnum(tenum, prefix);
+    return new PrefixTermsEnum(tenum, prefix.bytes());
   }
 
   /** Prints a user-readable version of this query. */

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 
@@ -34,9 +33,9 @@ public class PrefixTermsEnum extends Fil
 
   private final BytesRef prefixRef;
 
-  public PrefixTermsEnum(TermsEnum tenum, Term prefix) throws IOException {
+  public PrefixTermsEnum(TermsEnum tenum, BytesRef prefixText) throws IOException {
     super(tenum);
-    setInitialSeekTerm(prefixRef = prefix.bytes());
+    setInitialSeekTerm(this.prefixRef = prefixText);
   }
 
   @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java Tue Aug 23 14:06:58 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-
-import org.apache.lucene.search.BooleanClause.Occur;
+import java.util.Collection;
+import java.util.Collections;
 
 /**
  * Expert: Common scoring functionality for different types of queries.
@@ -101,83 +101,30 @@ public abstract class Scorer extends Doc
   public float freq() throws IOException {
     throw new UnsupportedOperationException(this + " does not implement freq()");
   }
-
-  /**
-   * A callback to gather information from a scorer and its sub-scorers. Each
-   * the top-level scorer as well as each of its sub-scorers are passed to
-   * either one of the visit methods depending on their boolean relationship in
-   * the query.
-   * @lucene.experimental
-   */
-  public static abstract class ScorerVisitor<P extends Query, C extends Query, S extends Scorer> {
-    /**
-     * Invoked for all optional scorer 
-     * 
-     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
-     * @param child the query of the currently visited scorer
-     * @param scorer the current scorer
-     */
-    public void visitOptional(P parent, C child, S scorer) {}
-    
-    /**
-     * Invoked for all required scorer 
-     * 
-     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
-     * @param child the query of the currently visited scorer
-     * @param scorer the current scorer
-     */
-    public void visitRequired(P parent, C child, S scorer) {}
-    
-    /**
-     * Invoked for all prohibited scorer 
-     * 
-     * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
-     * @param child the query of the currently visited scorer
-     * @param scorer the current scorer
-     */
-    public void visitProhibited(P parent, C child, S scorer) {}
-  } 
-
-  /**
-   * Expert: call this to gather details for all sub-scorers for this query.
-   * This can be used, in conjunction with a custom {@link Collector} to gather
-   * details about how each sub-query matched the current hit.
-   * 
-   * @param visitor a callback executed for each sub-scorer
+  
+  /** returns parent Weight
    * @lucene.experimental
    */
-  public void visitScorers(ScorerVisitor<Query, Query, Scorer> visitor) {
-    visitSubScorers(null, Occur.MUST/*must id default*/, visitor);
+  public Weight getWeight() {
+    return weight;
   }
-
-  /**
-   * {@link Scorer} subclasses should implement this method if the subclass
-   * itself contains multiple scorers to support gathering details for
-   * sub-scorers via {@link ScorerVisitor}
-   * <p>
-   * Note: this method will throw {@link UnsupportedOperationException} if no
-   * associated {@link Weight} instance is provided to
-   * {@link #Scorer(Weight)}
-   * </p>
-   * 
-   * @lucene.experimental
-   */
-  protected void visitSubScorers(Query parent, Occur relationship,
-      ScorerVisitor<Query, Query, Scorer> visitor) {
-    if (weight == null)
-      throw new UnsupportedOperationException();
-
-    final Query q = weight.getQuery();
-    switch (relationship) {
-    case MUST:
-      visitor.visitRequired(parent, q, this);
-      break;
-    case MUST_NOT:
-      visitor.visitProhibited(parent, q, this);
-      break;
-    case SHOULD:
-      visitor.visitOptional(parent, q, this);
-      break;
+  
+  /** Returns child sub-scorers
+   * @lucene.experimental */
+  public Collection<ChildScorer> getChildren() {
+    return Collections.emptyList();
+  }
+  
+  /** a child Scorer and its relationship to its parent.
+   * the meaning of the relationship depends upon the parent query. 
+   * @lucene.experimental */
+  public static class ChildScorer {
+    public final Scorer child;
+    public final String relationship;
+    
+    public ChildScorer(Scorer child, String relationship) {
+      this.child = child;
+      this.relationship = relationship;
     }
   }
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 
@@ -39,10 +38,10 @@ public final class SingleTermsEnum exten
    * After calling the constructor the enumeration is already pointing to the term,
    * if it exists.
    */
-  public SingleTermsEnum(TermsEnum tenum, Term singleTerm) throws IOException {
+  public SingleTermsEnum(TermsEnum tenum, BytesRef termText) throws IOException {
     super(tenum);
-    singleRef = singleTerm.bytes();
-    setInitialSeekTerm(singleRef);
+    singleRef = termText;
+    setInitialSeekTerm(termText);
   }
 
   @Override

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Aug 23 14:06:58 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.Term;
@@ -42,7 +43,7 @@ public class TermQuery extends Query {
   private int docFreq;
   private transient TermContext perReaderTermState;
 
-  private class TermWeight extends Weight {
+  final class TermWeight extends Weight {
     private final Similarity similarity;
     private final Similarity.Stats stats;
     private transient TermContext termStates;
@@ -73,21 +74,43 @@ public class TermQuery extends Query {
 
     @Override
     public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
-      final String field = term.field();
-      final IndexReader reader = context.reader;
       assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+      final TermsEnum termsEnum = getTermsEnum(context);
+      if (termsEnum == null) {
+        return null;
+      }
+      // TODO should we reuse the DocsEnum here? 
+      final DocsEnum docs = termsEnum.docs(context.reader.getLiveDocs(), null);
+      assert docs != null;
+      return new TermScorer(this, docs, createDocScorer(context));
+    }
+    
+    /**
+     * Creates an {@link ExactDocScorer} for this {@link TermWeight}*/
+    ExactDocScorer createDocScorer(AtomicReaderContext context)
+        throws IOException {
+      return similarity.exactDocScorer(stats, term.field(), context);
+    }
+    
+    /**
+     * Returns a {@link TermsEnum} positioned at this weights Term or null if
+     * the term does not exist in the given context
+     */
+    TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
       final TermState state = termStates.get(context.ord);
       if (state == null) { // term is not present in that reader
-        assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader";
+        assert termNotInReader(context.reader, term.field(), term.bytes()) : "no termstate found but term exists in reader term=" + term;
         return null;
       }
-      final DocsEnum docs = reader.termDocsEnum(reader.getLiveDocs(), field, term.bytes(), state);
-      assert docs != null;
-      return new TermScorer(this, docs, similarity.exactDocScorer(stats, field, context));
+      //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
+      final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+      termsEnum.seekExact(term.bytes(), state);
+      return termsEnum;
     }
     
     private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
       // only called from assert
+      //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
       final Terms terms = reader.terms(field);
       return terms == null || terms.docFreq(bytes) == 0;
     }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Aug 23 14:06:58 2011
@@ -71,6 +71,7 @@ final class TermScorer extends Scorer {
   public boolean score(Collector c, int end, int firstDocID) throws IOException {
     c.setScorer(this);
     while (doc < end) {                           // for docs in window
+      //System.out.println("TS: collect doc=" + doc);
       c.collect(doc);                      // collect score
       if (++pointer >= pointerMax) {
         refillBuffer();

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java Tue Aug 23 14:06:58 2011
@@ -82,8 +82,13 @@ public abstract class TopScoreDocCollect
       assert !Float.isNaN(score);
 
       totalHits++;
+      if (score < pqTop.score) {
+        // Doesn't compete w/ bottom entry in queue
+        return;
+      }
       doc += docBase;
-      if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) {
+      if (score == pqTop.score && doc > pqTop.doc) {
+        // Break tie in score by doc ID:
         return;
       }
       pqTop.doc = doc;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Tue Aug 23 14:06:58 2011
@@ -78,15 +78,38 @@ public abstract class TopTermsRewrite<Q 
       public void setNextEnum(TermsEnum termsEnum) throws IOException {
         this.termsEnum = termsEnum;
         this.termComp = termsEnum.getComparator();
+        
+        assert compareToLastTerm(null);
+
         // lazy init the initial ScoreTerm because comparator is not known on ctor:
         if (st == null)
           st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
         boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
       }
     
+      // for assert:
+      private BytesRef lastTerm;
+      private boolean compareToLastTerm(BytesRef t) throws IOException {
+        if (lastTerm == null && t != null) {
+          lastTerm = new BytesRef(t);
+        } else if (t == null) {
+          lastTerm = null;
+        } else {
+          assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
+          lastTerm.copy(t);
+        }
+        return true;
+      }
+  
       @Override
       public boolean collect(BytesRef bytes) throws IOException {
         final float boost = boostAtt.getBoost();
+
+        // make sure within a single seg we always collect
+        // terms in order
+        assert compareToLastTerm(bytes);
+
+        //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
         // ignore uncompetitive hits
         if (stQueue.size() == maxSize) {
           final ScoreTerm t = stQueue.peek();
@@ -134,9 +157,10 @@ public abstract class TopTermsRewrite<Q 
     final Q q = getTopLevelQuery();
     final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
     ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
+    
     for (final ScoreTerm st : scoreTerms) {
       final Term term = new Term(query.field, st.bytes);
-      assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
+      assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq() + " term=" + term;
       addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
     }
     query.incTotalNumberOfTerms(scoreTerms.length);

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java Tue Aug 23 14:06:58 2011
@@ -43,16 +43,30 @@ public final class ByteArrayDataInput ex
     reset(bytes, 0, bytes.length);
   }
 
+  // NOTE: sets pos to 0, which is not right if you had
+  // called reset w/ non-zero offset!!
+  public void rewind() {
+    pos = 0;
+  }
+
   public int getPosition() {
     return pos;
   }
 
+  public void setPosition(int pos) {
+    this.pos = pos;
+  }
+
   public void reset(byte[] bytes, int offset, int len) {
     this.bytes = bytes;
     pos = offset;
     limit = offset + len;
   }
 
+  public int length() {
+    return limit;
+  }
+
   public boolean eof() {
     return pos == limit;
   }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java Tue Aug 23 14:06:58 2011
@@ -19,10 +19,6 @@ package org.apache.lucene.store;
 
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.Lock;
 import org.apache.lucene.util.IOUtils;
 
 import java.util.Collection;
@@ -189,14 +185,14 @@ public abstract class CompoundFileDirect
   }
   
   @Override
-  public synchronized IndexInput openInput(String id, IOContext context) throws IOException {
+  public synchronized IndexInput openInput(String fileName, IOContext context) throws IOException {
     ensureOpen();
     assert !openForWrite;
-    id = IndexFileNames.stripSegmentName(id);
+    final String id = IndexFileNames.stripSegmentName(fileName);
     final FileEntry entry = entries.get(id);
-    if (entry == null)
-      throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");
-    
+    if (entry == null) {
+      throw new IOException("No sub-file with id " + id + " found (fileName=" + fileName + " files: " + entries.keySet() + ")");
+    }
     return openInputSlice(id, entry.offset, entry.length, readBufferSize);
   }
   

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java Tue Aug 23 14:06:58 2011
@@ -448,6 +448,7 @@ public abstract class FSDirectory extend
     /** output methods: */
     @Override
     public void flushBuffer(byte[] b, int offset, int size) throws IOException {
+      assert isOpen;
       if (rateLimiter != null) {
         rateLimiter.pause(size);
       }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java Tue Aug 23 14:06:58 2011
@@ -78,11 +78,38 @@ public class FileSwitchDirectory extends
   @Override
   public String[] listAll() throws IOException {
     Set<String> files = new HashSet<String>();
-    for(String f : primaryDir.listAll()) {
-      files.add(f);
+    // LUCENE-3380: either or both of our dirs could be FSDirs,
+    // but if one underlying delegate is an FSDir and mkdirs() has not
+    // yet been called, because so far everything is written to the other,
+    // in this case, we don't want to throw a NoSuchDirectoryException
+    NoSuchDirectoryException exc = null;
+    try {
+      for(String f : primaryDir.listAll()) {
+        files.add(f);
+      }
+    } catch (NoSuchDirectoryException e) {
+      exc = e;
     }
-    for(String f : secondaryDir.listAll()) {
-      files.add(f);
+    try {
+      for(String f : secondaryDir.listAll()) {
+        files.add(f);
+      }
+    } catch (NoSuchDirectoryException e) {
+      // we got NoSuchDirectoryException from both dirs
+      // rethrow the first.
+      if (exc != null) {
+        throw exc;
+      }
+      // we got NoSuchDirectoryException from the secondary,
+      // and the primary is empty.
+      if (files.isEmpty()) {
+        throw e;
+      }
+    }
+    // we got NoSuchDirectoryException from the primary,
+    // and the secondary is empty.
+    if (exc != null && files.isEmpty()) {
+      throw exc;
     }
     return files.toArray(new String[files.size()]);
   }
@@ -150,13 +177,19 @@ public class FileSwitchDirectory extends
     return getDirectory(name).openInput(name, context);
   }
 
+  // final due to LUCENE-3380: currently CFS backdoors the directory to create CFE
+  // by using the basic implementation and not delegating, we ensure that all 
+  // openInput/createOutput requests come thru NRTCachingDirectory.
   @Override
-  public CompoundFileDirectory openCompoundInput(String name, IOContext context) throws IOException {
-    return getDirectory(name).openCompoundInput(name, context);
+  public final CompoundFileDirectory openCompoundInput(String name, IOContext context) throws IOException {
+    return super.openCompoundInput(name, context);
   }
   
+  // final due to LUCENE-3380: currently CFS backdoors the directory to create CFE
+  // by using the basic implementation and not delegating, we ensure that all 
+  // openInput/createOutput requests come thru NRTCachingDirectory.
   @Override
-  public CompoundFileDirectory createCompoundOutput(String name, IOContext context) throws IOException {
-    return getDirectory(name).createCompoundOutput(name, context);
+  public final CompoundFileDirectory createCompoundOutput(String name, IOContext context) throws IOException {
+    return super.createCompoundOutput(name, context);
   }
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java Tue Aug 23 14:06:58 2011
@@ -29,7 +29,7 @@ public class RAMFile {
   private long lastModified = System.currentTimeMillis();
 
   // File used as buffer, in no RAMDirectory
-  protected RAMFile() {}
+  public RAMFile() {}
   
   RAMFile(RAMDirectory directory) {
     this.directory = directory;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java Tue Aug 23 14:06:58 2011
@@ -19,8 +19,10 @@ package org.apache.lucene.store;
 
 import java.io.IOException;
 
-/** A memory-resident {@link IndexInput} implementation. */
-class RAMInputStream extends IndexInput implements Cloneable {
+/** A memory-resident {@link IndexInput} implementation. 
+ *  
+ *  @lucene.internal */
+public class RAMInputStream extends IndexInput implements Cloneable {
   static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
 
   private RAMFile file;
@@ -33,7 +35,7 @@ class RAMInputStream extends IndexInput 
   private long bufferStart;
   private int bufferLength;
 
-  RAMInputStream(RAMFile f) throws IOException {
+  public RAMInputStream(RAMFile f) throws IOException {
     file = f;
     length = file.length;
     if (length/BUFFER_SIZE >= Integer.MAX_VALUE) {

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java Tue Aug 23 14:06:58 2011
@@ -353,6 +353,11 @@ public final class BitVector implements 
       } else {
         readBits(input);
       }
+
+      if (version < VERSION_DGAPS_CLEARED) {
+        invertAll();
+      }
+
       assert verifyCount();
     } finally {
       input.close();

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java Tue Aug 23 14:06:58 2011
@@ -65,6 +65,18 @@ public final class BytesRef implements C
     this.bytes = new byte[capacity];
   }
 
+  /** Incoming IntsRef values must be Byte.MIN_VALUE -
+   *  Byte.MAX_VALUE. */
+  public BytesRef(IntsRef intsRef) {
+    bytes = new byte[intsRef.length];
+    for(int idx=0;idx<intsRef.length;idx++) {
+      final int v = intsRef.ints[intsRef.offset + idx];
+      assert v >= Byte.MIN_VALUE && v <= Byte.MAX_VALUE;
+      bytes[idx] = (byte) v;
+    }
+    length = intsRef.length;
+  }
+
   /**
    * @param text Initialize the byte[] from the UTF8 bytes
    * for the provided Sring.  This must be well-formed

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java Tue Aug 23 14:06:58 2011
@@ -19,8 +19,11 @@ package org.apache.lucene.util;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.lang.reflect.Method;
 
-/** @lucene.internal */
+/** This class emulates the new Java 7 "Try-With-Resources" statement.
+ * Remove once Lucene is on Java 7.
+ * @lucene.internal */
 public final class IOUtils {
 
   private IOUtils() {} // no instance
@@ -55,6 +58,7 @@ public final class IOUtils {
           object.close();
         }
       } catch (Throwable t) {
+        addSuppressed((priorException == null) ? th : priorException, t);
         if (th == null) {
           th = t;
         }
@@ -81,6 +85,7 @@ public final class IOUtils {
           object.close();
         }
       } catch (Throwable t) {
+        addSuppressed((priorException == null) ? th : priorException, t);
         if (th == null) {
           th = t;
         }
@@ -118,6 +123,7 @@ public final class IOUtils {
           object.close();
         }
       } catch (Throwable t) {
+        addSuppressed(th, t);
         if (th == null)
           th = t;
       }
@@ -143,6 +149,7 @@ public final class IOUtils {
           object.close();
         }
       } catch (Throwable t) {
+        addSuppressed(th, t);
         if (th == null)
           th = t;
       }
@@ -155,5 +162,31 @@ public final class IOUtils {
       throw new RuntimeException(th);
     }
   }
+  
+  /** This reflected {@link Method} is {@code null} before Java 7 */
+  private static final Method SUPPRESS_METHOD;
+  static {
+    Method m;
+    try {
+      m = Throwable.class.getMethod("addSuppressed", Throwable.class);
+    } catch (Exception e) {
+      m = null;
+    }
+    SUPPRESS_METHOD = m;
+  }
+
+  /** adds a Throwable to the list of suppressed Exceptions of the first Throwable (if Java 7 is detected)
+   * @param exception this exception should get the suppressed one added
+   * @param suppressed the suppressed exception
+   */
+  private static final void addSuppressed(Throwable exception, Throwable suppressed) {
+    if (SUPPRESS_METHOD != null && exception != null && suppressed != null) {
+      try {
+        SUPPRESS_METHOD.invoke(exception, suppressed);
+      } catch (Exception e) {
+        // ignore any exceptions caused by invoking (e.g. security constraints)
+      }
+    }
+  }
 
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java Tue Aug 23 14:06:58 2011
@@ -21,14 +21,13 @@ import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader.ReaderContext;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
 
 /**
  * Maintains a {@link IndexReader} {@link TermState} view over
@@ -45,6 +44,9 @@ public final class TermContext {
   private int docFreq;
   private long totalTermFreq;
 
+  //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+
   /**
    * Creates an empty {@link TermContext} from a {@link ReaderContext}
    */
@@ -85,7 +87,9 @@ public final class TermContext {
     final BytesRef bytes = term.bytes();
     final TermContext perReaderTermState = new TermContext(context);
     final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
+    //if (DEBUG) System.out.println("prts.build term=" + term);
     for (int i = 0; i < leaves.length; i++) {
+      //if (DEBUG) System.out.println("  r=" + leaves[i].reader);
       final Fields fields = leaves[i].reader.fields();
       if (fields != null) {
         final Terms terms = fields.terms(field);
@@ -93,6 +97,7 @@ public final class TermContext {
           final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
           if (termsEnum.seekExact(bytes, cache)) { 
             final TermState termState = termsEnum.termState();
+            //if (DEBUG) System.out.println("    found");
             perReaderTermState.register(termState, leaves[i].ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
           }
         }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java Tue Aug 23 14:06:58 2011
@@ -53,6 +53,8 @@ public class Builder<T> {
   private final FST<T> fst;
   private final T NO_OUTPUT;
 
+  // private static final boolean DEBUG = false;
+
   // simplistic pruning: we prune node (and all following
   // nodes) if less than this number of terms go through it:
   private final int minSuffixCount1;
@@ -73,13 +75,21 @@ public class Builder<T> {
   // current "frontier"
   private UnCompiledNode<T>[] frontier;
 
+  // Expert: you pass an instance of this if you want to do
+  // something "custom" as suffixes are "frozen":
+  public static abstract class FreezeTail<T> {
+    public abstract void freeze(final UnCompiledNode<T>[] frontier, int prefixLenPlus1, IntsRef prevInput) throws IOException;
+  }
+
+  private final FreezeTail<T> freezeTail;
+
   /**
    * Instantiates an FST/FSA builder without any pruning. A shortcut
-   * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs)} with 
+   * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs, FreezeTail)} with
    * pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
-    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs);
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
   }
 
   /**
@@ -120,9 +130,11 @@ public class Builder<T> {
    *    singleton output object.
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
-                 boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs) {
+                 boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
+                 FreezeTail<T> freezeTail) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
+    this.freezeTail = freezeTail;
     this.doShareNonSingletonNodes = doShareNonSingletonNodes;
     this.shareMaxTailLength = shareMaxTailLength;
     fst = new FST<T>(inputType, outputs);
@@ -179,94 +191,100 @@ public class Builder<T> {
     return fn;
   }
 
-  private void compilePrevTail(int prefixLenPlus1) throws IOException {
-    assert prefixLenPlus1 >= 1;
-    //System.out.println("  compileTail " + prefixLenPlus1);
-    for(int idx=lastInput.length; idx >= prefixLenPlus1; idx--) {
-      boolean doPrune = false;
-      boolean doCompile = false;
+  private void freezeTail(int prefixLenPlus1) throws IOException {
+    if (freezeTail != null) {
+      // Custom plugin:
+      freezeTail.freeze(frontier, prefixLenPlus1, lastInput);
+    } else {
+      //System.out.println("  compileTail " + prefixLenPlus1);
+      final int downTo = Math.max(1, prefixLenPlus1);
+      for(int idx=lastInput.length; idx >= downTo; idx--) {
 
-      final UnCompiledNode<T> node = frontier[idx];
-      final UnCompiledNode<T> parent = frontier[idx-1];
+        boolean doPrune = false;
+        boolean doCompile = false;
 
-      if (node.inputCount < minSuffixCount1) {
-        doPrune = true;
-        doCompile = true;
-      } else if (idx > prefixLenPlus1) {
-        // prune if parent's inputCount is less than suffixMinCount2
-        if (parent.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && parent.inputCount == 1) {
-          // my parent, about to be compiled, doesn't make the cut, so
-          // I'm definitely pruned 
-
-          // if pruneCount2 is 1, we keep only up
-          // until the 'distinguished edge', ie we keep only the
-          // 'divergent' part of the FST. if my parent, about to be
-          // compiled, has inputCount 1 then we are already past the
-          // distinguished edge.  NOTE: this only works if
-          // the FST outputs are not "compressible" (simple
-          // ords ARE compressible).
+        final UnCompiledNode<T> node = frontier[idx];
+        final UnCompiledNode<T> parent = frontier[idx-1];
+
+        if (node.inputCount < minSuffixCount1) {
           doPrune = true;
+          doCompile = true;
+        } else if (idx > prefixLenPlus1) {
+          // prune if parent's inputCount is less than suffixMinCount2
+          if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1)) {
+            // my parent, about to be compiled, doesn't make the cut, so
+            // I'm definitely pruned 
+
+            // if minSuffixCount2 is 1, we keep only up
+            // until the 'distinguished edge', ie we keep only the
+            // 'divergent' part of the FST. if my parent, about to be
+            // compiled, has inputCount 1 then we are already past the
+            // distinguished edge.  NOTE: this only works if
+            // the FST outputs are not "compressible" (simple
+            // ords ARE compressible).
+            doPrune = true;
+          } else {
+            // my parent, about to be compiled, does make the cut, so
+            // I'm definitely not pruned 
+            doPrune = false;
+          }
+          doCompile = true;
         } else {
-          // my parent, about to be compiled, does make the cut, so
-          // I'm definitely not pruned 
-          doPrune = false;
+          // if pruning is disabled (count is 0) we can always
+          // compile current node
+          doCompile = minSuffixCount2 == 0;
         }
-        doCompile = true;
-      } else {
-        // if pruning is disabled (count is 0) we can always
-        // compile current node
-        doCompile = minSuffixCount2 == 0;
-      }
 
-      //System.out.println("    label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);
-
-      if (node.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && node.inputCount == 1) {
-        // drop all arcs
-        for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
-          @SuppressWarnings("unchecked") final UnCompiledNode<T> target = (UnCompiledNode<T>) node.arcs[arcIdx].target;
-          target.clear();
-        }
-        node.numArcs = 0;
-      }
+        //System.out.println("    label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);
 
-      if (doPrune) {
-        // this node doesn't make it -- deref it
-        node.clear();
-        parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
-      } else {
-
-        if (minSuffixCount2 != 0) {
-          compileAllTargets(node, lastInput.length-idx);
+        if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1)) {
+          // drop all arcs
+          for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+            @SuppressWarnings("unchecked") final UnCompiledNode<T> target = (UnCompiledNode<T>) node.arcs[arcIdx].target;
+            target.clear();
+          }
+          node.numArcs = 0;
         }
-        final T nextFinalOutput = node.output;
 
-        // We "fake" the node as being final if it has no
-        // outgoing arcs; in theory we could leave it
-        // as non-final (the FST can represent this), but
-        // FSTEnum, Util, etc., have trouble w/ non-final
-        // dead-end states:
-        final boolean isFinal = node.isFinal || node.numArcs == 0;
-
-        if (doCompile) {
-          // this node makes it and we now compile it.  first,
-          // compile any targets that were previously
-          // undecided:
-          parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
-                             compileNode(node, 1+lastInput.length-idx),
-                             nextFinalOutput,
-                             isFinal);
+        if (doPrune) {
+          // this node doesn't make it -- deref it
+          node.clear();
+          parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
         } else {
-          // replaceLast just to install
-          // nextFinalOutput/isFinal onto the arc
-          parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
-                             node,
-                             nextFinalOutput,
-                             isFinal);
-          // this node will stay in play for now, since we are
-          // undecided on whether to prune it.  later, it
-          // will be either compiled or pruned, so we must
-          // allocate a new node:
-          frontier[idx] = new UnCompiledNode<T>(this, idx);
+
+          if (minSuffixCount2 != 0) {
+            compileAllTargets(node, lastInput.length-idx);
+          }
+          final T nextFinalOutput = node.output;
+
+          // We "fake" the node as being final if it has no
+          // outgoing arcs; in theory we could leave it
+          // as non-final (the FST can represent this), but
+          // FSTEnum, Util, etc., have trouble w/ non-final
+          // dead-end states:
+          final boolean isFinal = node.isFinal || node.numArcs == 0;
+
+          if (doCompile) {
+            // this node makes it and we now compile it.  first,
+            // compile any targets that were previously
+            // undecided:
+            parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+                               compileNode(node, 1+lastInput.length-idx),
+                               nextFinalOutput,
+                               isFinal);
+          } else {
+            // replaceLast just to install
+            // nextFinalOutput/isFinal onto the arc
+            parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+                               node,
+                               nextFinalOutput,
+                               isFinal);
+            // this node will stay in play for now, since we are
+            // undecided on whether to prune it.  later, it
+            // will be either compiled or pruned, so we must
+            // allocate a new node:
+            frontier[idx] = new UnCompiledNode<T>(this, idx);
+          }
         }
       }
     }
@@ -320,11 +338,36 @@ public class Builder<T> {
     add(scratchIntsRef, output);
   }
 
+  // for debugging
+  /*
+  private String toString(BytesRef b) {
+    try {
+      return b.utf8ToString() + " " + b;
+    } catch (Throwable t) {
+      return b.toString();
+    }
+  }
+  */
+
   /** It's OK to add the same input twice in a row with
    *  different outputs, as long as outputs impls the merge
    *  method. */
   public void add(IntsRef input, T output) throws IOException {
-    //System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
+    /*
+    if (DEBUG) {
+      BytesRef b = new BytesRef(input.length);
+      for(int x=0;x<input.length;x++) {
+        b.bytes[x] = (byte) input.ints[x];
+      }
+      b.length = input.length;
+      if (output == NO_OUTPUT) {
+        System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
+      } else {
+        System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
+      }
+    }
+    */
+
     assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
     assert validOutput(output);
 
@@ -346,8 +389,8 @@ public class Builder<T> {
     int pos2 = input.offset;
     final int pos1Stop = Math.min(lastInput.length, input.length);
     while(true) {
-      //System.out.println("  incr " + pos1);
       frontier[pos1].inputCount++;
+      //System.out.println("  incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
       if (pos1 >= pos1Stop || lastInput.ints[pos1] != input.ints[pos2]) {
         break;
       }
@@ -368,13 +411,12 @@ public class Builder<T> {
 
     // minimize/compile states from previous input's
     // orphan'd suffix
-    compilePrevTail(prefixLenPlus1);
+    freezeTail(prefixLenPlus1);
 
     // init tail states for current input
     for(int idx=prefixLenPlus1;idx<=input.length;idx++) {
       frontier[idx-1].addArc(input.ints[input.offset + idx - 1],
                              frontier[idx]);
-      //System.out.println("  incr tail " + idx);
       frontier[idx].inputCount++;
     }
 
@@ -433,34 +475,25 @@ public class Builder<T> {
    *  nothing is accepted by the FST. */
   public FST<T> finish() throws IOException {
 
+    final UnCompiledNode<T> root = frontier[0];
+
     // minimize nodes in the last word's suffix
-    compilePrevTail(1);
-    //System.out.println("finish: inputCount=" + frontier[0].inputCount);
-    if (frontier[0].inputCount < minSuffixCount1 || frontier[0].inputCount < minSuffixCount2 || frontier[0].numArcs == 0) {
+    freezeTail(0);
+    if (root.inputCount < minSuffixCount1 || root.inputCount < minSuffixCount2 || root.numArcs == 0) {
       if (fst.emptyOutput == null) {
         return null;
       } else if (minSuffixCount1 > 0 || minSuffixCount2 > 0) {
         // empty string got pruned
         return null;
-      } else {
-        fst.finish(compileNode(frontier[0], lastInput.length).address);
-        //System.out.println("compile addr = " + fst.getStartNode());
-        return fst;
       }
     } else {
       if (minSuffixCount2 != 0) {
-        compileAllTargets(frontier[0], lastInput.length);
+        compileAllTargets(root, lastInput.length);
       }
-      //System.out.println("NOW: " + frontier[0].numArcs);
-      fst.finish(compileNode(frontier[0], lastInput.length).address);
     }
+    //if (DEBUG) System.out.println("  builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
+    fst.finish(compileNode(root, lastInput.length).address);
 
-    /*
-    if (dedupHash != null) {
-      System.out.println("NH: " + dedupHash.count()); 
-    }
-    */
-    
     return fst;
   }
 
@@ -479,7 +512,7 @@ public class Builder<T> {
     }
   }
 
-  static class Arc<T> {
+  public static class Arc<T> {
     public int label;                             // really an "unsigned" byte
     public Node target;
     public boolean isFinal;
@@ -502,16 +535,20 @@ public class Builder<T> {
     }
   }
 
-  static final class UnCompiledNode<T> implements Node {
+  public static final class UnCompiledNode<T> implements Node {
     final Builder<T> owner;
-    int numArcs;
-    Arc<T>[] arcs;
-    T output;
-    boolean isFinal;
-    long inputCount;
+    public int numArcs;
+    public Arc<T>[] arcs;
+    // TODO: instead of recording isFinal/output on the
+    // node, maybe we should use -1 arc to mean "end" (like
+    // we do when reading the FST).  Would simplify much
+    // code here...
+    public T output;
+    public boolean isFinal;
+    public long inputCount;
 
     /** This node's depth, starting from the automaton root. */
-    final int depth;
+    public final int depth;
 
     /**
      * @param depth

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java Tue Aug 23 14:06:58 2011
@@ -133,6 +133,6 @@ public final class ByteSequenceOutputs e
 
   @Override
   public String outputToString(BytesRef output) {
-    return output.utf8ToString();
+    return output.toString();
   }
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java Tue Aug 23 14:06:58 2011
@@ -123,7 +123,7 @@ public class FST<T> {
     public int label;
     public T output;
 
-    int target;
+    public int target;
 
     byte flags;
     public T nextFinalOutput;
@@ -274,6 +274,10 @@ public class FST<T> {
     }
   }
 
+  public T getEmptyOutput() {
+    return emptyOutput;
+  }
+
   void setEmptyOutput(T v) throws IOException {
     if (emptyOutput != null) {
       emptyOutput = outputs.merge(emptyOutput, v);
@@ -597,9 +601,9 @@ public class FST<T> {
       arc.label = END_LABEL;
       arc.output = follow.nextFinalOutput;
       if (follow.target <= 0) {
-        arc.flags = BIT_LAST_ARC;
+        arc.flags = BIT_LAST_ARC | BIT_FINAL_ARC;
       } else {
-        arc.flags = 0;
+        arc.flags = BIT_FINAL_ARC;
         arc.nextArc = follow.target;
       }
       //System.out.println("    insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
@@ -609,8 +613,7 @@ public class FST<T> {
     }
   }
 
-  // Not private because NodeHash needs access:
-  Arc<T> readFirstRealArc(int address, Arc<T> arc) throws IOException {
+  public Arc<T> readFirstRealArc(int address, Arc<T> arc) throws IOException {
 
     final BytesReader in = getBytesReader(address);
 
@@ -693,7 +696,9 @@ public class FST<T> {
     return readLabel(in);
   }
 
-  Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
+  /** Never returns null, but you should never call this if
+   *  arc.isLast() is true. */
+  public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
     // this is a continuing arc in a fixed array
     if (arc.bytesPerArc != 0) {
       // arcs are at fixed entries
@@ -925,7 +930,7 @@ public class FST<T> {
     }
   }
 
-  final BytesReader getBytesReader(int pos) {
+  public final BytesReader getBytesReader(int pos) {
     // TODO: maybe re-use via ThreadLocal?
     return new BytesReader(pos);
   }

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java Tue Aug 23 14:06:58 2011
@@ -39,8 +39,8 @@ import org.apache.lucene.store.DataOutpu
 public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {
 
   public final static class TwoLongs {
-    final long first;
-    final long second;
+    public final long first;
+    public final long second;
 
     public TwoLongs(long first, long second) {
       this.first = first;

Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java Tue Aug 23 14:06:58 2011
@@ -213,6 +213,7 @@ public final class Util {
 
     // Shape for states.
     final String stateShape = "circle";
+    final String finalStateShape = "doublecircle";
 
     // Emit DOT prologue.
     out.write("digraph FST {\n");
@@ -223,12 +224,34 @@ public final class Util {
     }
 
     emitDotState(out, "initial", "point", "white", "");
-    emitDotState(out, Integer.toString(startArc.target), stateShape, 
-        fst.isExpandedTarget(startArc) ? expandedNodeColor : null, 
-        "");
-    out.write("  initial -> " + startArc.target + "\n");
 
     final T NO_OUTPUT = fst.outputs.getNoOutput();
+
+    // final FST.Arc<T> scratchArc = new FST.Arc<T>();
+
+    {
+      final String stateColor;
+      if (fst.isExpandedTarget(startArc)) {
+        stateColor = expandedNodeColor;
+      } else {
+        stateColor = null;
+      }
+
+      final boolean isFinal;
+      final T finalOutput;
+      if (startArc.isFinal()) {
+        isFinal = true;
+        finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
+      } else {
+        isFinal = false;
+        finalOutput = null;
+      }
+      
+      emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
+    }
+
+    out.write("  initial -> " + startArc.target + "\n");
+
     int level = 0;
 
     while (!nextLevelQueue.isEmpty()) {
@@ -240,19 +263,48 @@ public final class Util {
       out.write("\n  // Transitions and states at level: " + level + "\n");
       while (!thisLevelQueue.isEmpty()) {
         final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
-        
         if (fst.targetHasArcs(arc)) {
           // scan all arcs
           final int node = arc.target;
           fst.readFirstTargetArc(arc, arc);
-          
+
+          if (arc.label == FST.END_LABEL) {
+            // Skip it -- prior recursion took this into account already
+            assert !arc.isLast();
+            fst.readNextArc(arc);
+          }
+
           while (true) {
+
             // Emit the unseen state and add it to the queue for the next level.
             if (arc.target >= 0 && !seen.get(arc.target)) {
-              final boolean isExpanded = fst.isExpandedTarget(arc);
-              emitDotState(out, Integer.toString(arc.target), stateShape, 
-                  isExpanded ?  expandedNodeColor : null, 
-                  labelStates ? Integer.toString(arc.target) : ""); 
+
+              /*
+              boolean isFinal = false;
+              T finalOutput = null;
+              fst.readFirstTargetArc(arc, scratchArc);
+              if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
+                // target is final
+                isFinal = true;
+                finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
+                System.out.println("dot hit final label=" + (char) scratchArc.label);
+              }
+              */
+              final String stateColor;
+              if (fst.isExpandedTarget(arc)) {
+                stateColor = expandedNodeColor;
+              } else {
+               stateColor = null;
+              }
+
+              final String finalOutput;
+              if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
+                finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
+              } else {
+                finalOutput = "";
+              }
+
+              emitDotState(out, Integer.toString(arc.target), arc.isFinal() ? finalStateShape : stateShape, stateColor, finalOutput);
               seen.set(arc.target);
               nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
               sameLevelStates.add(arc.target);
@@ -265,15 +317,19 @@ public final class Util {
               outs = "";
             }
 
-            final String cl;
-            if (arc.label == FST.END_LABEL) {
-              cl = "~";
-            } else {
-              cl = printableLabel(arc.label);
+            if (!fst.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
+              // Tricky special case: sometimes, due to
+              // pruning, the builder can [sillily] produce
+              // an FST with an arc into the final end state
+              // (-1) but also with a next final output; in
+              // this case we pull that output up onto this
+              // arc
+              outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
             }
 
-            out.write("  " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]\n");
-            
+            assert arc.label != FST.END_LABEL;
+            out.write("  " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"]\n");
+                   
             // Break the loop if we're on the last arc of this state.
             if (arc.isLast()) {
               break;
@@ -295,7 +351,7 @@ public final class Util {
     }
 
     // Emit terminating state (always there anyway).
-    out.write("  -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
+    out.write("  -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
     out.write("  {rank=sink; -1 }\n");
     
     out.write("}\n");

Modified: lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java Tue Aug 23 14:06:58 2011
@@ -29,6 +29,9 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
@@ -251,4 +254,21 @@ class DocHelper {
   public static int numFields(Document doc) {
     return doc.getFields().size();
   }
+  
+  public static Document createDocument(int n, String indexName, int numFields) {
+    StringBuilder sb = new StringBuilder();
+    Document doc = new Document();
+    doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    sb.append("a");
+    sb.append(n);
+    doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    sb.append(" b");
+    sb.append(n);
+    for (int i = 1; i < numFields; i++) {
+      doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES,
+                        Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    }
+    return doc;
+  }
 }

Modified: lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Tue Aug 23 14:06:58 2011
@@ -29,6 +29,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.index.IndexWriter; // javadoc
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -59,7 +60,7 @@ public class RandomIndexWriter implement
 
     private final Random r;
 
-    public MockIndexWriter(Random r,Directory dir, IndexWriterConfig conf) throws IOException {
+    public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) throws IOException {
       super(dir, conf);
       // must make a private random since our methods are
       // called from different threads; else test failures may
@@ -298,6 +299,10 @@ public class RandomIndexWriter implement
   public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
     w.deleteDocuments(term);
   }
+
+  public void deleteDocuments(Query q) throws CorruptIndexException, IOException {
+    w.deleteDocuments(q);
+  }
   
   public void commit() throws CorruptIndexException, IOException {
     w.commit();
@@ -321,11 +326,16 @@ public class RandomIndexWriter implement
   }
 
   private boolean doRandomOptimize = true;
+  private boolean doRandomOptimizeAssert = true;
 
   public void setDoRandomOptimize(boolean v) {
     doRandomOptimize = v;
   }
 
+  public void setDoRandomOptimizeAssert(boolean v) {
+    doRandomOptimizeAssert = v;
+  }
+
   private void doRandomOptimize() throws IOException {
     if (doRandomOptimize) {
       final int segCount = w.getSegmentCount();
@@ -336,7 +346,7 @@ public class RandomIndexWriter implement
         // partial optimize
         final int limit = _TestUtil.nextInt(r, 1, segCount);
         w.optimize(limit);
-        assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
+        assert !doRandomOptimizeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
       }
     }
     switchDoDocValues();
@@ -354,6 +364,9 @@ public class RandomIndexWriter implement
       if (LuceneTestCase.VERBOSE) {
         System.out.println("RIW.getReader: use NRT reader");
       }
+      if (r.nextInt(5) == 1) {
+        w.commit();
+      }
       return w.getReader(applyDeletions);
     } else {
       if (LuceneTestCase.VERBOSE) {
@@ -361,7 +374,11 @@ public class RandomIndexWriter implement
       }
       w.commit();
       switchDoDocValues();
-      return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
+      if (r.nextBoolean()) {
+        return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
+      } else {
+        return w.getReader(applyDeletions);
+      }
     }
   }