You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/01/12 22:38:55 UTC

svn commit: r1058328 [1/2] - in /lucene/dev/trunk: lucene/ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/ lucene/src/java/org/apache/lucene/index/ lucene/src/jav...

Author: simonw
Date: Wed Jan 12 21:38:51 2011
New Revision: 1058328

URL: http://svn.apache.org/viewvc?rev=1058328&view=rev
Log:
LUCENE-2694: Make MTQ rewrite + weight/scorer single pass

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java   (with props)
Removed:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermState.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/QueryUtils.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jan 12 21:38:51 2011
@@ -359,6 +359,11 @@ Optimizations
   not seek backwards when a sub-range has no terms. It now only seeks
   when the current term is less than the next sub-range's lower end.
   (Uwe Schindler, Mike McCandless)
+  
+* LUCENE-2694: Optimize MultiTermQuery to be single pass for Term lookups.
+  MultiTermQuery now stores TermState per leaf reader during rewrite to re-
+  seek the term dictionary in TermQuery / TermWeight.
+  (Simon Willnauer, Mike McCandless, Robert Muir)
 
 Documentation
 

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Wed Jan 12 21:38:51 2011
@@ -18,10 +18,15 @@ package org.apache.lucene.store.instanti
 
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
+
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 
@@ -91,10 +96,6 @@ public class InstantiatedTermsEnum exten
   }
 
   @Override
-  public void cacheCurrentTerm() {
-  }
-
-  @Override
   public BytesRef term() {
     return br;
   }
@@ -129,5 +130,18 @@ public class InstantiatedTermsEnum exten
   public Comparator<BytesRef> getComparator() {
     return BytesRef.getUTF8SortedAsUnicodeComparator();
   }
+
+  @Override
+  public TermState termState() throws IOException {
+    final OrdTermState state = new OrdTermState();
+    state.ord = upto - start;
+    return state;
+  }
+
+  @Override
+  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    assert state != null && state instanceof OrdTermState;
+    return seek(((OrdTermState)state).ord); // just use the ord for simplicity
+  }
 }
 

Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Jan 12 21:38:51 2011
@@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldS
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.FieldsEnum;
@@ -884,10 +886,6 @@ public class MemoryIndex implements Seri
       }
 
       @Override
-      public void cacheCurrentTerm() {
-      }
-
-      @Override
       public long ord() {
         return termUpto;
       }
@@ -917,8 +915,21 @@ public class MemoryIndex implements Seri
       public Comparator<BytesRef> getComparator() {
         return BytesRef.getUTF8SortedAsUnicodeComparator();
       }
-    }
 
+      @Override
+      public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+        assert state != null;
+        return this.seek(((OrdTermState)state).ord);
+      }
+
+      @Override
+      public TermState termState() throws IOException {
+        OrdTermState ts = new OrdTermState();
+        ts.ord = termUpto;
+        return ts;
+      }
+    }
+    
     private class MemoryDocsEnum extends DocsEnum {
       private ArrayIntList positions;
       private boolean hasNext;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Wed Jan 12 21:38:51 2011
@@ -372,7 +372,6 @@ class BufferedDeletes {
           Query query = entry.getKey();
           int limit = entry.getValue().intValue();
           Weight weight = query.weight(searcher);
-          
           Scorer scorer = weight.scorer(readerContext, true, false);
           if (scorer != null) {
             while(true)  {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Jan 12 21:38:51 2011
@@ -131,11 +131,6 @@ public class FilterIndexReader extends I
     }
 
     @Override
-    public void cacheCurrentTerm() throws IOException {
-      in.cacheCurrentTerm();
-    }
-
-    @Override
     public SeekStatus seek(long ord) throws IOException {
       return in.seek(ord);
     }
@@ -174,6 +169,16 @@ public class FilterIndexReader extends I
     public Comparator<BytesRef> getComparator() throws IOException {
       return in.getComparator();
     }
+
+    @Override
+    public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+      return in.seek(term, state);
+    }
+
+    @Override
+    public TermState termState() throws IOException {
+      return in.termState();
+    }
   }
 
   /** Base class for filtering {@link DocsEnum} implementations. */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Wed Jan 12 21:38:51 2011
@@ -1070,6 +1070,47 @@ public abstract class IndexReader implem
       return null;
     }
   }
+  
+  /**
+   * Returns {@link DocsEnum} for the specified field and
+   * {@link TermState}. This may return null, if either the field or the term
+   * does not exist or the {@link TermState} is invalid for the underlying
+   * implementation.*/
+  public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+    assert state != null;
+    assert field != null;
+    final Fields fields = fields();
+    if (fields == null) {
+      return null;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms != null) {
+      return terms.docs(skipDocs, term, state, null);
+    } else {
+      return null;
+    }
+  }
+  
+  /**
+   * Returns {@link DocsAndPositionsEnum} for the specified field and
+   * {@link TermState}. This may return null, if either the field or the term
+   * does not exist, the {@link TermState} is invalid for the underlying
+   * implementation, or positions were not stored for this term.*/
+  public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+    assert state != null;
+    assert field != null;
+    final Fields fields = fields();
+    if (fields == null) {
+      return null;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms != null) {
+      return terms.docsAndPositions(skipDocs, term, state, null);
+    } else {
+      return null;
+    }
+  }
+
 
   /** Deletes the document numbered <code>docNum</code>.  Once a document is
    * deleted it will not appear in TermDocs or TermPositions enumerations.

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiReader.java Wed Jan 12 21:38:51 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.document.Document;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Wed Jan 12 21:38:51 2011
@@ -91,13 +91,6 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public void cacheCurrentTerm() throws IOException {
-    for(int i=0;i<numTop;i++) {
-      top[i].terms.cacheCurrentTerm();
-    }
-  }
-
-  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java?rev=1058328&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/OrdTermState.java Wed Jan 12 21:38:51 2011
@@ -0,0 +1,33 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An ordinal based {@link TermState}
+ * 
+ * @lucene.experimental
+ */
+public class OrdTermState extends TermState {
+  public long ord;
+  
+  @Override
+  public void copyFrom(TermState other) {
+    assert other instanceof OrdTermState : "can not copy from " + other.getClass().getName();
+    this.ord = ((OrdTermState) other).ord;
+  }
+}

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java?rev=1058328&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermState.java Wed Jan 12 21:38:51 2011
@@ -0,0 +1,47 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Encapsulates all required internal state to position the associated
+ * {@link TermsEnum} without re-seeking.
+ * 
+ * @see TermsEnum#seek(org.apache.lucene.util.BytesRef, TermState)
+ * @see TermsEnum#termState()
+ * @lucene.experimental
+ */
+public abstract class TermState implements Cloneable {
+
+  /**
+   * Copies the content of the given {@link TermState} to this instance
+   * 
+   * @param other
+   *          the TermState to copy
+   */
+  public abstract void copyFrom(TermState other);
+
+  @Override
+  public Object clone() {
+    try {
+      return super.clone();
+    } catch (CloneNotSupportedException cnse) {
+      // should not happen
+      throw new RuntimeException(cnse);
+    }
+  } 
+}
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java Wed Jan 12 21:38:51 2011
@@ -80,11 +80,57 @@ public abstract class Terms {
     }
   }
 
+  /**
+   * Expert: Get {@link DocsEnum} for the specified {@link TermState}.
+   * This method may return <code>null</code> if the term does not exist.
+   * 
+   * @see TermsEnum#termState()
+   * @see TermsEnum#seek(BytesRef, TermState) */
+  public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
+    final TermsEnum termsEnum = getThreadTermsEnum();
+    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+      return termsEnum.docs(skipDocs, reuse);
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Get {@link DocsAndPositionsEnum} for the specified {@link TermState}. This
+   * method may return <code>null</code> if the term does not exist, or positions were
+   * not indexed.
+   * 
+   * @see TermsEnum#termState()
+   * @see TermsEnum#seek(BytesRef, TermState) */
+  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
+    final TermsEnum termsEnum = getThreadTermsEnum();
+    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+      return termsEnum.docsAndPositions(skipDocs, reuse);
+    } else {
+      return null;
+    }
+  }
+
   public long getUniqueTermCount() throws IOException {
     throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
   }
 
-  protected TermsEnum getThreadTermsEnum() throws IOException {
+  /**
+   * Returns a thread-private {@link TermsEnum} instance. Obtaining
+   * {@link TermsEnum} from this method might be more efficient than using
+   * {@link #iterator()} directly since this method doesn't necessarily create a
+   * new {@link TermsEnum} instance.
+   * <p>
+   * NOTE: {@link TermsEnum} instances obtained from this method must not be
+   * shared across threads. The enum should only be used within a local context
+   * where other threads can't access it.
+   * 
+   * @return a thread-private {@link TermsEnum} instance
+   * @throws IOException
+   *           if an IOException occurs
+   * @lucene.internal
+   */
+  public TermsEnum getThreadTermsEnum() throws IOException {
     TermsEnum termsEnum = threadEnums.get();
     if (termsEnum == null) {
       termsEnum = iterator();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java Wed Jan 12 21:38:51 2011
@@ -73,7 +73,34 @@ public abstract class TermsEnum {
    *  may be before or after the current ord.  See {@link
    *  #seek(BytesRef)}. */
   public abstract SeekStatus seek(long ord) throws IOException;
-  
+
+  /**
+   * Expert: Seeks a specific position by {@link TermState} previously obtained
+   * from {@link #termState()}. Callers should maintain the {@link TermState} to
+   * use this method. Low-level implementations may position the TermsEnum
+   * without re-seeking the term dictionary.
+   * <p>
+   * Seeking by {@link TermState} should only be used iff the enum the state was
+   * obtained from and the enum the state is used for seeking are obtained from
+   * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can
+   * leave the enum in undefined state.
+   * <p>
+   * NOTE: Using this method with an incompatible {@link TermState} might leave
+   * this {@link TermsEnum} in undefined state. On a segment level
+   * {@link TermState} instances are compatible only iff the source and the
+   * target {@link TermsEnum} operate on the same field. If operating on segment
+   * level, TermState instances must not be used across segments.
+   * <p>
+   * NOTE: A seek by {@link TermState} might not restore the
+   * {@link AttributeSource}'s state. {@link AttributeSource} states must be
+   * maintained separately if this method is used.
+   * @param term the term the TermState corresponds to
+   * @param state the {@link TermState}
+   * */
+  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    return seek(term);
+  }
+
   /** Increments the enumeration to the next element.
    *  Returns the resulting term, or null if the end was
    *  hit.  The returned BytesRef may be re-used across calls
@@ -98,7 +125,7 @@ public abstract class TermsEnum {
    *  first time, after next() returns null or seek returns
    *  {@link SeekStatus#END}.*/
   public abstract int docFreq();
-
+  
   /** Get {@link DocsEnum} for the current term.  Do not
    *  call this before calling {@link #next} or {@link
    *  #seek} for the first time.  This method will not
@@ -116,6 +143,25 @@ public abstract class TermsEnum {
    *  the postings by this codec. */
   public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
 
+  /**
+   * Expert: Returns the TermsEnum's internal state to position the TermsEnum
+   * without re-seeking the term dictionary.
+   * <p>
+   * NOTE: A seek by {@link TermState} might not capture the
+   * {@link AttributeSource}'s state. Callers must maintain the
+   * {@link AttributeSource} states separately
+   * 
+   * @see TermState
+   * @see #seek(BytesRef, TermState)
+   */
+  public TermState termState() throws IOException {
+    return new TermState() {
+      @Override
+      public void copyFrom(TermState other) {
+      }
+    };
+  }
+  
   /** Return the {@link BytesRef} Comparator used to sort
    *  terms provided by the iterator.  This may return
    *  null if there are no terms.  Callers may invoke this
@@ -123,10 +169,6 @@ public abstract class TermsEnum {
    *  instance & reuse it. */
   public abstract Comparator<BytesRef> getComparator() throws IOException;
 
-  /** Optional optimization hint: informs the codec that the
-   *  current term is likely to be re-seek'd-to soon.  */
-  public abstract void cacheCurrentTerm() throws IOException;
-
   /** An empty TermsEnum for quickly returning an empty instance e.g.
    * in {@link org.apache.lucene.search.MultiTermQuery}
    * <p><em>Please note:</em> This enum should be unmodifiable,
@@ -142,9 +184,6 @@ public abstract class TermsEnum {
     public SeekStatus seek(long ord) { return SeekStatus.END; }
     
     @Override
-    public void cacheCurrentTerm() {}
-    
-    @Override
     public BytesRef term() {
       throw new IllegalStateException("this method should never be called");
     }
@@ -183,5 +222,15 @@ public abstract class TermsEnum {
     public synchronized AttributeSource attributes() {
       return super.attributes();
     }
+
+    @Override
+    public TermState termState() throws IOException {
+      throw new IllegalStateException("this method should never be called");
+    }
+
+    @Override
+    public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+      throw new IllegalStateException("this method should never be called");
+    }
   };
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Wed Jan 12 21:38:51 2011
@@ -42,17 +42,17 @@ public abstract class PostingsReaderBase
   public abstract void init(IndexInput termsIn) throws IOException;
 
   /** Return a newly created empty TermState */
-  public abstract TermState newTermState() throws IOException;
+  public abstract PrefixCodedTermState newTermState() throws IOException;
 
-  public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState state, boolean isIndexTerm) throws IOException;
+  public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
+  public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
 
   public abstract void close() throws IOException;
 }

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java?rev=1058328&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java Wed Jan 12 21:38:51 2011
@@ -0,0 +1,45 @@
+package org.apache.lucene.index.codecs;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * Holds all state required for {@link PostingsReaderBase}
+ * to produce a {@link DocsEnum} without re-seeking the
+ * terms dict.
+ */
+public class PrefixCodedTermState extends OrdTermState {
+  public int docFreq; // how many docs have this term
+  public long filePointer; // fp into the terms dict primary file (_X.tis)
+
+  @Override
+  public void copyFrom(TermState _other) {
+    assert _other instanceof PrefixCodedTermState : "can not copy from " + _other.getClass().getName();
+    PrefixCodedTermState other = (PrefixCodedTermState) _other;
+    super.copyFrom(_other);
+    filePointer = other.filePointer;
+    docFreq = other.docFreq;
+  }
+
+  @Override
+  public String toString() {
+    return super.toString() + "[ord=" + ord + ", tis.filePointer=" + filePointer + "]";
+  }
+  
+}

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Wed Jan 12 21:38:51 2011
@@ -31,6 +31,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
@@ -68,7 +69,7 @@ public class PrefixCodedTermsReader exte
   private final Comparator<BytesRef> termComp;
 
   // Caches the most recently looked-up field + terms:
-  private final DoubleBarrelLRUCache<FieldAndTerm,TermState> termsCache;
+  private final DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState> termsCache;
 
   // Reads the terms index
   private TermsIndexReaderBase indexReader;
@@ -84,11 +85,6 @@ public class PrefixCodedTermsReader exte
     public FieldAndTerm() {
     }
 
-    public FieldAndTerm(String field, BytesRef term) {
-      this.field = field;
-      this.term = new BytesRef(term);
-    }
-
     public FieldAndTerm(FieldAndTerm other) {
       field = other.field;
       term = new BytesRef(other.term);
@@ -116,7 +112,7 @@ public class PrefixCodedTermsReader exte
     throws IOException {
     
     this.postingsReader = postingsReader;
-    termsCache = new DoubleBarrelLRUCache<FieldAndTerm,TermState>(termsCacheSize);
+    termsCache = new DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState>(termsCacheSize);
 
     this.termComp = termComp;
     
@@ -278,10 +274,10 @@ public class PrefixCodedTermsReader exte
     }
 
     // Iterates through terms in this field, not supporting ord()
-    private class SegmentTermsEnum extends TermsEnum {
+    private final class SegmentTermsEnum extends TermsEnum {
       private final IndexInput in;
       private final DeltaBytesReader bytesReader;
-      private final TermState state;
+      private final PrefixCodedTermState state;
       private boolean seekPending;
       private final FieldAndTerm fieldTerm = new FieldAndTerm();
       private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
@@ -307,14 +303,6 @@ public class PrefixCodedTermsReader exte
         return termComp;
       }
 
-      @Override
-      public void cacheCurrentTerm() {
-        TermState stateCopy = (TermState) state.clone();
-        stateCopy.filePointer = in.getFilePointer();
-        termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term),
-                       stateCopy);
-      }
-
       // called only from assert
       private boolean first;
       private int indexTermCount;
@@ -342,7 +330,7 @@ public class PrefixCodedTermsReader exte
        *  is found, SeekStatus.NOT_FOUND if a different term
        *  was found, SeekStatus.END if we hit EOF */
       @Override
-      public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+      public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException {
 
         if (indexEnum == null) {
           throw new IllegalStateException("terms index was not loaded");
@@ -357,9 +345,8 @@ public class PrefixCodedTermsReader exte
           cachedState = termsCache.get(fieldTerm);
           if (cachedState != null) {
             state.copyFrom(cachedState);
-            seekPending = true;
+            setTermState(term, state);
             positioned = false;
-            bytesReader.term.copy(term);
             //System.out.println("  cached!");
             return SeekStatus.FOUND;
           }
@@ -439,12 +426,7 @@ public class PrefixCodedTermsReader exte
           if (cmp == 0) {
             // Done!
             if (useCache) {
-              // Store in cache
-              FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
-              cachedState = (TermState) state.clone();
-              // this is fp after current term
-              cachedState.filePointer = in.getFilePointer();
-              termsCache.put(entryKey, cachedState);
+              cacheTerm(fieldTerm);
             }
 
             return SeekStatus.FOUND;
@@ -464,6 +446,23 @@ public class PrefixCodedTermsReader exte
         return SeekStatus.END;
       }
 
+      private final void setTermState(BytesRef term, final TermState termState) {
+        assert termState != null && termState instanceof PrefixCodedTermState;
+        state.copyFrom(termState);
+        seekPending = true;
+        bytesReader.term.copy(term);
+      }
+
+      private final void cacheTerm(FieldAndTerm other) {
+        // Store in cache
+        final FieldAndTerm entryKey = new FieldAndTerm(other);
+        final PrefixCodedTermState cachedState = (PrefixCodedTermState) state.clone();
+        // this is fp after current term
+        cachedState.filePointer = in.getFilePointer();
+        termsCache.put(entryKey, cachedState);
+      }
+      
+
       @Override
       public BytesRef term() {
         return bytesReader.term;
@@ -498,7 +497,9 @@ public class PrefixCodedTermsReader exte
         postingsReader.readTerm(in,
                                 fieldInfo, state,
                                 isIndexTerm);
-        state.ord++;
+        if (doOrd) {
+          state.ord++;
+        }
         positioned = true;
 
         //System.out.println("te.next term=" + bytesReader.term.utf8ToString());
@@ -512,7 +513,7 @@ public class PrefixCodedTermsReader exte
 
       @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
-        DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
+        final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
         assert docsEnum != null;
         return docsEnum;
       }
@@ -527,6 +528,23 @@ public class PrefixCodedTermsReader exte
       }
 
       @Override
+      public SeekStatus seek(BytesRef term, TermState otherState) throws IOException {
+        assert otherState != null && otherState instanceof PrefixCodedTermState;
+        assert otherState.getClass() == this.state.getClass() : "Illegal TermState type " + otherState.getClass();
+        assert ((PrefixCodedTermState)otherState).ord < numTerms;
+        setTermState(term, otherState);
+        positioned = false;
+        return SeekStatus.FOUND;
+      }
+      
+      @Override
+      public TermState termState() throws IOException {
+        final PrefixCodedTermState newTermState = (PrefixCodedTermState) state.clone();
+        newTermState.filePointer = in.getFilePointer();
+        return newTermState;
+      }
+
+      @Override
       public SeekStatus seek(long ord) throws IOException {
 
         if (indexEnum == null) {
@@ -562,7 +580,6 @@ public class PrefixCodedTermsReader exte
         return SeekStatus.FOUND;
       }
 
-      @Override
       public long ord() {
         if (!doOrd) {
           throw new UnsupportedOperationException();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Wed Jan 12 21:38:51 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.index.FieldsEnu
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.CompoundFileReader;
@@ -742,11 +743,6 @@ public class PreFlexFields extends Field
     }
 
     @Override
-    public void cacheCurrentTerm() throws IOException {
-      getTermsDict().cacheCurrentTerm(termEnum);
-    }
-
-    @Override
     public SeekStatus seek(long ord) throws IOException {
       throw new UnsupportedOperationException();
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Wed Jan 12 21:38:51 2011
@@ -22,8 +22,9 @@ import java.io.IOException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -56,10 +57,10 @@ public class PulsingPostingsReaderImpl e
     wrappedPostingsReader.init(termsIn);
   }
 
-  private static class PulsingTermState extends TermState {
+  private static class PulsingTermState extends PrefixCodedTermState {
     private byte[] postings;
     private int postingsSize;                     // -1 if this term was not inlined
-    private TermState wrappedTermState;
+    private PrefixCodedTermState wrappedTermState;
     private boolean pendingIndexTerm;
 
     @Override
@@ -71,7 +72,7 @@ public class PulsingPostingsReaderImpl e
         System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
       } else {
         assert wrappedTermState != null;
-        clone.wrappedTermState = (TermState) wrappedTermState.clone();
+        clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone();
       }
       return clone;
     }
@@ -102,15 +103,14 @@ public class PulsingPostingsReaderImpl e
   }
 
   @Override
-  public TermState newTermState() throws IOException {
+  public PrefixCodedTermState newTermState() throws IOException {
     PulsingTermState state = new PulsingTermState();
     state.wrappedTermState = wrappedPostingsReader.newTermState();
     return state;
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
-
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException {
     PulsingTermState termState = (PulsingTermState) _termState;
 
     termState.pendingIndexTerm |= isIndexTerm;
@@ -137,7 +137,7 @@ public class PulsingPostingsReaderImpl e
   // TODO: we could actually reuse, by having TL that
   // holds the last wrapped reuse, and vice-versa
   @Override
-  public DocsEnum docs(FieldInfo field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     PulsingTermState termState = (PulsingTermState) _termState;
     if (termState.postingsSize != -1) {
       PulsingDocsEnum postings;
@@ -162,7 +162,7 @@ public class PulsingPostingsReaderImpl e
 
   // TODO: -- not great that we can't always reuse
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     if (field.omitTermFreqAndPositions) {
       return null;
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Wed Jan 12 21:38:51 2011
@@ -25,8 +25,9 @@ import org.apache.lucene.index.DocsAndPo
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
@@ -129,12 +130,13 @@ public class SepPostingsReaderImpl exten
     }
   }
 
-  private static class SepTermState extends TermState {
+  private static final class SepTermState extends PrefixCodedTermState {
     // We store only the seek point to the docs file because
     // the rest of the info (freqIndex, posIndex, etc.) is
     // stored in the docs file:
     IntIndexInput.Index docIndex;
-
+    
+    @Override
     public Object clone() {
       SepTermState other = (SepTermState) super.clone();
       other.docIndex = (IntIndexInput.Index) docIndex.clone();
@@ -154,19 +156,19 @@ public class SepPostingsReaderImpl exten
   }
 
   @Override
-  public TermState newTermState() throws IOException {
+  public PrefixCodedTermState newTermState() throws IOException {
     final SepTermState state =  new SepTermState();
     state.docIndex = docIn.index();
     return state;
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException {
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException {
     ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
   }
 
   @Override
-  public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     final SepTermState termState = (SepTermState) _termState;
     SepDocsEnum docsEnum;
     if (reuse == null || !(reuse instanceof SepDocsEnum)) {
@@ -185,7 +187,7 @@ public class SepPostingsReaderImpl exten
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     assert !fieldInfo.omitTermFreqAndPositions;
     final SepTermState termState = (SepTermState) _termState;
     SepDocsAndPositionsEnum postingsEnum;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Wed Jan 12 21:38:51 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -152,10 +153,6 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
-    public void cacheCurrentTerm() {
-    }
-
-    @Override
     public BytesRef next() throws IOException {
       assert !ended;
       final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.next();
@@ -214,7 +211,7 @@ class SimpleTextFieldsReader extends Fie
       } 
       return docsAndPositionsEnum.reset(docsStart, skipDocs);
     }
-
+    
     @Override
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
@@ -439,7 +436,6 @@ class SimpleTextFieldsReader extends Fie
   }
 
   private class SimpleTextTerms extends Terms {
-    private final String field;
     private final long termsStart;
     private final boolean omitTF;
     private FST<PairOutputs.Pair<Long,Long>> fst;
@@ -447,7 +443,6 @@ class SimpleTextFieldsReader extends Fie
     private final BytesRef scratch = new BytesRef(10);
 
     public SimpleTextTerms(String field, long termsStart) throws IOException {
-      this.field = StringHelper.intern(field);
       this.termsStart = termsStart;
       omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
       loadTerms();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Wed Jan 12 21:38:51 2011
@@ -26,8 +26,9 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.TermState;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -83,20 +84,20 @@ public class StandardPostingsReader exte
   }
 
   // Must keep final because we do non-standard clone
-  private final static class DocTermState extends TermState {
+  private final static class StandardTermState extends PrefixCodedTermState {
     long freqOffset;
     long proxOffset;
     int skipOffset;
 
     public Object clone() {
-      DocTermState other = new DocTermState();
+      StandardTermState other = new StandardTermState();
       other.copyFrom(this);
       return other;
     }
 
     public void copyFrom(TermState _other) {
       super.copyFrom(_other);
-      DocTermState other = (DocTermState) _other;
+      StandardTermState other = (StandardTermState) _other;
       freqOffset = other.freqOffset;
       proxOffset = other.proxOffset;
       skipOffset = other.skipOffset;
@@ -108,8 +109,8 @@ public class StandardPostingsReader exte
   }
 
   @Override
-  public TermState newTermState() {
-    return new DocTermState();
+  public PrefixCodedTermState newTermState() {
+    return new StandardTermState();
   }
 
   @Override
@@ -126,10 +127,9 @@ public class StandardPostingsReader exte
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm)
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm)
     throws IOException {
-
-    final DocTermState docTermState = (DocTermState) termState;
+    final StandardTermState docTermState = (StandardTermState) termState;
 
     if (isIndexTerm) {
       docTermState.freqOffset = termsIn.readVLong();
@@ -153,7 +153,7 @@ public class StandardPostingsReader exte
   }
     
   @Override
-  public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+  public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
     SegmentDocsEnum docsEnum;
     if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
       docsEnum = new SegmentDocsEnum(freqIn);
@@ -166,11 +166,11 @@ public class StandardPostingsReader exte
         docsEnum = new SegmentDocsEnum(freqIn);
       }
     }
-    return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+    return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
   }
 
   @Override
-  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
     if (fieldInfo.omitTermFreqAndPositions) {
       return null;
     }
@@ -189,7 +189,7 @@ public class StandardPostingsReader exte
           docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
         }
       }
-      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+      return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
     } else {
       SegmentDocsAndPositionsEnum docsEnum;
       if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
@@ -203,7 +203,7 @@ public class StandardPostingsReader exte
           docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
         }
       }
-      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+      return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs);
     }
   }
 
@@ -233,7 +233,7 @@ public class StandardPostingsReader exte
       this.freqIn = (IndexInput) freqIn.clone();
     }
 
-    public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       omitTF = fieldInfo.omitTermFreqAndPositions;
       if (omitTF) {
         freq = 1;
@@ -407,7 +407,7 @@ public class StandardPostingsReader exte
       this.proxIn = (IndexInput) proxIn.clone();
     }
 
-    public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       assert !fieldInfo.omitTermFreqAndPositions;
       assert !fieldInfo.storePayloads;
 
@@ -594,7 +594,7 @@ public class StandardPostingsReader exte
       this.proxIn = (IndexInput) proxIn.clone();
     }
 
-    public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+    public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException {
       assert !fieldInfo.omitTermFreqAndPositions;
       assert fieldInfo.storePayloads;
       if (payload == null) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Wed Jan 12 21:38:51 2011
@@ -21,9 +21,15 @@ import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PerReaderTermState;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 
 class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
 
@@ -71,8 +77,8 @@ class ConstantScoreAutoRewrite extends T
   }
   
   @Override
-  protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) {
-    topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD);
+  protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) {
+    topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD);
   }
 
   @Override
@@ -98,9 +104,10 @@ class ConstantScoreAutoRewrite extends T
       final BytesRefHash pendingTerms = col.pendingTerms;
       final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
       for(int i = 0; i < size; i++) {
+        final int pos = sort[i];
         // docFreq is not used for constant score here, we pass 1
         // to explicitely set a fake value, so it's not calculated
-        addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f);
+        addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
       }
       // Strip scores
       final Query result = new ConstantScoreQuery(bq);
@@ -123,12 +130,21 @@ class ConstantScoreAutoRewrite extends T
       
     @Override
     public boolean collect(BytesRef bytes) throws IOException {
-      pendingTerms.add(bytes);
+      int pos = pendingTerms.add(bytes);
       docVisitCount += termsEnum.docFreq();
       if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
         hasCutOff = true;
         return false;
       }
+      
+      final TermState termState = termsEnum.termState();
+      assert termState != null;
+      if (pos < 0) {
+        pos = (-pos)-1;
+        array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq());
+      } else {
+        array.termState[pos] = new PerReaderTermState(topReaderContext, termState, readerContext.ord, termsEnum.docFreq());
+      }
       return true;
     }
     
@@ -137,7 +153,8 @@ class ConstantScoreAutoRewrite extends T
     TermsEnum termsEnum;
 
     final int docCountCutoff, termCountLimit;
-    final BytesRefHash pendingTerms = new BytesRefHash();
+    final TermStateByteStart array = new TermStateByteStart(16);
+    final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
   }
 
   @Override
@@ -166,4 +183,40 @@ class ConstantScoreAutoRewrite extends T
     
     return true;
   }
+  
+  /** Special implementation of BytesStartArray that keeps parallel arrays for {@link PerReaderTermState} */
+  static final class TermStateByteStart extends DirectBytesStartArray  {
+    PerReaderTermState[] termState;
+    
+    public TermStateByteStart(int initSize) {
+      super(initSize);
+    }
+
+    @Override
+    public int[] init() {
+      final int[] ord = super.init();
+      termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+      assert termState.length >= ord.length;
+      return ord;
+    }
+
+    @Override
+    public int[] grow() {
+      final int[] ord = super.grow();
+      if (termState.length < ord.length) {
+        PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(termState, 0, tmpTermState, 0, termState.length);
+        termState = tmpTermState;
+      }      
+      assert termState.length >= ord.length;
+      return ord;
+    }
+
+    @Override
+    public int[] clear() {
+     termState = null;
+     return super.clear();
+    }
+    
+  }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Wed Jan 12 21:38:51 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -155,12 +156,24 @@ public abstract class FilteredTermsEnum 
   public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException {
     return tenum.docsAndPositions(bits, reuse);
   }
-
+  
+  /** This enum does not support seeking!
+   * @throws UnsupportedOperationException
+   */
   @Override
-  public void cacheCurrentTerm() throws IOException {
-    tenum.cacheCurrentTerm();
+  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
   }
-    
+  
+  /**
+   * Returns the filtered enum's term state
+   */
+  @Override
+  public TermState termState() throws IOException {
+    assert tenum != null;
+    return tenum.termState();
+  }
+
   @SuppressWarnings("fallthrough")
   @Override
   public BytesRef next() throws IOException {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Wed Jan 12 21:38:51 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
@@ -245,11 +246,6 @@ public final class FuzzyTermsEnum extend
   }
   
   @Override
-  public void cacheCurrentTerm() throws IOException {
-    actualEnum.cacheCurrentTerm();
-  }
-
-  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
     return actualEnum.docs(skipDocs, reuse);
   }
@@ -260,6 +256,15 @@ public final class FuzzyTermsEnum extend
     return actualEnum.docsAndPositions(skipDocs, reuse);
   }
   
+  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    return actualEnum.seek(term, state);
+  }
+  
+  @Override
+  public TermState termState() throws IOException {
+    return actualEnum.termState();
+  }
+  
   @Override
   public Comparator<BytesRef> getComparator() throws IOException {
     return actualEnum.getComparator();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Wed Jan 12 21:38:51 2011
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.PerReaderTermState;
 
 /**
  * An abstract {@link Query} that matches documents
@@ -159,8 +160,8 @@ public abstract class MultiTermQuery ext
     }
     
     @Override
-    protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) {
-      final TermQuery tq = new TermQuery(term, docCount);
+    protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) {
+      final TermQuery tq = new TermQuery(term, states);
       tq.setBoost(boost);
       topLevel.add(tq, BooleanClause.Occur.SHOULD);
     }
@@ -200,8 +201,8 @@ public abstract class MultiTermQuery ext
     }
     
     @Override
-    protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) {
-      final Query q = new ConstantScoreQuery(new TermQuery(term, docFreq));
+    protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) {
+      final Query q = new ConstantScoreQuery(new TermQuery(term, states));
       q.setBoost(boost);
       topLevel.add(q, BooleanClause.Occur.SHOULD);
     }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ScoringRewrite.java Wed Jan 12 21:38:51 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
 
@@ -27,6 +28,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PerReaderTermState;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 
@@ -53,8 +55,9 @@ public abstract class ScoringRewrite<Q e
     }
     
     @Override
-    protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) {
-      final TermQuery tq = new TermQuery(term, docCount);
+    protected void addClause(BooleanQuery topLevel, Term term, int docCount,
+        float boost, PerReaderTermState states) {
+      final TermQuery tq = new TermQuery(term, states);
       tq.setBoost(boost);
       topLevel.add(tq, BooleanClause.Occur.SHOULD);
     }
@@ -114,13 +117,13 @@ public abstract class ScoringRewrite<Q e
     final int size = col.terms.size();
     if (size > 0) {
       final int sort[] = col.terms.sort(col.termsEnum.getComparator());
-      final int[] docFreq = col.array.docFreq;
       final float[] boost = col.array.boost;
+      final PerReaderTermState[] termStates = col.array.termState;
       for (int i = 0; i < size; i++) {
         final int pos = sort[i];
         final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef()));
-        assert reader.docFreq(term) == docFreq[pos];
-        addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]);
+        assert reader.docFreq(term) == termStates[pos].docFreq();
+        addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]);
       }
     }
     query.incTotalNumberOfTerms(size);
@@ -143,15 +146,17 @@ public abstract class ScoringRewrite<Q e
     @Override
     public boolean collect(BytesRef bytes) throws IOException {
       final int e = terms.add(bytes);
+      final TermState state = termsEnum.termState();
+      assert state != null; 
       if (e < 0 ) {
         // duplicate term: update docFreq
         final int pos = (-e)-1;
-        array.docFreq[pos] += termsEnum.docFreq();
+        array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq());
         assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
       } else {
         // new entry: we populate the entry initially
-        array.docFreq[e] = termsEnum.docFreq();
         array.boost[e] = boostAtt.getBoost();
+        array.termState[e] = new PerReaderTermState(topReaderContext, state, readerContext.ord, termsEnum.docFreq());
         ScoringRewrite.this.checkMaxClauseCount(terms.size());
       }
       return true;
@@ -160,8 +165,8 @@ public abstract class ScoringRewrite<Q e
   
   /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */
   static final class TermFreqBoostByteStart extends DirectBytesStartArray  {
-    int[] docFreq;
     float[] boost;
+    PerReaderTermState[] termState;
     
     public TermFreqBoostByteStart(int initSize) {
       super(initSize);
@@ -171,24 +176,28 @@ public abstract class ScoringRewrite<Q e
     public int[] init() {
       final int[] ord = super.init();
       boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)];
-      docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
-      assert boost.length >= ord.length && docFreq.length >= ord.length;
+      termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+      assert termState.length >= ord.length && boost.length >= ord.length;
       return ord;
     }
 
     @Override
     public int[] grow() {
       final int[] ord = super.grow();
-      docFreq = ArrayUtil.grow(docFreq, ord.length);
       boost = ArrayUtil.grow(boost, ord.length);
-      assert boost.length >= ord.length && docFreq.length >= ord.length;
+      if (termState.length < ord.length) {
+        PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(termState, 0, tmpTermState, 0, termState.length);
+        termState = tmpTermState;
+      }     
+      assert termState.length >= ord.length && boost.length >= ord.length;
       return ord;
     }
 
     @Override
     public int[] clear() {
      boost = null;
-     docFreq = null;
+     termState = null;
      return super.clear();
     }
     

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java Wed Jan 12 21:38:51 2011
@@ -18,8 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Comparator;
 
 import org.apache.lucene.index.Fields;
@@ -27,25 +25,33 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
 import org.apache.lucene.util.ReaderUtil;
 
 abstract class TermCollectingRewrite<Q extends Query> extends MultiTermQuery.RewriteMethod {
   
+  
   /** Return a suitable top-level Query for holding all expanded terms. */
   protected abstract Q getTopLevelQuery() throws IOException;
   
   /** Add a MultiTermQuery term to the top-level query */
-  protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException;
+  protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException {
+    addClause(topLevel, term, docCount, boost, null);
+  }
+  
+  protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState states) throws IOException;
+
   
   protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
-    final List<IndexReader> subReaders = new ArrayList<IndexReader>();
-    ReaderUtil.gatherSubReaders(subReaders, reader);
+    ReaderContext topReaderContext = reader.getTopReaderContext();
     Comparator<BytesRef> lastTermComp = null;
-    
-    for (IndexReader r : subReaders) {
-      final Fields fields = r.fields();
+    final AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext);
+    for (AtomicReaderContext context : leaves) {
+      final Fields fields = context.reader.fields();
       if (fields == null) {
         // reader has no fields
         continue;
@@ -68,11 +74,10 @@ abstract class TermCollectingRewrite<Q e
       if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
         throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp);
       lastTermComp = newTermComp;
-      
+      collector.setReaderContext(topReaderContext, context);
       collector.setNextEnum(termsEnum);
       BytesRef bytes;
       while ((bytes = termsEnum.next()) != null) {
-        termsEnum.cacheCurrentTerm();
         if (!collector.collect(bytes))
           return; // interrupt whole term collection, so also don't iterate other subReaders
       }
@@ -80,6 +85,14 @@ abstract class TermCollectingRewrite<Q e
   }
   
   protected static abstract class TermCollector {
+    
+    protected AtomicReaderContext readerContext;
+    protected ReaderContext topReaderContext;
+
+    public void setReaderContext(ReaderContext topReaderContext, AtomicReaderContext readerContext) {
+      this.readerContext = readerContext;
+      this.topReaderContext = topReaderContext;
+    }
     /** attributes used for communication with the enum */
     public final AttributeSource attributes = new AttributeSource();
   

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Wed Jan 12 21:38:51 2011
@@ -22,10 +22,14 @@ import java.util.Set;
 
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Explanation.IDFExplanation;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
 import org.apache.lucene.util.ToStringUtils;
 
 /** A Query that matches documents containing a term.
@@ -33,20 +37,22 @@ import org.apache.lucene.util.ToStringUt
   */
 public class TermQuery extends Query {
   private final Term term;
-  private final int docFreq;
+  private int docFreq;
+  private transient PerReaderTermState perReaderTermState;
 
   private class TermWeight extends Weight {
     private final Similarity similarity;
     private float value;
-    private float idf;
+    private final float idf;
     private float queryNorm;
     private float queryWeight;
-    private IDFExplanation idfExp;
-    private transient ReaderContext weightContext; // only set if -ea for assert in scorer()
+    private final IDFExplanation idfExp;
+    private transient PerReaderTermState termStates;
 
-    public TermWeight(IndexSearcher searcher)
+    public TermWeight(IndexSearcher searcher, PerReaderTermState termStates, int docFreq)
       throws IOException {
-      assert setWeightContext(searcher);
+      assert termStates != null : "PerReaderTermState must not be null";
+      this.termStates = termStates;
       this.similarity = getSimilarity(searcher);
       if (docFreq != -1) {
         idfExp = similarity.idfExplain(term, searcher, docFreq);
@@ -80,31 +86,34 @@ public class TermQuery extends Query {
 
     @Override
     public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
-      assert assertTopReaderContext(context);
+      final String field = term.field();
       final IndexReader reader = context.reader;
-      DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(),
-                                          term.field(),
-                                          term.bytes());
-
-      if (docs == null) {
+      assert assertTopReaderContext(termStates, context) : "The top-reader used to create Weight is not the same as the current reader's top-reader";
+      final TermState state = termStates
+          .get(context.ord);
+      if (state == null) { // term is not present in that reader
+        assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader";
         return null;
       }
-
-      return new TermScorer(this, docs, similarity, reader.norms(term.field()));
+      final DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), field, term.bytes(), state);
+      assert docs != null;
+      return new TermScorer(this, docs, similarity, context.reader.norms(field));
+    }
+    
+    private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
+      // only called from assert
+      final Terms terms = reader.terms(field);
+      return terms == null || terms.docFreq(bytes) == 0;
     }
     
-    private boolean assertTopReaderContext(ReaderContext context) {
-      while (context.parent != null) {
+    private boolean assertTopReaderContext(PerReaderTermState state, ReaderContext context) {
+      while(context.parent != null) {
         context = context.parent;
       }
-      return weightContext == context;
+      return state.topReaderContext == context;
     }
     
-    private boolean setWeightContext(IndexSearcher searcher) {
-      weightContext = searcher.getTopReaderContext();
-      return true;
-    }
-
+   
     @Override
     public Explanation explain(AtomicReaderContext context, int doc)
       throws IOException {
@@ -157,7 +166,7 @@ public class TermQuery extends Query {
       fieldExpl.addDetail(expl);
 
       Explanation fieldNormExpl = new Explanation();
-      byte[] fieldNorms = reader.norms(field);
+      final byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
         fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
@@ -193,6 +202,17 @@ public class TermQuery extends Query {
   public TermQuery(Term t, int docFreq) {
     term = t;
     this.docFreq = docFreq;
+    perReaderTermState = null;
+  }
+  
+  /** Expert: constructs a TermQuery that will use the
+   *  provided per-reader term states (and their docFreq) instead of
+   *  looking the term up again, if they match the searcher's top reader. */
+  public TermQuery(Term t, PerReaderTermState states) {
+    assert states != null;
+    term = t;
+    docFreq = states.docFreq();
+    perReaderTermState = states;
   }
 
   /** Returns the term of this query. */
@@ -200,7 +220,21 @@ public class TermQuery extends Query {
 
   @Override
   public Weight createWeight(IndexSearcher searcher) throws IOException {
-    return new TermWeight(searcher);
+    final ReaderContext context = searcher.getTopReaderContext();
+    final int weightDocFreq;
+    final PerReaderTermState termState;
+    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
+      // make TermQuery single-pass if we don't have a PRTS or if the context differs!
+      termState = PerReaderTermState.build(context, term, true); // cache term lookups!
+      // we must not ignore the given docFreq - if set use the given value
+      weightDocFreq = docFreq == -1 ? termState.docFreq() : docFreq;
+    } else {
+     // PRTS was pre-built for this IndexSearcher
+     termState = this.perReaderTermState;
+     weightDocFreq = docFreq;
+    }
+    
+    return new TermWeight(searcher, termState, weightDocFreq);
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Wed Jan 12 21:38:51 2011
@@ -25,9 +25,11 @@ import java.util.Comparator;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PerReaderTermState;
 
 /**
  * Base rewrite method for collecting only the top terms
@@ -78,12 +80,12 @@ public abstract class TopTermsRewrite<Q 
         this.termComp = termsEnum.getComparator();
         // lazy init the initial ScoreTerm because comparator is not known on ctor:
         if (st == null)
-          st = new ScoreTerm(this.termComp);
+          st = new ScoreTerm(this.termComp, new PerReaderTermState(topReaderContext));
         boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
       }
     
       @Override
-      public boolean collect(BytesRef bytes) {
+      public boolean collect(BytesRef bytes) throws IOException {
         final float boost = boostAtt.getBoost();
         // ignore uncompetetive hits
         if (stQueue.size() == maxSize) {
@@ -94,23 +96,27 @@ public abstract class TopTermsRewrite<Q 
             return true;
         }
         ScoreTerm t = visitedTerms.get(bytes);
+        final TermState state = termsEnum.termState();
+        assert state != null;
         if (t != null) {
           // if the term is already in the PQ, only update docFreq of term in PQ
-          t.docFreq += termsEnum.docFreq();
           assert t.boost == boost : "boost should be equal in all segment TermsEnums";
+          t.termState.register(state, readerContext.ord, termsEnum.docFreq());
         } else {
           // add new entry in PQ, we must clone the term, else it may get overwritten!
           st.bytes.copy(bytes);
           st.boost = boost;
-          st.docFreq = termsEnum.docFreq();
           visitedTerms.put(st.bytes, st);
+          assert st.termState.docFreq() == 0;
+          st.termState.register(state, readerContext.ord, termsEnum.docFreq());
           stQueue.offer(st);
           // possibly drop entries from queue
           if (stQueue.size() > maxSize) {
             st = stQueue.poll();
             visitedTerms.remove(st.bytes);
+            st.termState.clear(); // reset the termstate! 
           } else {
-            st = new ScoreTerm(termComp);
+            st = new ScoreTerm(termComp, new PerReaderTermState(topReaderContext));
           }
           assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
           // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
@@ -120,6 +126,7 @@ public abstract class TopTermsRewrite<Q 
             maxBoostAtt.setCompetitiveTerm(t.bytes);
           }
         }
+       
         return true;
       }
     });
@@ -130,8 +137,8 @@ public abstract class TopTermsRewrite<Q 
     ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
     for (final ScoreTerm st : scoreTerms) {
       final Term term = placeholderTerm.createTerm(st.bytes);
-      assert reader.docFreq(term) == st.docFreq;
-      addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query
+      assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
+      addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
     }
     query.incTotalNumberOfTerms(scoreTerms.length);
     return q;
@@ -147,7 +154,7 @@ public abstract class TopTermsRewrite<Q 
     if (this == obj) return true;
     if (obj == null) return false;
     if (getClass() != obj.getClass()) return false;
-    final TopTermsRewrite other = (TopTermsRewrite) obj;
+    final TopTermsRewrite<?> other = (TopTermsRewrite<?>) obj;
     if (size != other.size) return false;
     return true;
   }
@@ -163,13 +170,12 @@ public abstract class TopTermsRewrite<Q 
 
   static final class ScoreTerm implements Comparable<ScoreTerm> {
     public final Comparator<BytesRef> termComp;
-
     public final BytesRef bytes = new BytesRef();
     public float boost;
-    public int docFreq;
-    
-    public ScoreTerm(Comparator<BytesRef> termComp) {
+    public final PerReaderTermState termState;
+    public ScoreTerm(Comparator<BytesRef> termComp, PerReaderTermState termState) {
       this.termComp = termComp;
+      this.termState = termState;
     }
     
     public int compareTo(ScoreTerm other) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Wed Jan 12 21:38:51 2011
@@ -24,8 +24,11 @@ import org.apache.lucene.index.DocsAndPo
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.PrefixCodedTermState;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;
 import org.apache.lucene.util.ArrayUtil;
@@ -304,11 +307,6 @@ public class DocTermsIndexCreator extend
       }
 
       @Override
-      public void cacheCurrentTerm() throws IOException {
-        throw new UnsupportedOperationException();
-      }
-
-      @Override
       public BytesRef term() throws IOException {
         return term;
       }
@@ -337,6 +335,19 @@ public class DocTermsIndexCreator extend
       public Comparator<BytesRef> getComparator() throws IOException {
         return BytesRef.getUTF8SortedAsUnicodeComparator();
       }
+
+      @Override
+      public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+        assert state != null && state instanceof OrdTermState;
+        return this.seek(((OrdTermState)state).ord);
+      }
+
+      @Override
+      public TermState termState() throws IOException {
+        OrdTermState state = new OrdTermState();
+        state.ord = currentOrd;
+        return state;
+      }
     }
   }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java?rev=1058328&r1=1058327&r2=1058328&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java Wed Jan 12 21:38:51 2011
@@ -26,6 +26,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TopTermsRewrite;
 import org.apache.lucene.search.ScoringRewrite;
 import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
+import org.apache.lucene.util.PerReaderTermState;
 
 /**
  * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, 
@@ -153,7 +154,7 @@ public class SpanMultiTermQueryWrapper<Q
       }
     
       @Override
-      protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) {
+      protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) {
         final SpanTermQuery q = new SpanTermQuery(term);
         q.setBoost(boost);
         topLevel.addClause(q);
@@ -202,7 +203,7 @@ public class SpanMultiTermQueryWrapper<Q
         }
 
         @Override
-        protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) {
+        protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) {
           final SpanTermQuery q = new SpanTermQuery(term);
           q.setBoost(boost);
           topLevel.addClause(q);