You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/20 19:53:58 UTC

svn commit: r1061453 [1/3] - in /lucene/dev/trunk: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/ lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/app...

Author: mikemccand
Date: Thu Jan 20 18:53:55 2011
New Revision: 1061453

URL: http://svn.apache.org/viewvc?rev=1061453&view=rev
Log:
LUCENE-2872: block-encode terms in between indexed terms

Added:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java   (with props)
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java   (with props)
Removed:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
Modified:
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
    lucene/dev/trunk/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/DataInput.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiFields.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Thu Jan 20 18:53:55 2011
@@ -144,9 +144,9 @@ public class InstantiatedTermsEnum exten
   }
 
   @Override
-  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+  public void seek(BytesRef term, TermState state) throws IOException {
     assert state != null && state instanceof OrdTermState;
-    return seek(((OrdTermState)state).ord); // just use the ord for simplicity
+    seek(((OrdTermState)state).ord); // just use the ord for simplicity
   }
 }
 

Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Jan 20 18:53:55 2011
@@ -938,9 +938,9 @@ public class MemoryIndex implements Seri
       }
 
       @Override
-      public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+      public void seek(BytesRef term, TermState state) throws IOException {
         assert state != null;
-        return this.seek(((OrdTermState)state).ord);
+        this.seek(((OrdTermState)state).ord);
       }
 
       @Override

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java Thu Jan 20 18:53:55 2011
@@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.Po
 import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsReader;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -131,7 +131,7 @@ public class AppendingCodec extends Code
   public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files)
           throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, codecId, files);
-    PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+    BlockTermsReader.files(dir, segmentInfo, codecId, files);
     FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
   }
 

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java Thu Jan 20 18:53:55 2011
@@ -22,15 +22,15 @@ import java.util.Comparator;
 
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
-public class AppendingTermsDictReader extends PrefixCodedTermsReader {
+public class AppendingTermsDictReader extends BlockTermsReader {
 
   public AppendingTermsDictReader(TermsIndexReaderBase indexReader,
           Directory dir, FieldInfos fieldInfos, String segment,
@@ -43,7 +43,7 @@ public class AppendingTermsDictReader ex
   @Override
   protected void readHeader(IndexInput in) throws IOException {
     CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME,
-      PrefixCodedTermsWriter.VERSION_START, PrefixCodedTermsWriter.VERSION_CURRENT);    
+      BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT);    
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java Thu Jan 20 18:53:55 2011
@@ -22,13 +22,13 @@ import java.util.Comparator;
 
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
-public class AppendingTermsDictWriter extends PrefixCodedTermsWriter {
+public class AppendingTermsDictWriter extends BlockTermsWriter {
   final static String CODEC_NAME = "APPENDING_TERMS_DICT";
 
   public AppendingTermsDictWriter(TermsIndexWriterBase indexWriter,

Modified: lucene/dev/trunk/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java Thu Jan 20 18:53:55 2011
@@ -215,7 +215,10 @@ public class TestParser extends LuceneTe
 	}
 	private void dumpResults(String qType,Query q, int numDocs) throws IOException
 	{
-		TopDocs hits = searcher.search(q, null, numDocs);
+                if (VERBOSE) {
+                  System.out.println("TEST: query=" + q);
+                }
+                TopDocs hits = searcher.search(q, null, numDocs);
 		assertTrue(qType +" should produce results ", hits.totalHits>0);
 		if(VERBOSE)
 		{

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Thu Jan 20 18:53:55 2011
@@ -802,7 +802,7 @@ public class CheckIndex {
       msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
 
     } catch (Throwable e) {
-      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      msg("ERROR: " + e);
       status.error = e;
       if (infoStream != null) {
         e.printStackTrace(infoStream);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jan 20 18:53:55 2011
@@ -903,8 +903,7 @@ final class DocumentsWriter {
   final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
 
   /* if you increase this, you must fix field cache impl for
-   * getTerms/getTermsIndex requires <= 32768.  Also fix
-   * DeltaBytesWriter's TERM_EOF if necessary. */
+   * getTerms/getTermsIndex requires <= 32768. */
   final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
 
   /* Initial chunks size of the shared int[] blocks used to

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Thu Jan 20 18:53:55 2011
@@ -156,12 +156,12 @@ public class FilterIndexReader extends I
     }
 
     @Override
-    public int docFreq() {
+    public int docFreq() throws IOException {
       return in.docFreq();
     }
 
     @Override
-    public long totalTermFreq() {
+    public long totalTermFreq() throws IOException {
       return in.totalTermFreq();
     }
 
@@ -181,8 +181,8 @@ public class FilterIndexReader extends I
     }
 
     @Override
-    public SeekStatus seek(BytesRef term, TermState state) throws IOException {
-      return in.seek(term, state);
+    public void seek(BytesRef term, TermState state) throws IOException {
+      in.seek(term, state);
     }
 
     @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Jan 20 18:53:55 2011
@@ -257,7 +257,7 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public int docFreq() {
+  public int docFreq() throws IOException {
     int sum = 0;
     for(int i=0;i<numTop;i++) {
       sum += top[i].terms.docFreq();
@@ -266,7 +266,7 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public long totalTermFreq() {
+  public long totalTermFreq() throws IOException {
     long sum = 0;
     for(int i=0;i<numTop;i++) {
       final long v = top[i].terms.totalTermFreq();

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Terms.java Thu Jan 20 18:53:55 2011
@@ -100,11 +100,8 @@ public abstract class Terms {
    * @see TermsEnum#seek(BytesRef, TermState) */
   public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
-      return termsEnum.docs(skipDocs, reuse);
-    } else {
-      return null;
-    }
+    termsEnum.seek(term, termState);
+    return termsEnum.docs(skipDocs, reuse);
   }
 
   /**
@@ -116,11 +113,8 @@ public abstract class Terms {
    * @see TermsEnum#seek(BytesRef, TermState) */
   public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
-      return termsEnum.docsAndPositions(skipDocs, reuse);
-    } else {
-      return null;
-    }
+    termsEnum.seek(term, termState);
+    return termsEnum.docsAndPositions(skipDocs, reuse);
   }
 
   public long getUniqueTermCount() throws IOException {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermsEnum.java Thu Jan 20 18:53:55 2011
@@ -80,7 +80,7 @@ public abstract class TermsEnum {
    * use this method. Low-level implementations may position the TermsEnum
    * without re-seeking the term dictionary.
    * <p>
-   * Seeking by {@link TermState} should only be used iff the enu the state was
+   * Seeking by {@link TermState} should only be used iff the enum the state was
    * obtained from and the enum the state is used for seeking are obtained from
    * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can
    * leave the enum in undefined state.
@@ -97,8 +97,8 @@ public abstract class TermsEnum {
    * @param term the term the TermState corresponds to
    * @param state the {@link TermState}
    * */
-  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
-    return seek(term);
+  public void seek(BytesRef term, TermState state) throws IOException {
+    seek(term);
   }
 
   /** Increments the enumeration to the next element.
@@ -124,7 +124,7 @@ public abstract class TermsEnum {
    *  term.  Do not call this before calling next() for the
    *  first time, after next() returns null or seek returns
    *  {@link SeekStatus#END}.*/
-  public abstract int docFreq();
+  public abstract int docFreq() throws IOException;
 
   /** Returns the total number of occurrences of this term
    *  across all documents (the sum of the freq() for each
@@ -132,7 +132,7 @@ public abstract class TermsEnum {
    *  codec doesn't support this measure.  Note that, like
    *  other term measures, this measure does not take
    *  deleted documents into account. */
-  public abstract long totalTermFreq();
+  public abstract long totalTermFreq() throws IOException;
 
   /** Get {@link DocsEnum} for the current term.  Do not
    *  call this before calling {@link #next} or {@link
@@ -242,7 +242,7 @@ public abstract class TermsEnum {
     }
 
     @Override
-    public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    public void seek(BytesRef term, TermState state) throws IOException {
       throw new IllegalStateException("this method should never be called");
     }
   };

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java?rev=1061453&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java Thu Jan 20 18:53:55 2011
@@ -0,0 +1,55 @@
+package org.apache.lucene.index.codecs;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * Holds all state required for {@link PostingsReaderBase}
+ * to produce a {@link DocsEnum} without re-seeking the
+ * terms dict.
+ */
+public class BlockTermState extends OrdTermState {
+  public int docFreq;            // how many docs have this term
+  public long totalTermFreq;     // total number of occurrences of this term
+
+  public int termCount;          // the term's ord in the current block
+  public long blockFilePointer;  // fp into the terms dict primary file (_X.tib) that holds this term
+
+  public int blockTermCount;     // how many terms in current block
+
+  @Override
+  public void copyFrom(TermState _other) {
+    assert _other instanceof BlockTermState : "can not copy from " + _other.getClass().getName();
+    BlockTermState other = (BlockTermState) _other;
+    super.copyFrom(_other);
+    docFreq = other.docFreq;
+    totalTermFreq = other.totalTermFreq;
+    termCount = other.termCount;
+    blockFilePointer = other.blockFilePointer;
+
+    // NOTE: don't copy blockTermCount;
+    // it's "transient": used only by the "primary"
+    // termState, and regenerated on seek by TermState
+  }
+
+  @Override
+  public String toString() {
+    return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
+  }
+}

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1061453&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Thu Jan 20 18:53:55 2011
@@ -0,0 +1,741 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.DoubleBarrelLRUCache;
+
+/** Handles a terms dict, but decouples all details of
+ *  doc/freqs/positions reading to an instance of {@link
+ *  PostingsReaderBase}.  This class is reusable for
+ *  codecs that use a different format for
+ *  docs/freqs/positions (though codecs are also free to
+ *  make their own terms dict impl).
+ *
+ * <p>This class also interacts with an instance of {@link
+ * TermsIndexReaderBase}, to abstract away the specific
+ * implementation of the terms dict index. 
+ * @lucene.experimental */
+
+public class BlockTermsReader extends FieldsProducer {
+  // Open input to the main terms dict file (_X.tis)
+  private final IndexInput in;
+
+  // Reads the terms dict entries, to gather state to
+  // produce DocsEnum on demand
+  private final PostingsReaderBase postingsReader;
+
+  private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
+
+  // Comparator that orders our terms
+  private final Comparator<BytesRef> termComp;
+
+  // Caches the most recently looked-up field + terms:
+  private final DoubleBarrelLRUCache<FieldAndTerm,BlockTermState> termsCache;
+
+  // Reads the terms index
+  private TermsIndexReaderBase indexReader;
+
+  // keeps the dirStart offset
+  protected long dirOffset;
+
+  // Used as key for the terms cache
+  private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey {
+    String field;
+    BytesRef term;
+
+    public FieldAndTerm() {
+    }
+
+    public FieldAndTerm(FieldAndTerm other) {
+      field = other.field;
+      term = new BytesRef(other.term);
+    }
+
+    @Override
+    public boolean equals(Object _other) {
+      FieldAndTerm other = (FieldAndTerm) _other;
+      return other.field == field && term.bytesEquals(other.term);
+    }
+
+    @Override
+    public Object clone() {
+      return new FieldAndTerm(this);
+    }
+
+    @Override
+    public int hashCode() {
+      return field.hashCode() * 31 + term.hashCode();
+    }
+  }
+  
+  private String segment;
+  
+  public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
+                          Comparator<BytesRef> termComp, int termsCacheSize, String codecId)
+    throws IOException {
+    
+    this.postingsReader = postingsReader;
+    termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
+
+    this.termComp = termComp;
+    this.segment = segment;
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
+                       readBufferSize);
+
+    boolean success = false;
+    try {
+      readHeader(in);
+
+      // Have PostingsReader init itself
+      postingsReader.init(in);
+
+      // Read per-field details
+      seekDir(in, dirOffset);
+
+      final int numFields = in.readVInt();
+
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readVInt();
+        final long numTerms = in.readVLong();
+        assert numTerms >= 0;
+        final long termsStartPointer = in.readVLong();
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
+        assert !fields.containsKey(fieldInfo.name);
+        fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq));
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        in.close();
+      }
+    }
+
+    this.indexReader = indexReader;
+  }
+
+  protected void readHeader(IndexInput input) throws IOException {
+    CodecUtil.checkHeader(in, BlockTermsWriter.CODEC_NAME,
+                          BlockTermsWriter.VERSION_START,
+                          BlockTermsWriter.VERSION_CURRENT);
+    dirOffset = in.readLong();    
+  }
+  
  /** Positions {@code input} at the start of the field directory.  Note the
   *  {@code dirOffset} parameter shadows the like-named field set by
   *  {@link #readHeader}; only the parameter is used here. */
  protected void seekDir(IndexInput input, long dirOffset)
      throws IOException {
    input.seek(dirOffset);
  }
+  
  /** Delegates index loading to the wrapped terms-index reader. */
  @Override
  public void loadTermsIndex(int indexDivisor) throws IOException {
    indexReader.loadTermsIndex(indexDivisor);
  }
+
  /**
   * Closes the index reader, terms input, postings reader and all field
   * readers.  The nested try/finally chain guarantees every close is
   * attempted even if an earlier one throws; the first exception wins.
   */
  @Override
  public void close() throws IOException {
    try {
      try {
        if (indexReader != null) {
          indexReader.close();
        }
      } finally {
        // null so if an app hangs on to us (ie, we are not
        // GCable, despite being closed) we still free most
        // ram
        indexReader = null;
        if (in != null) {
          in.close();
        }
      }
    } finally {
      try {
        if (postingsReader != null) {
          postingsReader.close();
        }
      } finally {
        for(FieldReader field : fields.values()) {
          field.close();
        }
      }
    }
  }
+
  /** Adds the terms-dict file owned by this codec for {@code segmentInfo}
   *  to {@code files}. */
  public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection<String> files) {
    files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, BlockTermsWriter.TERMS_EXTENSION));
  }
+
  /** Adds the terms-dict file extension to {@code extensions}. */
  public static void getExtensions(Collection<String> extensions) {
    extensions.add(BlockTermsWriter.TERMS_EXTENSION);
  }
+
  /** Returns an enum over all fields in this terms dictionary. */
  @Override
  public FieldsEnum iterator() {
    return new TermFieldsEnum();
  }
+
  /** Returns the {@link Terms} for {@code field}, or null if the field has
   *  no terms in this segment. */
  @Override
  public Terms terms(String field) throws IOException {
    return fields.get(field);
  }
+
  // Iterates through all fields
  private class TermFieldsEnum extends FieldsEnum {
    final Iterator<FieldReader> it;
    // Field most recently returned by next(); null before the first call
    // and after exhaustion.
    FieldReader current;

    TermFieldsEnum() {
      it = fields.values().iterator();
    }

    /** Advances to the next field and returns its name, or null when done. */
    @Override
    public String next() {
      if (it.hasNext()) {
        current = it.next();
        return current.fieldInfo.name;
      } else {
        current = null;
        return null;
      }
    }
    
    /** Returns a terms enum for the current field.  NOTE(review): throws
     *  NullPointerException if next() has not positioned on a field. */
    @Override
    public TermsEnum terms() throws IOException {
      return current.iterator();
    }
  }
+
+  private class FieldReader extends Terms implements Closeable {
    final long numTerms;
    final FieldInfo fieldInfo;
    // Absolute file pointer of this field's first terms block.
    final long termsStartPointer;
    // Sum of totalTermFreq over all terms, or -1 if freqs were omitted.
    final long sumTotalTermFreq;

    /** Holds per-field stats read from the terms-dict directory; a field is
     *  only recorded there when it has at least one term. */
    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
      this.termsStartPointer = termsStartPointer;
      this.sumTotalTermFreq = sumTotalTermFreq;
    }
+
    /** Returns the term comparator this dictionary was written with. */
    @Override
    public Comparator<BytesRef> getComparator() {
      return termComp;
    }
+
    /** No per-field resources to release; delegates to the base class. */
    @Override
    public void close() {
      super.close();
    }
+    
    /** Returns a new enum over this field's terms, backed by a clone of the
     *  shared terms input. */
    @Override
    public TermsEnum iterator() throws IOException {
      return new SegmentTermsEnum();
    }
+
    /** Number of unique terms in this field (read from the directory). */
    @Override
    public long getUniqueTermCount() {
      return numTerms;
    }
+
    /** Sum of totalTermFreq over all terms, or -1 when the field omits
     *  term freqs and positions. */
    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }
+
+    // Iterates through terms in this field
+    private final class SegmentTermsEnum extends TermsEnum {
      // Private clone of the shared terms input, so enums don't interfere.
      private final IndexInput in;
      // Mutable term state; decodeMetaData() fills it for the current term.
      private final BlockTermState state;
      // Whether the terms index supports ord lookup.
      private final boolean doOrd;
      private final FieldAndTerm fieldTerm = new FieldAndTerm();
      private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
      // Current term bytes; prefix bytes are retained across _next() calls.
      private final BytesRef term = new BytesRef();

      /* This is true if indexEnum is "still" seek'd to the index term
         for the current term. We set it to true on seeking, and then it
         remains valid until next() is called enough times to load another
         terms block: */
      private boolean indexIsCurrent;

      /* True if we've already called .next() on the indexEnum, to "bracket"
         the current block of terms: */
      private boolean didIndexNext;

      /* Next index term, bracketing the current block of terms; this is
         only valid if didIndexNext is true: */
      private BytesRef nextIndexTerm;

      /* True after seek(TermState), do defer seeking.  If the app then
         calls next() (which is not "typical"), then we'll do the real seek */
      private boolean seekPending;

      /* How many blocks we've read since last seek.  Once this
         is >= indexEnum.getDivisor() we set indexIsCurrent to false (since
         the index can no long bracket seek-within-block). */
      private int blocksSinceSeek;

      // Raw suffix bytes for all terms of the current block (see nextBlock).
      private byte[] termSuffixes;
      private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput(null);

      /* Common prefix used for all terms in this block. */
      private int termBlockPrefix;

      // Serialized docFreq / totalTermFreq deltas for the current block,
      // decoded lazily by decodeMetaData().
      private byte[] docFreqBytes;
      private final ByteArrayDataInput freqReader = new ByteArrayDataInput(null);
      // How many terms of the current block have had metadata decoded.
      private int metaDataUpto;

      /** Positions a fresh enum at the start of this field's terms. */
      public SegmentTermsEnum() throws IOException {
        in = (IndexInput) BlockTermsReader.this.in.clone();
        in.seek(termsStartPointer);
        indexEnum = indexReader.getFieldEnum(fieldInfo);
        doOrd = indexReader.supportsOrd();
        fieldTerm.field = fieldInfo.name;
        state = postingsReader.newTermState();
        state.totalTermFreq = -1;
        state.ord = -1;

        // Initial scratch sizes; grown on demand in nextBlock():
        termSuffixes = new byte[128];
        docFreqBytes = new byte[64];
        //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
      }
+
      /** Returns the term comparator this dictionary was written with. */
      @Override
      public Comparator<BytesRef> getComparator() {
        return termComp;
      }
+
      /**
       * Seeks to {@code target}: first consults the terms cache, then tries
       * to stay inside the current block (when the target falls between the
       * current term and the next index term), otherwise asks the terms
       * index for the block holding the largest index term &lt;= target and
       * scans forward from there.
       */
      @Override
      public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException {

        if (indexEnum == null) {
          throw new IllegalStateException("terms index was not loaded");
        }
        
        //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
        /*
        if (didIndexNext) {
          if (nextIndexTerm == null) {
            //System.out.println("  nextIndexTerm=null");
          } else {
            //System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
          }
        }
        */

        // Check cache
        if (useCache) {
          fieldTerm.term = target;
          // TODO: should we differentiate "frozen"
          // TermState (ie one that was cloned and
          // cached/returned by termState()) from the
          // malleable (primary) one?
          final TermState cachedState = termsCache.get(fieldTerm);
          if (cachedState != null) {
            // Cache hit: defer the real positioning until next() is called
            // (seek(target, state) sets seekPending):
            seekPending = true;
            //System.out.println("  cached!");
            seek(target, cachedState);
            //System.out.println("  term=" + term.utf8ToString());
            return SeekStatus.FOUND;
          }
        }

        boolean doSeek = true;

        // See if we can avoid seeking, because target term
        // is after current term but before next index term:
        if (indexIsCurrent) {

          final int cmp = termComp.compare(term, target);

          if (cmp == 0) {
            // Already at the requested term
            return SeekStatus.FOUND;
          } else if (cmp < 0) {

            // Target term is after current term
            if (!didIndexNext) {
              if (indexEnum.next() == -1) {
                nextIndexTerm = null;
              } else {
                nextIndexTerm = indexEnum.term();
              }
              //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
              didIndexNext = true;
            }

            if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) {
              // Optimization: requested term is within the
              // same term block we are now in; skip seeking
              // (but do scanning):
              doSeek = false;
              //System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
            }
          }
        }

        if (doSeek) {
          //System.out.println("  seek");

          // Ask terms index to find biggest indexed term (=
          // first term in a block) that's <= our text:
          in.seek(indexEnum.seek(target));
          boolean result = nextBlock();

          // Block must exist since, at least, the indexed term
          // is in the block:
          assert result;

          indexIsCurrent = true;
          didIndexNext = false;
          blocksSinceSeek = 0;          

          if (doOrd) {
            state.ord = indexEnum.ord()-1;
          }

          // NOTE: the first _next() after an index seek is
          // a bit wasteful, since it redundantly reads some
          // suffix bytes into the buffer.  We could avoid storing
          // those bytes in the primary file, but then when
          // next()ing over an index term we'd have to
          // special case it:
          term.copy(indexEnum.term());
          //System.out.println("  seek: term=" + term.utf8ToString());
        } else {
          ////System.out.println("  skip seek");
        }

        seekPending = false;

        // Now scan:
        while (_next() != null) {
          final int cmp = termComp.compare(term, target);
          if (cmp == 0) {
            // Match!
            if (useCache) {
              // Store in cache
              decodeMetaData();
              termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
            }
            //System.out.println("  FOUND");
            return SeekStatus.FOUND;
          } else if (cmp > 0) {
            //System.out.println("  NOT_FOUND term=" + term.utf8ToString());
            return SeekStatus.NOT_FOUND;
          }
          
          // The purpose of the terms dict index is to seek
          // the enum to the closest index term before the
          // term we are looking for.  So, we should never
          // cross another index term (besides the first
          // one) while we are scanning:
          assert indexIsCurrent;
        }

        indexIsCurrent = false;
        //System.out.println("  END");
        return SeekStatus.END;
      }
+
      /**
       * Advances to the next term, first replaying any deferred
       * seek(TermState) by re-reading the block and scanning up to the
       * previously recorded in-block position.
       */
      @Override
      public BytesRef next() throws IOException {
        //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termCount);

        // If seek was previously called and the term was cached,
        // usually caller is just going to pull a D/&PEnum or get
        // docFreq, etc.  But, if they then call next(),
        // this method catches up all internal state so next()
        // works properly:
        if (seekPending) {
          assert !indexIsCurrent;
          in.seek(state.blockFilePointer);
          final int pendingSeekCount = state.termCount;
          boolean result = nextBlock();

          // nextBlock() resets state.ord; restore it after the catch-up
          // scan below:
          final long savOrd = state.ord;

          // Block must exist since seek(TermState) was called w/ a
          // TermState previously returned by this enum when positioned
          // on a real term:
          assert result;

          while(state.termCount < pendingSeekCount) {
            BytesRef nextResult = _next();
            assert nextResult != null;
          }
          seekPending = false;
          state.ord = savOrd;
        }
        return _next();
      }
+
      /* Decodes only the term bytes of the next term.  If caller then asks for
         metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
         decode all metadata up to the current term. */
      private BytesRef _next() throws IOException {
        //System.out.println("BTR._next this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")");
        if (state.termCount == state.blockTermCount) {
          // Exhausted the current block; load the next one (false = EOF):
          if (!nextBlock()) {
            //System.out.println("  eof");
            indexIsCurrent = false;
            return null;
          }
        }

        // TODO: cutover to something better for these ints!  simple64?
        final int suffix = termSuffixesReader.readVInt();
        //System.out.println("  suffix=" + suffix);

        // Rebuild the term as block-common prefix (already in term.bytes
        // from the previous term) + this term's suffix:
        term.length = termBlockPrefix + suffix;
        if (term.bytes.length < term.length) {
          term.grow(term.length);
        }
        termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
        state.termCount++;

        // NOTE: meaningless in the non-ord case
        state.ord++;

        //System.out.println("  return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term);
        return term;
      }
+
      /** Returns the current term; the returned BytesRef is reused. */
      @Override
      public BytesRef term() {
        return term;
      }
+
      /** Returns the doc freq of the current term, lazily decoding this
       *  block's metadata first. */
      @Override
      public int docFreq() throws IOException {
        //System.out.println("BTR.docFreq");
        decodeMetaData();
        //System.out.println("  return " + state.docFreq);
        return state.docFreq;
      }
+
      /** Returns totalTermFreq of the current term (-1 if freqs omitted),
       *  lazily decoding this block's metadata first. */
      @Override
      public long totalTermFreq() throws IOException {
        decodeMetaData();
        return state.totalTermFreq;
      }
+
      /** Returns a DocsEnum for the current term, decoding metadata first
       *  so the postings reader has a valid term state. */
      @Override
      public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
        //System.out.println("BTR.docs this=" + this);
        decodeMetaData();
        //System.out.println("  state.docFreq=" + state.docFreq);
        final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
        assert docsEnum != null;
        return docsEnum;
      }
+
      /** Returns a DocsAndPositionsEnum for the current term, or null when
       *  the field omits term freqs and positions. */
      @Override
      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
        //System.out.println("BTR.d&p this=" + this);
        decodeMetaData();
        if (fieldInfo.omitTermFreqAndPositions) {
          return null;
        } else {
          DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse);
          //System.out.println("  return d&pe=" + dpe);
          return dpe;
        }
      }
+
      /** Seeks using a TermState previously obtained from {@link #termState};
       *  only copies state and defers the real file seek until next() or
       *  decodeMetaData() needs it (seekPending). */
      @Override
      public void seek(BytesRef target, TermState otherState) throws IOException {
        //System.out.println("BTR.seek termState target=" + target.utf8ToString() + " " + target + " this=" + this);
        assert otherState != null && otherState instanceof BlockTermState;
        assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
        state.copyFrom(otherState);
        seekPending = true;
        indexIsCurrent = false;
        term.copy(target);
      }
+      
      /** Returns a clone of the current term's fully-decoded state, suitable
       *  for later use with {@code seek(BytesRef, TermState)}. */
      @Override
      public TermState termState() throws IOException {
        //System.out.println("BTR.termState this=" + this);
        decodeMetaData();
        TermState ts = (TermState) state.clone();
        //System.out.println("  return ts=" + ts);
        return ts;
      }
+
      /**
       * Seeks to the term with the given ordinal: positions via the terms
       * index on the block containing {@code ord}, then scans forward
       * term-by-term.  Returns END when ord is out of range, else FOUND.
       */
      @Override
      public SeekStatus seek(long ord) throws IOException {
        //System.out.println("BTR.seek by ord ord=" + ord);
        if (indexEnum == null) {
          throw new IllegalStateException("terms index was not loaded");
        }

        if (ord >= numTerms) {
          state.ord = numTerms-1;
          return SeekStatus.END;
        }

        // TODO: if ord is in same terms block and
        // after current ord, we should avoid this seek just
        // like we do in the seek(BytesRef) case
        in.seek(indexEnum.seek(ord));
        boolean result = nextBlock();

        // Block must exist since ord < numTerms:
        assert result;

        indexIsCurrent = true;
        didIndexNext = false;
        blocksSinceSeek = 0;
        seekPending = false;

        // indexEnum.ord() is the ord of the block's first term; _next()
        // below will increment past it:
        state.ord = indexEnum.ord()-1;
        assert state.ord >= -1: "ord=" + state.ord;
        term.copy(indexEnum.term());

        // Now, scan:
        int left = (int) (ord - state.ord);
        while(left > 0) {
          final BytesRef term = _next();
          assert term != null;
          left--;
          assert indexIsCurrent;
        }

        // always found
        return SeekStatus.FOUND;
      }
+
+      public long ord() {
+        if (!doOrd) {
+          throw new UnsupportedOperationException();
+        }
+        return state.ord;
+      }
+
      // NOTE(review): dead code -- empty body and no callers in this file;
      // the pending-seek replay lives in next().  Candidate for removal.
      private void doPendingSeek() {
      }
+
      /* Does initial decode of next block of terms; this
         doesn't actually decode the docFreq, totalTermFreq,
         postings details (frq/prx offset, etc.) metadata;
         it just loads them as byte[] blobs which are then      
         decoded on-demand if the metadata is ever requested
         for any term in this block.  This enables terms-only
         intensive consumes (eg certain MTQs, respelling) to
         not pay the price of decoding metadata they won't
         use.

         Returns false on the EOF marker (a block with 0 terms), written by
         BlockTermsWriter.TermsWriter.finish(). */
      private boolean nextBlock() throws IOException {

        // TODO: we still lazy-decode the byte[] for each
        // term (the suffix), but, if we decoded
        // all N terms up front then seeking could do a fast
        // bsearch w/in the block...

        //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
        state.blockFilePointer = in.getFilePointer();
        state.blockTermCount = in.readVInt();
        //System.out.println("  blockTermCount=" + state.blockTermCount);
        if (state.blockTermCount == 0) {
          return false;
        }
        termBlockPrefix = in.readVInt();

        // term suffixes:
        int len = in.readVInt();
        if (termSuffixes.length < len) {
          termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
        }
        //System.out.println("  termSuffixes len=" + len);
        in.readBytes(termSuffixes, 0, len);
        termSuffixesReader.reset(termSuffixes);

        // docFreq, totalTermFreq
        len = in.readVInt();
        if (docFreqBytes.length < len) {
          docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
        }
        //System.out.println("  freq bytes len=" + len);
        in.readBytes(docFreqBytes, 0, len);
        freqReader.reset(docFreqBytes);
        metaDataUpto = 0;

        state.termCount = 0;

        // Let the postings reader pull its own per-block blob:
        postingsReader.readTermsBlock(in, fieldInfo, state);

        // Once we've read more blocks than the index divisor, the index
        // enum can no longer bracket our position:
        blocksSinceSeek++;
        indexIsCurrent &= (blocksSinceSeek < indexReader.getDivisor());
        //System.out.println("  indexIsCurrent=" + indexIsCurrent);

        return true;
      }
+
      /** Lazily decodes docFreq/totalTermFreq and postings metadata for all
       *  terms of the current block up to the current term.  Skipped while a
       *  seek(TermState) is pending, since state already holds decoded
       *  metadata in that case. */
      private void decodeMetaData() throws IOException {
        //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state);
        if (!seekPending) {
          // lazily catch up on metadata decode:
          final int limit = state.termCount;
          // Temporarily rewind termCount so the postings reader sees the
          // position of each term it decodes:
          state.termCount = metaDataUpto;
          while (metaDataUpto < limit) {
            //System.out.println("  decode");
            // TODO: we could make "tiers" of metadata, ie,
            // decode docFreq/totalTF but don't decode postings
            // metadata; this way caller could get
            // docFreq/totalTF w/o paying decode cost for
            // postings
            state.docFreq = freqReader.readVInt();
            if (!fieldInfo.omitTermFreqAndPositions) {
              // Written as delta: totalTermFreq - docFreq (always >= 0):
              state.totalTermFreq = state.docFreq + freqReader.readVLong();
            }
            postingsReader.nextTerm(fieldInfo, state);
            metaDataUpto++;
            state.termCount++;
          }
        } else {
          //System.out.println("  skip! seekPending");
        }
      }
+    }
+  }
+}

Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java?rev=1061453&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java Thu Jan 20 18:53:55 2011
@@ -0,0 +1,316 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// TODO: currently we encode all terms between two indexed
+// terms as a block; but, we could decouple the two, ie
+// allow several blocks in between two indexed terms
+
+/**
+ * Writes terms dict, block-encoding (column stride) each
+ * term's metadata for each set of terms between two
+ * index terms.
+ *
+ * @lucene.experimental
+ */
+
public class BlockTermsWriter extends FieldsConsumer {

  final static String CODEC_NAME = "BLOCK_TERMS_DICT";

  // Initial format
  public static final int VERSION_START = 0;

  public static final int VERSION_CURRENT = VERSION_START;

  /** Extension of terms file */
  static final String TERMS_EXTENSION = "tib";

  protected final IndexOutput out;
  final PostingsWriterBase postingsWriter;
  final FieldInfos fieldInfos;
  // Last field handed to addField; used only to assert fields arrive in order.
  FieldInfo currentField;
  private final TermsIndexWriterBase termsIndexWriter;
  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
  private final Comparator<BytesRef> termComp;
  private final String segment;

  /**
   * Creates the terms-dict output for the segment: writes the codec header
   * (reserving a long for the directory pointer, back-filled by
   * {@link #writeTrailer}) and lets the postings writer emit its own header.
   */
  public BlockTermsWriter(
      TermsIndexWriterBase termsIndexWriter,
      SegmentWriteState state,
      PostingsWriterBase postingsWriter,
      Comparator<BytesRef> termComp) throws IOException
  {
    final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
    this.termsIndexWriter = termsIndexWriter;
    this.termComp = termComp;
    out = state.directory.createOutput(termsFileName);
    fieldInfos = state.fieldInfos;
    writeHeader(out);
    currentField = null;
    this.postingsWriter = postingsWriter;
    segment = state.segmentName;

    //System.out.println("BTW.init seg=" + state.segmentName);

    postingsWriter.start(out);                          // have consumer write its format/header
  }
  
  /** Writes codec name/version plus a placeholder long for the directory
   *  pointer, seek'd back to and filled in by {@link #writeTrailer}. */
  protected void writeHeader(IndexOutput out) throws IOException {
    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); 

    out.writeLong(0);                             // leave space for end index pointer    
  }

  /** Starts a new field; fields must arrive in increasing name order. */
  @Override
  public TermsConsumer addField(FieldInfo field) throws IOException {
    //System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name);
    assert currentField == null || currentField.name.compareTo(field.name) < 0;
    currentField = field;
    TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
    final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
    fields.add(terms);
    return terms;
  }

  /**
   * Writes the field directory (skipping empty fields), back-fills the
   * directory pointer in the header, then closes the output, postings
   * writer and terms-index writer -- each close attempted even if an
   * earlier one throws.
   */
  @Override
  public void close() throws IOException {

    try {
      
      int nonZeroCount = 0;
      for(TermsWriter field : fields) {
        if (field.numTerms > 0) {
          nonZeroCount++;
        }
      }

      final long dirStart = out.getFilePointer();

      out.writeVInt(nonZeroCount);
      for(TermsWriter field : fields) {
        if (field.numTerms > 0) {
          out.writeVInt(field.fieldInfo.number);
          out.writeVLong(field.numTerms);
          out.writeVLong(field.termsStartPointer);
          // Mirrors BlockTermsReader: sumTotalTermFreq is only stored when
          // the field indexes freqs:
          if (!field.fieldInfo.omitTermFreqAndPositions) {
            out.writeVLong(field.sumTotalTermFreq);
          }
        }
      }
      writeTrailer(dirStart);
    } finally {
      try {
        out.close();
      } finally {
        try {
          postingsWriter.close();
        } finally {
          termsIndexWriter.close();
        }
      }
    }
  }

  /** Seeks back to the placeholder written by {@link #writeHeader} and
   *  records the directory start pointer. */
  protected void writeTrailer(long dirStart) throws IOException {
    out.seek(CodecUtil.headerLength(CODEC_NAME));
    out.writeLong(dirStart);    
  }
  
  // One pending (not-yet-flushed) term: its bytes plus docFreq/totalTF.
  private static class TermEntry {
    public final BytesRef term = new BytesRef();
    public TermStats stats;
  }

  class TermsWriter extends TermsConsumer {
    private final FieldInfo fieldInfo;
    private final PostingsWriterBase postingsWriter;
    private final long termsStartPointer;
    private long numTerms;
    private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
    long sumTotalTermFreq;
    private final BytesRef lastTerm = new BytesRef();

    // Terms buffered in RAM since the last flushed block.
    private TermEntry[] pendingTerms;

    private int pendingCount;

    TermsWriter(
        TermsIndexWriterBase.FieldWriter fieldIndexWriter,
        FieldInfo fieldInfo,
        PostingsWriterBase postingsWriter) 
    {
      this.fieldInfo = fieldInfo;
      this.fieldIndexWriter = fieldIndexWriter;
      pendingTerms = new TermEntry[32];
      for(int i=0;i<pendingTerms.length;i++) {
        pendingTerms[i] = new TermEntry();
      }
      termsStartPointer = out.getFilePointer();
      postingsWriter.setField(fieldInfo);
      this.postingsWriter = postingsWriter;
    }
    
    /** Returns the term comparator this dictionary is written with. */
    @Override
    public Comparator<BytesRef> getComparator() {
      return termComp;
    }

    /** Begins a new term; postings for it go to the returned consumer. */
    @Override
    public PostingsConsumer startTerm(BytesRef text) throws IOException {
      //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text);
      postingsWriter.startTerm();
      return postingsWriter;
    }

    // Last term of the previously flushed block; prefixes in the next block
    // are computed against it (the reader's term buffer still holds it).
    private final BytesRef lastPrevTerm = new BytesRef();

    /**
     * Buffers the finished term; when the terms index declares {@code text}
     * an index term, first flushes all buffered terms as one block so every
     * block starts at an indexed term.
     */
    @Override
    public void finishTerm(BytesRef text, TermStats stats) throws IOException {

      assert stats.docFreq > 0;
      //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq);

      final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);

      if (isIndexTerm) {
        if (pendingCount > 0) {
          // Instead of writing each term, live, we gather terms
          // in RAM in a pending buffer, and then write the
          // entire block in between index terms:
          flushBlock();
        }
        fieldIndexWriter.add(text, stats, out.getFilePointer());
      }

      // Grow the pending buffer if full, preserving pooled TermEntry
      // instances for the existing slots:
      if (pendingTerms.length == pendingCount) {
        final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
        System.arraycopy(pendingTerms, 0, newArray, 0, pendingCount);
        for(int i=pendingCount;i<newArray.length;i++) {
          newArray[i] = new TermEntry();
        }
        pendingTerms = newArray;
      }
      final TermEntry te = pendingTerms[pendingCount];
      te.term.copy(text);
      te.stats = stats;

      pendingCount++;

      postingsWriter.finishTerm(stats);
      numTerms++;
    }

    // Finishes all terms in this field
    @Override
    public void finish(long sumTotalTermFreq) throws IOException {
      if (pendingCount > 0) {
        flushBlock();
      }
      // EOF marker:
      out.writeVInt(0);

      this.sumTotalTermFreq = sumTotalTermFreq;
      fieldIndexWriter.finish(out.getFilePointer());
    }

    /** Length of the common prefix of term1 and term2 (both offset 0). */
    private int sharedPrefix(BytesRef term1, BytesRef term2) {
      assert term1.offset == 0;
      assert term2.offset == 0;
      int pos1 = 0;
      int pos1End = pos1 + Math.min(term1.length, term2.length);
      int pos2 = 0;
      while(pos1 < pos1End) {
        if (term1.bytes[pos1] != term2.bytes[pos2]) {
          return pos1;
        }
        pos1++;
        pos2++;
      }
      return pos1;
    }

    private final RAMOutputStream bytesWriter = new RAMOutputStream();

    /**
     * Writes all pending terms as one block: term count, common prefix
     * length, the suffix blob, the docFreq/totalTF-delta blob, then the
     * postings writer's per-block data.  The prefix is computed against
     * {@code lastPrevTerm} (not against the block's own first term) because
     * the reader reconstructs each term on top of the bytes already in its
     * term buffer, which after a block transition still holds the previous
     * block's last term.
     */
    private void flushBlock() throws IOException {
      //System.out.println("BTW.flushBlock pendingCount=" + pendingCount);

      // First pass: compute common prefix for all terms
      // in the block, against term before first term in
      // this block:
      int commonPrefix = sharedPrefix(lastPrevTerm, pendingTerms[0].term);
      for(int termCount=1;termCount<pendingCount;termCount++) {
        commonPrefix = Math.min(commonPrefix,
                                sharedPrefix(lastPrevTerm,
                                             pendingTerms[termCount].term));
      }        

      out.writeVInt(pendingCount);
      out.writeVInt(commonPrefix);

      // 2nd pass: write suffixes, as separate byte[] blob
      for(int termCount=0;termCount<pendingCount;termCount++) {
        final int suffix = pendingTerms[termCount].term.length - commonPrefix;
        // TODO: cutover to better intblock codec, instead
        // of interleaving here:
        bytesWriter.writeVInt(suffix);
        bytesWriter.writeBytes(pendingTerms[termCount].term.bytes, commonPrefix, suffix);
      }
      out.writeVInt((int) bytesWriter.getFilePointer());
      bytesWriter.writeTo(out);
      bytesWriter.reset();

      // 3rd pass: write the freqs as byte[] blob
      // TODO: cutover to better intblock codec.  simple64?
      // write prefix, suffix first:
      for(int termCount=0;termCount<pendingCount;termCount++) {
        final TermStats stats = pendingTerms[termCount].stats;
        assert stats != null;
        bytesWriter.writeVInt(stats.docFreq);
        if (!fieldInfo.omitTermFreqAndPositions) {
          // Stored as delta so the value is always >= 0:
          bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
        }
      }

      out.writeVInt((int) bytesWriter.getFilePointer());
      bytesWriter.writeTo(out);
      bytesWriter.reset();

      postingsWriter.flushTermsBlock();
      lastPrevTerm.copy(pendingTerms[pendingCount-1].term);
      pendingCount = 0;
    }
  }
}

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Thu Jan 20 18:53:55 2011
@@ -94,6 +94,7 @@ public class FixedGapTermsIndexReader ex
 
       // Read directory
       final int numFields = in.readVInt();      
+      //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
       for(int i=0;i<numFields;i++) {
         final int field = in.readVInt();
         final int numIndexTerms = in.readVInt();
@@ -241,9 +242,6 @@ public class FixedGapTermsIndexReader ex
       this.packedOffsetsStart = packedOffsetsStart;
       this.numIndexTerms = numIndexTerms;
 
-      // We still create the indexReader when indexDivisor
-      // is -1, so that PrefixCodedTermsReader can call
-      // isIndexTerm for each field:
       if (indexDivisor > 0) {
         loadTermsIndex();
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Thu Jan 20 18:53:55 2011
@@ -53,7 +53,6 @@ public class FixedGapTermsIndexWriter ex
 
   private final List<SimpleFieldWriter> fields = new ArrayList<SimpleFieldWriter>();
   private final FieldInfos fieldInfos; // unread
-  private IndexOutput termsOut;
 
   public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
@@ -71,13 +70,9 @@ public class FixedGapTermsIndexWriter ex
   }
 
   @Override
-  public void setTermsOutput(IndexOutput termsOut) {
-    this.termsOut = termsOut;
-  }
-  
-  @Override
-  public FieldWriter addField(FieldInfo field) {
-    SimpleFieldWriter writer = new SimpleFieldWriter(field);
+  public FieldWriter addField(FieldInfo field, long termsFilePointer) {
+    //System.out.println("FGW: addField=" + field.name);
+    SimpleFieldWriter writer = new SimpleFieldWriter(field, termsFilePointer);
     fields.add(writer);
     return writer;
   }
@@ -119,10 +114,10 @@ public class FixedGapTermsIndexWriter ex
 
     private final BytesRef lastTerm = new BytesRef();
 
-    SimpleFieldWriter(FieldInfo fieldInfo) {
+    SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) {
       this.fieldInfo = fieldInfo;
       indexStart = out.getFilePointer();
-      termsStart = lastTermsPointer = termsOut.getFilePointer();
+      termsStart = lastTermsPointer = termsFilePointer;
       termLengths = new short[0];
       termsPointerDeltas = new int[0];
     }
@@ -130,33 +125,8 @@ public class FixedGapTermsIndexWriter ex
     @Override
     public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
       // First term is first indexed term:
+      //System.out.println("FGW: checkIndexTerm text=" + text.utf8ToString());
       if (0 == (numTerms++ % termIndexInterval)) {
-
-        final int indexedTermLength = indexedTermPrefixLength(lastTerm, text);
-
-        // write only the min prefix that shows the diff
-        // against prior term
-        out.writeBytes(text.bytes, text.offset, indexedTermLength);
-
-        if (termLengths.length == numIndexTerms) {
-          termLengths = ArrayUtil.grow(termLengths);
-        }
-        if (termsPointerDeltas.length == numIndexTerms) {
-          termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
-        }
-
-        // save delta terms pointer
-        final long fp = termsOut.getFilePointer();
-        termsPointerDeltas[numIndexTerms] = (int) (fp - lastTermsPointer);
-        lastTermsPointer = fp;
-
-        // save term length (in bytes)
-        assert indexedTermLength <= Short.MAX_VALUE;
-        termLengths[numIndexTerms] = (short) indexedTermLength;
-        totTermLength += indexedTermLength;
-
-        lastTerm.copy(text);
-        numIndexTerms++;
         return true;
       } else {
         if (0 == numTerms % termIndexInterval) {
@@ -169,13 +139,41 @@ public class FixedGapTermsIndexWriter ex
     }
 
     @Override
-    public void finish() throws IOException {
+    public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+      final int indexedTermLength = indexedTermPrefixLength(lastTerm, text);
+      //System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer);
+
+      // write only the min prefix that shows the diff
+      // against prior term
+      out.writeBytes(text.bytes, text.offset, indexedTermLength);
+
+      if (termLengths.length == numIndexTerms) {
+        termLengths = ArrayUtil.grow(termLengths);
+      }
+      if (termsPointerDeltas.length == numIndexTerms) {
+        termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
+      }
+
+      // save delta terms pointer
+      termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer);
+      lastTermsPointer = termsFilePointer;
+
+      // save term length (in bytes)
+      assert indexedTermLength <= Short.MAX_VALUE;
+      termLengths[numIndexTerms] = (short) indexedTermLength;
+      totTermLength += indexedTermLength;
+
+      lastTerm.copy(text);
+      numIndexTerms++;
+    }
+
+    @Override
+    public void finish(long termsFilePointer) throws IOException {
 
       // write primary terms dict offsets
       packedIndexStart = out.getFilePointer();
 
-      final long maxValue = termsOut.getFilePointer();
-      PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(maxValue));
+      PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer));
 
       // relative to our indexStart
       long upto = 0;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Thu Jan 20 18:53:55 2011
@@ -28,12 +28,12 @@ import org.apache.lucene.util.Bits;
 
 import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; // javadocs
 
-/** PrefixCodedTermsReader interacts with a single instance
- *  of this to manage creation of {@link DocsEnum} and
+/** BlockTermsReader interacts with a single instance
+ *  of this class to manage creation of {@link DocsEnum} and
  *  {@link DocsAndPositionsEnum} instances.  It provides an
  *  IndexInput (termsIn) where this class may read any
  *  previously stored data that it had written in its
- *  corresponding {@link StandardPostingsWriter} at indexing
+ *  corresponding {@link PostingsWriterBase} at indexing
  *  time. 
  *  @lucene.experimental */
 
@@ -42,17 +42,23 @@ public abstract class PostingsReaderBase
   public abstract void init(IndexInput termsIn) throws IOException;
 
   /** Return a newly created empty TermState */
-  public abstract PrefixCodedTermState newTermState() throws IOException;
+  public abstract BlockTermState newTermState() throws IOException;
 
-  public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException;
+  /** Actually decode metadata for next term */
+  public abstract void nextTerm(FieldInfo fieldInfo, BlockTermState state) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
+  public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
 
   public abstract void close() throws IOException;
+
+  /** Reads data for all terms in the next block; this
+   *  method should merely load the byte[] blob but not
+   *  decode, which is done in {@link #nextTerm}. */
+  public abstract void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState termState) throws IOException;
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java Thu Jan 20 18:53:55 2011
@@ -33,8 +33,10 @@ public abstract class PostingsWriterBase
 
   public abstract void startTerm() throws IOException;
 
+  public abstract void flushTermsBlock() throws IOException;
+
   /** Finishes the current term */
-  public abstract void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException;
+  public abstract void finishTerm(TermStats stats) throws IOException;
 
   public abstract void setField(FieldInfo fieldInfo);
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Thu Jan 20 18:53:55 2011
@@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
  * limitations under the License.
  */
 
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.util.BytesRef;
 import java.io.IOException;
@@ -25,14 +24,13 @@ import java.io.IOException;
 /** @lucene.experimental */
 public abstract class TermsIndexWriterBase {
 
-  public abstract void setTermsOutput(IndexOutput out);
-
   public abstract class FieldWriter {
     public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
-    public abstract void finish() throws IOException;
+    public abstract void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException;
+    public abstract void finish(long termsFilePointer) throws IOException;
   }
 
-  public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException;
+  public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
 
   public abstract void close() throws IOException;
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java Thu Jan 20 18:53:55 2011
@@ -164,9 +164,6 @@ public class VariableGapTermsIndexReader
       this.fieldInfo = fieldInfo;
       this.indexStart = indexStart;
 
-      // We still create the indexReader when indexDivisor
-      // is -1, so that PrefixCodedTermsReader can call
-      // isIndexTerm for each field:
       if (indexDivisor > 0) {
         loadTermsIndex();
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Thu Jan 20 18:53:55 2011
@@ -52,7 +52,6 @@ public class VariableGapTermsIndexWriter
 
   private final List<FSTFieldWriter> fields = new ArrayList<FSTFieldWriter>();
   private final FieldInfos fieldInfos; // unread
-  private IndexOutput termsOut;
   private final IndexTermSelector policy;
 
   /** @lucene.experimental */
@@ -60,6 +59,7 @@ public class VariableGapTermsIndexWriter
     // Called sequentially on every term being written,
     // returning true if this term should be indexed
     public abstract boolean isIndexTerm(BytesRef term, TermStats stats);
+    public abstract void newField(FieldInfo fieldInfo);
   }
 
   /** Same policy as {@link FixedGapTermsIndexWriter} */
@@ -83,6 +83,11 @@ public class VariableGapTermsIndexWriter
         return false;
       }
     }
+
+    @Override
+    public void newField(FieldInfo fieldInfo) {
+      count = interval;
+    }
   }
 
   /** Sets an index term when docFreq >= docFreqThresh, or
@@ -96,6 +101,9 @@ public class VariableGapTermsIndexWriter
     public EveryNOrDocFreqTermSelector(int docFreqThresh, int interval) {
       this.interval = interval;
       this.docFreqThresh = docFreqThresh;
+
+      // First term is first indexed term:
+      count = interval;
     }
 
     @Override
@@ -108,6 +116,11 @@ public class VariableGapTermsIndexWriter
         return false;
       }
     }
+
+    @Override
+    public void newField(FieldInfo fieldInfo) {
+      count = interval;
+    }
   }
 
   // TODO: it'd be nice to let the FST builder prune based
@@ -158,14 +171,10 @@ public class VariableGapTermsIndexWriter
   }
 
   @Override
-  public void setTermsOutput(IndexOutput termsOut) {
-    this.termsOut = termsOut;
-  }
-  
-  @Override
-  public FieldWriter addField(FieldInfo field) throws IOException {
-    //System.out.println("VGW: field=" + field.name);
-    FSTFieldWriter writer = new FSTFieldWriter(field);
+  public FieldWriter addField(FieldInfo field, long termsFilePointer) throws IOException {
+    ////System.out.println("VGW: field=" + field.name);
+    policy.newField(field);
+    FSTFieldWriter writer = new FSTFieldWriter(field, termsFilePointer);
     fields.add(writer);
     return writer;
   }
@@ -200,42 +209,48 @@ public class VariableGapTermsIndexWriter
     private final BytesRef lastTerm = new BytesRef();
     private boolean first = true;
 
-    public FSTFieldWriter(FieldInfo fieldInfo) throws IOException {
+    public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
       this.fieldInfo = fieldInfo;
       fstOutputs = PositiveIntOutputs.getSingleton(true);
       fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1,
                                      0, 0, true,
                                      fstOutputs);
       indexStart = out.getFilePointer();
-      //System.out.println("VGW: field=" + fieldInfo.name);
+      ////System.out.println("VGW: field=" + fieldInfo.name);
 
       // Always put empty string in
-      fstBuilder.add(new BytesRef(), fstOutputs.get(termsOut.getFilePointer()));
+      fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
     }
 
     @Override
     public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
+      //System.out.println("VGW: index term=" + text.utf8ToString());
+      // NOTE: we must force the first term per field to be
+      // indexed, in case policy doesn't:
       if (policy.isIndexTerm(text, stats) || first) {
         first = false;
-        //System.out.println("VGW: index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer());
-        final int lengthSave = text.length;
-        text.length = indexedTermPrefixLength(lastTerm, text);
-        try {
-          fstBuilder.add(text, fstOutputs.get(termsOut.getFilePointer()));
-        } finally {
-          text.length = lengthSave;
-        }
-        lastTerm.copy(text);
+        //System.out.println("  YES");
         return true;
       } else {
-        //System.out.println("VGW: not index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer());
         lastTerm.copy(text);
         return false;
       }
     }
 
     @Override
-    public void finish() throws IOException {
+    public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+      final int lengthSave = text.length;
+      text.length = indexedTermPrefixLength(lastTerm, text);
+      try {
+        fstBuilder.add(text, fstOutputs.get(termsFilePointer));
+      } finally {
+        text.length = lengthSave;
+      }
+      lastTerm.copy(text);
+    }
+
+    @Override
+    public void finish(long termsFilePointer) throws IOException {
       fst = fstBuilder.finish();
       if (fst != null) {
         fst.save(out);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Thu Jan 20 18:53:55 2011
@@ -24,6 +24,7 @@ package org.apache.lucene.index.codecs.i
 import java.io.IOException;
 
 import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IntsRef;
 
@@ -149,7 +150,7 @@ public abstract class FixedIntBlockIndex
     private int upto;
 
     @Override
-    public void read(final IndexInput indexIn, final boolean absolute) throws IOException {
+    public void read(final DataInput indexIn, final boolean absolute) throws IOException {
       if (absolute) {
         fp = indexIn.readVLong();
         upto = indexIn.readVInt();
@@ -205,5 +206,10 @@ public abstract class FixedIntBlockIndex
       other.upto = upto;
       return other;
     }
+    
+    @Override
+    public String toString() {
+      return "fp=" + fp + " upto=" + upto;
+    }
   }
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Thu Jan 20 18:53:55 2011
@@ -111,6 +111,11 @@ public abstract class FixedIntBlockIndex
       lastUpto = upto;
       lastFP = fp;
     }
+
+    @Override
+    public String toString() {
+      return "fp=" + fp + " upto=" + upto;
+    }
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Thu Jan 20 18:53:55 2011
@@ -24,6 +24,7 @@ package org.apache.lucene.index.codecs.i
 import java.io.IOException;
 
 import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IntsRef;
 
@@ -168,7 +169,7 @@ public abstract class VariableIntBlockIn
     private int upto;
 
     @Override
-    public void read(final IndexInput indexIn, final boolean absolute) throws IOException {
+    public void read(final DataInput indexIn, final boolean absolute) throws IOException {
       if (absolute) {
         fp = indexIn.readVLong();
         upto = indexIn.readByte()&0xFF;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=1061453&r1=1061452&r2=1061453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Thu Jan 20 18:53:55 2011
@@ -32,8 +32,8 @@ import org.apache.lucene.index.codecs.Fi
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
 import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -89,7 +89,7 @@ public class PulsingCodec extends Codec 
     // Terms dict
     success = false;
     try {
-      FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+      FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
       success = true;
       return ret;
     } finally {
@@ -132,13 +132,13 @@ public class PulsingCodec extends Codec 
     // Terms dict reader
     success = false;
     try {
-      FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
-                                                       state.dir, state.fieldInfos, state.segmentInfo.name,
-                                                       pulsingReader,
-                                                       state.readBufferSize,
-                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
-                                                       StandardCodec.TERMS_CACHE_SIZE,
-                                                       state.codecId);
+      FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                state.dir, state.fieldInfos, state.segmentInfo.name,
+                                                pulsingReader,
+                                                state.readBufferSize,
+                                                BytesRef.getUTF8SortedAsUnicodeComparator(),
+                                                StandardCodec.TERMS_CACHE_SIZE,
+                                                state.codecId);
       success = true;
       return ret;
     } finally {
@@ -155,7 +155,7 @@ public class PulsingCodec extends Codec 
   @Override
   public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
-    PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+    BlockTermsReader.files(dir, segmentInfo, id, files);
     FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
   }