You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/01/29 17:21:50 UTC

svn commit: r1562497 [1/3] - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/benchmark/ lucene/classification/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/appending/ lucene/codecs/src/java/org/apache/...

Author: mikemccand
Date: Wed Jan 29 16:21:48 2014
New Revision: 1562497

URL: http://svn.apache.org/r1562497
Log:
LUCENE-3069: port fully RAM-resident terms FST dictionary implementations to 4.x

Added:
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java
      - copied unchanged from r1530520, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java
      - copied unchanged from r1530520, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPulsing41PostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
      - copied, changed from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
      - copied unchanged from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java
      - copied unchanged from r1530520, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java
      - copied unchanged from r1530520, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTPulsing41PostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java
      - copied unchanged from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
      - copied, changed from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
      - copied unchanged from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.461.cfs.zip   (with props)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.461.nocfs.zip   (with props)
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/classification/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Outputs.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/replicator/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/appending/AppendingTermsWriter.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_4x/solr/site/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Jan 29 16:21:48 2014
@@ -54,6 +54,12 @@ New Features
 * LUCENE-5415: Add multitermquery (wildcards,prefix,etc) to PostingsHighlighter.
   (Mike McCandless, Robert Muir)
 
+* LUCENE-3069: Add two memory resident dictionaries (FST terms dictionary and 
+  FSTOrd terms dictionary) to improve primary key lookups. The PostingsBaseFormat 
+  API is also changed so that term dictionaries get the ability to block 
+  encode term metadata, and all dictionary implementations can now plug in any 
+  PostingsBaseFormat. (Han Jiang, Mike McCandless)
+
 Build
 
 * LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable
@@ -161,6 +167,11 @@ API Changes
   only accepts a ValueSource instead of a convenience ctor for an expression
   string. (Simon Willnauer)
 
+* LUCENE-3069: PostingsWriterBase and PostingsReaderBase are no longer
+  responsible for encoding/decoding a block of terms.  Instead, they
+  should encode/decode each term to/from a long[] and byte[].  (Han
+  Jiang, Mike McCandless)
+
 Optimizations
 
 * LUCENE-5372: Replace StringBuffer by StringBuilder, where possible.

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java Wed Jan 29 16:21:48 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.appendi
 import java.io.IOException;
 
 import org.apache.lucene.codecs.BlockTreeTermsReader;
+import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.FieldInfos;
@@ -37,12 +38,7 @@ import org.apache.lucene.store.IndexInpu
 public class AppendingTermsReader extends BlockTreeTermsReader {
 
   final static String APPENDING_TERMS_CODEC_NAME = "APPENDING_TERMS_DICT";
-  final static int APPENDING_TERMS_VERSION_START = 0;
-  final static int APPENDING_TERMS_VERSION_CURRENT = APPENDING_TERMS_VERSION_START;
-  
   final static String APPENDING_TERMS_INDEX_CODEC_NAME = "APPENDING_TERMS_INDEX";
-  final static int APPENDING_TERMS_INDEX_VERSION_START = 0;
-  final static int APPENDING_TERMS_INDEX_VERSION_CURRENT = APPENDING_TERMS_INDEX_VERSION_START;
   
   public AppendingTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, 
       IOContext ioContext, String segmentSuffix, int indexDivisor) throws IOException {
@@ -52,15 +48,15 @@ public class AppendingTermsReader extend
   @Override
   protected int readHeader(IndexInput input) throws IOException {
     return CodecUtil.checkHeader(input, APPENDING_TERMS_CODEC_NAME,
-        APPENDING_TERMS_VERSION_START,
-        APPENDING_TERMS_VERSION_CURRENT);  
+                                 BlockTreeTermsWriter.TERMS_VERSION_START,
+                                 BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
   }
 
   @Override
   protected int readIndexHeader(IndexInput input) throws IOException {
     return CodecUtil.checkHeader(input, APPENDING_TERMS_INDEX_CODEC_NAME,
-        APPENDING_TERMS_INDEX_VERSION_START,
-        APPENDING_TERMS_INDEX_VERSION_CURRENT);
+                                 BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
+                                 BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
   }
   
   @Override

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Wed Jan 29 16:21:48 2014
@@ -142,6 +142,7 @@ public class BlockTermsReader extends Fi
         final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
+        final int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
         if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
           throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
         }
@@ -151,7 +152,7 @@ public class BlockTermsReader extends Fi
         if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
-        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
         if (previous != null) {
           throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
         }
@@ -230,8 +231,9 @@ public class BlockTermsReader extends Fi
     final long sumTotalTermFreq;
     final long sumDocFreq;
     final int docCount;
+    final int longsSize;
 
-    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
@@ -239,6 +241,7 @@ public class BlockTermsReader extends Fi
       this.sumTotalTermFreq = sumTotalTermFreq;
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
+      this.longsSize = longsSize;
     }
 
     @Override
@@ -336,6 +339,10 @@ public class BlockTermsReader extends Fi
       private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
       private int metaDataUpto;
 
+      private long[] longs;
+      private byte[] bytes;
+      private ByteArrayDataInput bytesReader;
+
       public SegmentTermsEnum() throws IOException {
         in = BlockTermsReader.this.in.clone();
         in.seek(termsStartPointer);
@@ -349,6 +356,7 @@ public class BlockTermsReader extends Fi
         termSuffixes = new byte[128];
         docFreqBytes = new byte[64];
         //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
+        longs = new long[longsSize];
       }
 
       @Override
@@ -801,11 +809,20 @@ public class BlockTermsReader extends Fi
         //System.out.println("  freq bytes len=" + len);
         in.readBytes(docFreqBytes, 0, len);
         freqReader.reset(docFreqBytes, 0, len);
-        metaDataUpto = 0;
 
-        state.termBlockOrd = 0;
+        // metadata
+        len = in.readVInt();
+        if (bytes == null) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+          bytesReader = new ByteArrayDataInput();
+        } else if (bytes.length < len) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+        }
+        in.readBytes(bytes, 0, len);
+        bytesReader.reset(bytes, 0, len);
 
-        postingsReader.readTermsBlock(in, fieldInfo, state);
+        metaDataUpto = 0;
+        state.termBlockOrd = 0;
 
         blocksSinceSeek++;
         indexIsCurrent = indexIsCurrent && (blocksSinceSeek < indexReader.getDivisor());
@@ -824,9 +841,7 @@ public class BlockTermsReader extends Fi
 
           // lazily catch up on metadata decode:
           final int limit = state.termBlockOrd;
-          // We must set/incr state.termCount because
-          // postings impl can look at this
-          state.termBlockOrd = metaDataUpto;
+          boolean absolute = metaDataUpto == 0;
           // TODO: better API would be "jump straight to term=N"???
           while (metaDataUpto < limit) {
             //System.out.println("  decode mdUpto=" + metaDataUpto);
@@ -838,16 +853,21 @@ public class BlockTermsReader extends Fi
 
             // TODO: if docFreq were bulk decoded we could
             // just skipN here:
+
+            // docFreq, totalTermFreq
             state.docFreq = freqReader.readVInt();
             //System.out.println("    dF=" + state.docFreq);
             if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
               state.totalTermFreq = state.docFreq + freqReader.readVLong();
               //System.out.println("    totTF=" + state.totalTermFreq);
             }
-
-            postingsReader.nextTerm(fieldInfo, state);
+            // metadata
+            for (int i = 0; i < longs.length; i++) {
+              longs[i] = bytesReader.readVLong();
+            }
+            postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
             metaDataUpto++;
-            state.termBlockOrd++;
+            absolute = false;
           }
         } else {
           //System.out.println("  skip! seekPending");

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Wed Jan 29 16:21:48 2014
@@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsCo
 import org.apache.lucene.codecs.PostingsConsumer;
 import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -59,7 +60,8 @@ public class BlockTermsWriter extends Fi
   // Initial format
   public static final int VERSION_START = 0;
   public static final int VERSION_APPEND_ONLY = 1;
-  public static final int VERSION_CURRENT = VERSION_APPEND_ONLY;
+  public static final int VERSION_META_ARRAY = 2;
+  public static final int VERSION_CURRENT = VERSION_META_ARRAY;
 
   /** Extension of terms file */
   static final String TERMS_EXTENSION = "tib";
@@ -77,8 +79,9 @@ public class BlockTermsWriter extends Fi
     public final long sumTotalTermFreq;
     public final long sumDocFreq;
     public final int docCount;
+    public final int longsSize;
 
-    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.termsStartPointer = termsStartPointer;
@@ -86,6 +89,7 @@ public class BlockTermsWriter extends Fi
       this.sumTotalTermFreq = sumTotalTermFreq;
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
+      this.longsSize = longsSize;
     }
   }
 
@@ -109,7 +113,7 @@ public class BlockTermsWriter extends Fi
       
       //System.out.println("BTW.init seg=" + state.segmentName);
       
-      postingsWriter.start(out); // have consumer write its format/header
+      postingsWriter.init(out); // have consumer write its format/header
       success = true;
     } finally {
       if (!success) {
@@ -133,9 +137,7 @@ public class BlockTermsWriter extends Fi
 
   @Override
   public void close() throws IOException {
-
     try {
-      
       final long dirStart = out.getFilePointer();
 
       out.writeVInt(fields.size());
@@ -148,6 +150,9 @@ public class BlockTermsWriter extends Fi
         }
         out.writeVLong(field.sumDocFreq);
         out.writeVInt(field.docCount);
+        if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+          out.writeVInt(field.longsSize);
+        }
       }
       writeTrailer(dirStart);
     } finally {
@@ -161,7 +166,7 @@ public class BlockTermsWriter extends Fi
   
   private static class TermEntry {
     public final BytesRef term = new BytesRef();
-    public TermStats stats;
+    public BlockTermState state;
   }
 
   class TermsWriter extends TermsConsumer {
@@ -173,6 +178,7 @@ public class BlockTermsWriter extends Fi
     long sumTotalTermFreq;
     long sumDocFreq;
     int docCount;
+    int longsSize;
 
     private TermEntry[] pendingTerms;
 
@@ -190,8 +196,8 @@ public class BlockTermsWriter extends Fi
         pendingTerms[i] = new TermEntry();
       }
       termsStartPointer = out.getFilePointer();
-      postingsWriter.setField(fieldInfo);
       this.postingsWriter = postingsWriter;
+      this.longsSize = postingsWriter.setField(fieldInfo);
     }
     
     @Override
@@ -237,11 +243,12 @@ public class BlockTermsWriter extends Fi
       }
       final TermEntry te = pendingTerms[pendingCount];
       te.term.copyBytes(text);
-      te.stats = stats;
+      te.state = postingsWriter.newTermState();
+      te.state.docFreq = stats.docFreq;
+      te.state.totalTermFreq = stats.totalTermFreq;
+      postingsWriter.finishTerm(te.state);
 
       pendingCount++;
-
-      postingsWriter.finishTerm(stats);
       numTerms++;
     }
 
@@ -264,7 +271,8 @@ public class BlockTermsWriter extends Fi
                                      termsStartPointer,
                                      sumTotalTermFreq,
                                      sumDocFreq,
-                                     docCount));
+                                     docCount,
+                                     longsSize));
       }
     }
 
@@ -285,6 +293,7 @@ public class BlockTermsWriter extends Fi
     }
 
     private final RAMOutputStream bytesWriter = new RAMOutputStream();
+    private final RAMOutputStream bufferWriter = new RAMOutputStream();
 
     private void flushBlock() throws IOException {
       //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -318,19 +327,34 @@ public class BlockTermsWriter extends Fi
       // TODO: cutover to better intblock codec.  simple64?
       // write prefix, suffix first:
       for(int termCount=0;termCount<pendingCount;termCount++) {
-        final TermStats stats = pendingTerms[termCount].stats;
-        assert stats != null;
-        bytesWriter.writeVInt(stats.docFreq);
+        final BlockTermState state = pendingTerms[termCount].state;
+        assert state != null;
+        bytesWriter.writeVInt(state.docFreq);
         if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-          bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
+          bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
         }
       }
+      out.writeVInt((int) bytesWriter.getFilePointer());
+      bytesWriter.writeTo(out);
+      bytesWriter.reset();
 
+      // 4th pass: write the metadata 
+      long[] longs = new long[longsSize];
+      boolean absolute = true;
+      for(int termCount=0;termCount<pendingCount;termCount++) {
+        final BlockTermState state = pendingTerms[termCount].state;
+        postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
+        for (int i = 0; i < longsSize; i++) {
+          bytesWriter.writeVLong(longs[i]);
+        }
+        bufferWriter.writeTo(bytesWriter);
+        bufferWriter.reset();
+        absolute = false;
+      }
       out.writeVInt((int) bytesWriter.getFilePointer());
       bytesWriter.writeTo(out);
       bytesWriter.reset();
 
-      postingsWriter.flushTermsBlock(pendingCount, pendingCount);
       lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
       pendingCount = 0;
     }

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java Wed Jan 29 16:21:48 2014
@@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intbloc
 import java.io.IOException;
 
 import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 
 /** Abstract base class that writes fixed-size blocks of ints
@@ -51,7 +52,7 @@ public abstract class FixedIntBlockIndex
   protected abstract void flushBlock() throws IOException;
 
   @Override
-  public IntIndexOutput.Index index() throws IOException {
+  public IntIndexOutput.Index index() {
     return new Index();
   }
 
@@ -79,7 +80,7 @@ public abstract class FixedIntBlockIndex
     }
 
     @Override
-    public void write(IndexOutput indexOut, boolean absolute) throws IOException {
+    public void write(DataOutput indexOut, boolean absolute) throws IOException {
       if (absolute) {
         indexOut.writeVInt(upto);
         indexOut.writeVLong(fp);

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java Wed Jan 29 16:21:48 2014
@@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intbloc
 import java.io.IOException;
 
 import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 
 // TODO: much of this can be shared code w/ the fixed case
@@ -60,7 +61,7 @@ public abstract class VariableIntBlockIn
   protected abstract int add(int value) throws IOException;
 
   @Override
-  public IntIndexOutput.Index index() throws IOException {
+  public IntIndexOutput.Index index() {
     return new Index();
   }
 
@@ -88,7 +89,7 @@ public abstract class VariableIntBlockIn
     }
 
     @Override
-    public void write(IndexOutput indexOut, boolean absolute) throws IOException {
+    public void write(DataOutput indexOut, boolean absolute) throws IOException {
       assert upto >= 0;
       if (absolute) {
         indexOut.writeVInt(upto);

Copied: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java?p2=lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java&p1=lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java&r1=1521173&r2=1562497&rev=1562497&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java Wed Jan 29 16:21:48 2014
@@ -18,16 +18,19 @@ package org.apache.lucene.codecs.memory;
  */
 
 import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.File;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.TreeMap;
 
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -41,25 +44,20 @@ import org.apache.lucene.index.TermState
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.CodecUtil;
 
 /** 
  * FST-based terms dictionary reader.
@@ -214,6 +212,7 @@ public class FSTOrdTermsReader extends F
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
+    @Override
     public boolean hasFreqs() {
       return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
     }
@@ -311,6 +310,11 @@ public class FSTOrdTermsReader extends F
         }
       }
 
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
       /** Decodes stats data into term state */
       void decodeStats() throws IOException {
         final int upto = (int)ord % INTERVAL;
@@ -376,11 +380,6 @@ public class FSTOrdTermsReader extends F
       }
 
       @Override
-      public Comparator<BytesRef> getComparator() {
-        return BytesRef.getUTF8SortedAsUnicodeComparator();
-      }
-
-      @Override
       public TermState termState() throws IOException {
         decodeMetaData();
         return state.clone();
@@ -817,4 +816,19 @@ public class FSTOrdTermsReader extends F
       }
     }
   }
+  
+  @Override
+  public long ramBytesUsed() {
+    long ramBytesUsed = 0;
+    for (TermsReader r : fields.values()) {
+      if (r.index != null) {
+        ramBytesUsed += r.index.sizeInBytes();
+        ramBytesUsed += RamUsageEstimator.sizeOf(r.metaBytesBlock);
+        ramBytesUsed += RamUsageEstimator.sizeOf(r.metaLongsBlock);
+        ramBytesUsed += RamUsageEstimator.sizeOf(r.skipInfo);
+        ramBytesUsed += RamUsageEstimator.sizeOf(r.statsBlock);
+      }
+    }
+    return ramBytesUsed;
+  }
 }

Copied: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (from r1521173, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java?p2=lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java&p1=lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java&r1=1521173&r2=1562497&rev=1562497&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java Wed Jan 29 16:21:48 2014
@@ -18,8 +18,6 @@ package org.apache.lucene.codecs.memory;
  */
 
 import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.File;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Collections;
@@ -27,6 +25,10 @@ import java.util.Comparator;
 import java.util.Iterator;
 import java.util.TreeMap;
 
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
@@ -40,24 +42,19 @@ import org.apache.lucene.index.TermState
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.CodecUtil;
 
 /**
  * FST-based terms dictionary reader.
@@ -185,6 +182,11 @@ public class FSTTermsReader extends Fiel
     }
 
     @Override
+    public boolean hasFreqs() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    }
+
+    @Override
     public boolean hasOffsets() {
       return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
     }
@@ -252,11 +254,6 @@ public class FSTTermsReader extends Fiel
       }
 
       @Override
-      public Comparator<BytesRef> getComparator() {
-        return BytesRef.getUTF8SortedAsUnicodeComparator();
-      }
-
-      @Override
       public TermState termState() throws IOException {
         decodeMetaData();
         return state.clone();
@@ -322,6 +319,11 @@ public class FSTTermsReader extends Fiel
         this.meta = null;
       }
 
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
       // Let PBF decode metadata from long[] and byte[]
       @Override
       void decodeMetaData() throws IOException {
@@ -462,6 +464,11 @@ public class FSTTermsReader extends Fiel
       }
 
       @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      @Override
       void decodeMetaData() throws IOException {
         assert term != null;
         if (!decoded) {
@@ -729,4 +736,13 @@ public class FSTTermsReader extends Fiel
       }
     }
   }
+
+  @Override
+  public long ramBytesUsed() {
+    long ramBytesUsed = 0;
+    for (TermsReader r : fields.values()) {
+      ramBytesUsed += r.dict == null ? 0 : r.dict.sizeInBytes();
+    }
+    return ramBytesUsed;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/package.html Wed Jan 29 16:21:48 2014
@@ -20,6 +20,6 @@
    <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 </head>
 <body>
-Postings and DocValues formats that are read entirely into memory.
+Term dictionary, DocValues or Postings formats that are read entirely into memory.
 </body>
-</html>
\ No newline at end of file
+</html>

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Wed Jan 29 16:21:48 2014
@@ -79,7 +79,7 @@ public abstract class PulsingPostingsFor
 
       // Terms that have <= freqCutoff number of docs are
       // "pulsed" (inlined):
-      pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
+      pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
       FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
       success = true;
       return ret;
@@ -98,7 +98,7 @@ public abstract class PulsingPostingsFor
     boolean success = false;
     try {
       docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
-      pulsingReader = new PulsingPostingsReader(docsReader);
+      pulsingReader = new PulsingPostingsReader(state, docsReader);
       FieldsProducer ret = new BlockTreeTermsReader(
                                                     state.directory, state.fieldInfos, state.segmentInfo,
                                                     pulsingReader,

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Wed Jan 29 16:21:48 2014
@@ -20,16 +20,20 @@ package org.apache.lucene.codecs.pulsing
 import java.io.IOException;
 import java.util.IdentityHashMap;
 import java.util.Map;
+import java.util.TreeMap;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Attribute;
@@ -37,6 +41,7 @@ import org.apache.lucene.util.AttributeI
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 /** Concrete class that reads the current doc/freq/skip
  *  postings format 
@@ -50,28 +55,53 @@ public class PulsingPostingsReader exten
 
   // Fallback reader for non-pulsed terms:
   final PostingsReaderBase wrappedPostingsReader;
+  final SegmentReadState segmentState;
   int maxPositions;
+  int version;
+  TreeMap<Integer, Integer> fields;
 
-  public PulsingPostingsReader(PostingsReaderBase wrappedPostingsReader) {
+  public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader) {
     this.wrappedPostingsReader = wrappedPostingsReader;
+    this.segmentState = state;
   }
 
   @Override
   public void init(IndexInput termsIn) throws IOException {
-    CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
-      PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_START);
+    version = CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
+                                    PulsingPostingsWriter.VERSION_START, 
+                                    PulsingPostingsWriter.VERSION_CURRENT);
     maxPositions = termsIn.readVInt();
     wrappedPostingsReader.init(termsIn);
+    if (wrappedPostingsReader instanceof PulsingPostingsReader || 
+        version < PulsingPostingsWriter.VERSION_META_ARRAY) {
+      fields = null;
+    } else {
+      fields = new TreeMap<Integer, Integer>();
+      String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
+      IndexInput in = null;
+      try { 
+        in = segmentState.directory.openInput(summaryFileName, segmentState.context);
+        CodecUtil.checkHeader(in, PulsingPostingsWriter.CODEC, version, 
+                              PulsingPostingsWriter.VERSION_CURRENT);
+        int numField = in.readVInt();
+        for (int i = 0; i < numField; i++) {
+          int fieldNum = in.readVInt();
+          int longsSize = in.readVInt();
+          fields.put(fieldNum, longsSize);
+        }
+      } finally {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
   }
 
   private static class PulsingTermState extends BlockTermState {
+    private boolean absolute = false;
+    private long[] longs;
     private byte[] postings;
     private int postingsSize;                     // -1 if this term was not inlined
     private BlockTermState wrappedTermState;
 
-    ByteArrayDataInput inlinedBytesReader;
-    private byte[] inlinedBytes;
-
     @Override
     public PulsingTermState clone() {
       PulsingTermState clone;
@@ -82,6 +112,11 @@ public class PulsingPostingsReader exten
       } else {
         assert wrappedTermState != null;
         clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
+        clone.absolute = absolute;
+        if (longs != null) {
+          clone.longs = new long[longs.length];
+          System.arraycopy(longs, 0, clone.longs, 0, longs.length);
+        }
       }
       return clone;
     }
@@ -99,11 +134,6 @@ public class PulsingPostingsReader exten
       } else {
         wrappedTermState.copyFrom(other.wrappedTermState);
       }
-
-      // NOTE: we do not copy the
-      // inlinedBytes/inlinedBytesReader; these are only
-      // stored on the "primary" TermState.  They are
-      // "transient" to cloned term states.
     }
 
     @Override
@@ -117,25 +147,6 @@ public class PulsingPostingsReader exten
   }
 
   @Override
-  public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
-    //System.out.println("PR.readTermsBlock state=" + _termState);
-    final PulsingTermState termState = (PulsingTermState) _termState;
-    if (termState.inlinedBytes == null) {
-      termState.inlinedBytes = new byte[128];
-      termState.inlinedBytesReader = new ByteArrayDataInput();
-    }
-    int len = termsIn.readVInt();
-    //System.out.println("  len=" + len + " fp=" + termsIn.getFilePointer());
-    if (termState.inlinedBytes.length < len) {
-      termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
-    }
-    termsIn.readBytes(termState.inlinedBytes, 0, len);
-    termState.inlinedBytesReader.reset(termState.inlinedBytes);
-    termState.wrappedTermState.termBlockOrd = 0;
-    wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
-  }
-
-  @Override
   public BlockTermState newTermState() throws IOException {
     PulsingTermState state = new PulsingTermState();
     state.wrappedTermState = wrappedPostingsReader.newTermState();
@@ -143,20 +154,20 @@ public class PulsingPostingsReader exten
   }
 
   @Override
-  public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+  public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException {
     //System.out.println("PR nextTerm");
     PulsingTermState termState = (PulsingTermState) _termState;
-
+    assert empty.length == 0;
+    termState.absolute = termState.absolute || absolute;
     // if we have positions, its total TF, otherwise its computed based on docFreq.
     long count = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? termState.totalTermFreq : termState.docFreq;
     //System.out.println("  count=" + count + " threshold=" + maxPositions);
 
     if (count <= maxPositions) {
-
       // Inlined into terms dict -- just read the byte[] blob in,
       // but don't decode it now (we only decode when a DocsEnum
       // or D&PEnum is pulled):
-      termState.postingsSize = termState.inlinedBytesReader.readVInt();
+      termState.postingsSize = in.readVInt();
       if (termState.postings == null || termState.postings.length < termState.postingsSize) {
         termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
       }
@@ -164,16 +175,23 @@ public class PulsingPostingsReader exten
       // (the blob holding all inlined terms' blobs for
       // current term block) into another byte[] (just the
       // blob for this term)...
-      termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
+      in.readBytes(termState.postings, 0, termState.postingsSize);
       //System.out.println("  inlined bytes=" + termState.postingsSize);
+      termState.absolute = termState.absolute || absolute;
     } else {
       //System.out.println("  not inlined");
+      final int longsSize = fields == null ? 0 : fields.get(fieldInfo.number);
+      if (termState.longs == null) {
+        termState.longs = new long[longsSize];
+      }
+      for (int i = 0; i < longsSize; i++) {
+        termState.longs[i] = in.readVLong();
+      }
       termState.postingsSize = -1;
-      // TODO: should we do full copyFrom?  much heavier...?
       termState.wrappedTermState.docFreq = termState.docFreq;
       termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
-      wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
-      termState.wrappedTermState.termBlockOrd++;
+      wrappedPostingsReader.decodeTerm(termState.longs, in, fieldInfo, termState.wrappedTermState, termState.absolute);
+      termState.absolute = false;
     }
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java Wed Jan 29 16:21:48 2014
@@ -21,14 +21,19 @@ import java.io.IOException;
 import java.util.List;
 import java.util.ArrayList;
 
+import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 
 // TODO: we now inline based on total TF of the term,
 // but it might be better to inline by "net bytes used"
@@ -49,26 +54,43 @@ public final class PulsingPostingsWriter
 
   final static String CODEC = "PulsedPostingsWriter";
 
+  // recording field summary
+  final static String SUMMARY_EXTENSION = "smy";
+
   // To add a new version, increment from the last one, and
   // change VERSION_CURRENT to point to your new version:
   final static int VERSION_START = 0;
 
-  final static int VERSION_CURRENT = VERSION_START;
+  final static int VERSION_META_ARRAY = 1;
+
+  final static int VERSION_CURRENT = VERSION_META_ARRAY;
 
+  private SegmentWriteState segmentState;
   private IndexOutput termsOut;
 
+  private List<FieldMetaData> fields;
+
   private IndexOptions indexOptions;
   private boolean storePayloads;
 
-  private static class PendingTerm {
-    private final byte[] bytes;
-    public PendingTerm(byte[] bytes) {
-      this.bytes = bytes;
+  // information for wrapped PF, in current field
+  private int longsSize;
+  private long[] longs;
+  boolean absolute;
+
+  private static class PulsingTermState extends BlockTermState {
+    private byte[] bytes;
+    private BlockTermState wrappedState;
+    @Override
+    public String toString() {
+      if (bytes != null) {
+        return "inlined";
+      } else {
+        return "not inlined wrapped=" + wrappedState;
+      }
     }
   }
 
-  private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
   // one entry per position
   private final Position[] pending;
   private int pendingCount = 0;                           // -1 once we've hit too many positions
@@ -83,6 +105,15 @@ public final class PulsingPostingsWriter
     int endOffset;
   }
 
+  private static final class FieldMetaData {
+    int fieldNumber;
+    int longsSize;
+    FieldMetaData(int number, int size) {
+      fieldNumber = number;
+      longsSize = size;
+    }
+  }
+
   // TODO: -- lazy init this?  ie, if every single term
   // was inlined (eg for a "primary key" field) then we
   // never need to use this fallback?  Fallback writer for
@@ -92,23 +123,33 @@ public final class PulsingPostingsWriter
   /** If the total number of positions (summed across all docs
    *  for this term) is <= maxPositions, then the postings are
    *  inlined into terms dict */
-  public PulsingPostingsWriter(int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
+  public PulsingPostingsWriter(SegmentWriteState state, int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
+
     pending = new Position[maxPositions];
     for(int i=0;i<maxPositions;i++) {
       pending[i] = new Position();
     }
+    fields = new ArrayList<FieldMetaData>();
 
     // We simply wrap another postings writer, but only call
     // on it when tot positions is >= the cutoff:
     this.wrappedPostingsWriter = wrappedPostingsWriter;
+    this.segmentState = state;
   }
 
   @Override
-  public void start(IndexOutput termsOut) throws IOException {
+  public void init(IndexOutput termsOut) throws IOException {
     this.termsOut = termsOut;
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
     termsOut.writeVInt(pending.length); // encode maxPositions in header
-    wrappedPostingsWriter.start(termsOut);
+    wrappedPostingsWriter.init(termsOut);
+  }
+
+  @Override
+  public BlockTermState newTermState() throws IOException {
+    PulsingTermState state = new PulsingTermState();
+    state.wrappedState = wrappedPostingsWriter.newTermState();
+    return state;
   }
 
   @Override
@@ -123,11 +164,15 @@ public final class PulsingPostingsWriter
   // Currently, this instance is re-used across fields, so
   // our parent calls setField whenever the field changes
   @Override
-  public void setField(FieldInfo fieldInfo) {
+  public int setField(FieldInfo fieldInfo) {
     this.indexOptions = fieldInfo.getIndexOptions();
     //if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
     storePayloads = fieldInfo.hasPayloads();
-    wrappedPostingsWriter.setField(fieldInfo);
+    absolute = false;
+    longsSize = wrappedPostingsWriter.setField(fieldInfo);
+    longs = new long[longsSize];
+    fields.add(new FieldMetaData(fieldInfo.number, longsSize));
+    return 0;
     //DEBUG = BlockTreeTermsWriter.DEBUG;
   }
 
@@ -219,18 +264,19 @@ public final class PulsingPostingsWriter
 
   /** Called when we are done adding docs to this term */
   @Override
-  public void finishTerm(TermStats stats) throws IOException {
+  public void finishTerm(BlockTermState _state) throws IOException {
+    PulsingTermState state = (PulsingTermState) _state;
+
     // if (DEBUG) System.out.println("PW   finishTerm docCount=" + stats.docFreq + " pendingCount=" + pendingCount + " pendingTerms.size()=" + pendingTerms.size());
 
     assert pendingCount > 0 || pendingCount == -1;
 
     if (pendingCount == -1) {
-      wrappedPostingsWriter.finishTerm(stats);
-      // Must add null entry to record terms that our
-      // wrapped postings impl added
-      pendingTerms.add(null);
+      state.wrappedState.docFreq = state.docFreq;
+      state.wrappedState.totalTermFreq = state.totalTermFreq;
+      state.bytes = null;
+      wrappedPostingsWriter.finishTerm(state.wrappedState);
     } else {
-
       // There were few enough total occurrences for this
       // term, so we fully inline our postings data into
       // terms dict, now:
@@ -325,61 +371,54 @@ public final class PulsingPostingsWriter
         }
       }
 
-      final byte[] bytes = new byte[(int) buffer.getFilePointer()];
-      buffer.writeTo(bytes, 0);
-      pendingTerms.add(new PendingTerm(bytes));
+      state.bytes = new byte[(int) buffer.getFilePointer()];
+      buffer.writeTo(state.bytes, 0);
       buffer.reset();
     }
-
     pendingCount = 0;
   }
 
   @Override
-  public void close() throws IOException {
-    wrappedPostingsWriter.close();
-  }
-
-  @Override
-  public void flushTermsBlock(int start, int count) throws IOException {
-    // if (DEBUG) System.out.println("PW: flushTermsBlock start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size());
-    int wrappedCount = 0;
-    assert buffer.getFilePointer() == 0;
-    assert start >= count;
-
-    final int limit = pendingTerms.size() - start + count;
-
-    for(int idx=pendingTerms.size()-start; idx<limit; idx++) {
-      final PendingTerm term = pendingTerms.get(idx);
-      if (term == null) {
-        wrappedCount++;
-      } else {
-        buffer.writeVInt(term.bytes.length);
-        buffer.writeBytes(term.bytes, 0, term.bytes.length);
+  public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+    PulsingTermState state = (PulsingTermState)_state;
+    assert empty.length == 0;
+    this.absolute = this.absolute || absolute;
+    if (state.bytes == null) {
+      wrappedPostingsWriter.encodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
+      for (int i = 0; i < longsSize; i++) {
+        out.writeVLong(longs[i]);
       }
+      buffer.writeTo(out);
+      buffer.reset();
+      this.absolute = false;
+    } else {
+      out.writeVInt(state.bytes.length);
+      out.writeBytes(state.bytes, 0, state.bytes.length);
+      this.absolute = this.absolute || absolute;
     }
+  }
 
-    termsOut.writeVInt((int) buffer.getFilePointer());
-    buffer.writeTo(termsOut);
-    buffer.reset();
-
-    // TDOO: this could be somewhat costly since
-    // pendingTerms.size() could be biggish?
-    int futureWrappedCount = 0;
-    final int limit2 = pendingTerms.size();
-    for(int idx=limit;idx<limit2;idx++) {
-      if (pendingTerms.get(idx) == null) {
-        futureWrappedCount++;
+  @Override
+  public void close() throws IOException {
+    wrappedPostingsWriter.close();
+    if (wrappedPostingsWriter instanceof PulsingPostingsWriter ||
+        VERSION_CURRENT < VERSION_META_ARRAY) {
+      return;
+    }
+    String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, SUMMARY_EXTENSION);
+    IndexOutput out = null;
+    try {
+      out = segmentState.directory.createOutput(summaryFileName, segmentState.context);
+      CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT);
+      out.writeVInt(fields.size());
+      for (FieldMetaData field : fields) {
+        out.writeVInt(field.fieldNumber);
+        out.writeVInt(field.longsSize);
       }
+      out.close();
+    } finally {
+      IOUtils.closeWhileHandlingException(out);
     }
-
-    // Remove the terms we just wrote:
-    pendingTerms.subList(pendingTerms.size()-start, limit).clear();
-
-    // if (DEBUG) System.out.println("PW:   len=" + buffer.getFilePointer() + " fp=" + termsOut.getFilePointer() + " futureWrappedCount=" + futureWrappedCount + " wrappedCount=" + wrappedCount);
-    // TODO: can we avoid calling this if all terms
-    // were inlined...?  Eg for a "primary key" field, the
-    // wrapped codec is never invoked...
-    wrappedPostingsWriter.flushTermsBlock(futureWrappedCount+wrappedCount, wrappedCount);
   }
 
   // Pushes pending positions to the wrapped codec

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/IntIndexOutput.java Wed Jan 29 16:21:48 2014
@@ -20,7 +20,7 @@ package org.apache.lucene.codecs.sep;
 // TODO: we may want tighter integration w/ IndexOutput --
 // may give better perf:
 
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.DataOutput;
 
 import java.io.IOException;
 import java.io.Closeable;
@@ -49,12 +49,12 @@ public abstract class IntIndexOutput imp
 
     /** Writes "location" of current output pointer of primary
      *  output to different output (out) */
-    public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
+    public abstract void write(DataOutput indexOut, boolean absolute) throws IOException;
   }
 
   /** If you are indexing the primary output file, call
    *  this and interact with the returned IndexWriter. */
-  public abstract Index index() throws IOException;
+  public abstract Index index();
 
   @Override
   public abstract void close() throws IOException;

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Wed Jan 29 16:21:48 2014
@@ -31,6 +31,7 @@ import org.apache.lucene.index.IndexFile
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -115,15 +116,6 @@ public class SepPostingsReader extends P
     long payloadFP;
     long skipFP;
 
-    // Only used for "primary" term state; these are never
-    // copied on clone:
-    
-    // TODO: these should somehow be stored per-TermsEnum
-    // not per TermState; maybe somehow the terms dict
-    // should load/manage the byte[]/DataReader for us?
-    byte[] bytes;
-    ByteArrayDataInput bytesReader;
-
     @Override
     public SepTermState clone() {
       SepTermState other = new SepTermState();
@@ -182,40 +174,21 @@ public class SepPostingsReader extends P
   }
 
   @Override
-  public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
-    final SepTermState termState = (SepTermState) _termState;
-    //System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
-    final int len = termsIn.readVInt();
-    //System.out.println("  numBytes=" + len);
-    if (termState.bytes == null) {
-      termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
-      termState.bytesReader = new ByteArrayDataInput(termState.bytes);
-    } else if (termState.bytes.length < len) {
-      termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
-    }
-    termState.bytesReader.reset(termState.bytes, 0, len);
-    termsIn.readBytes(termState.bytes, 0, len);
-  }
-
-  @Override
-  public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+  public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) 
+    throws IOException {
     final SepTermState termState = (SepTermState) _termState;
-    final boolean isFirstTerm = termState.termBlockOrd == 0;
-    //System.out.println("SEPR.nextTerm termCount=" + termState.termBlockOrd + " isFirstTerm=" + isFirstTerm + " bytesReader.pos=" + termState.bytesReader.getPosition());
-    //System.out.println("  docFreq=" + termState.docFreq);
-    termState.docIndex.read(termState.bytesReader, isFirstTerm);
-    //System.out.println("  docIndex=" + termState.docIndex);
+    termState.docIndex.read(in, absolute);
     if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
-      termState.freqIndex.read(termState.bytesReader, isFirstTerm);
+      termState.freqIndex.read(in, absolute);
       if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
         //System.out.println("  freqIndex=" + termState.freqIndex);
-        termState.posIndex.read(termState.bytesReader, isFirstTerm);
+        termState.posIndex.read(in, absolute);
         //System.out.println("  posIndex=" + termState.posIndex);
         if (fieldInfo.hasPayloads()) {
-          if (isFirstTerm) {
-            termState.payloadFP = termState.bytesReader.readVLong();
+          if (absolute) {
+            termState.payloadFP = in.readVLong();
           } else {
-            termState.payloadFP += termState.bytesReader.readVLong();
+            termState.payloadFP += in.readVLong();
           }
           //System.out.println("  payloadFP=" + termState.payloadFP);
         }
@@ -223,14 +196,14 @@ public class SepPostingsReader extends P
     }
 
     if (termState.docFreq >= skipMinimum) {
-      //System.out.println("   readSkip @ " + termState.bytesReader.getPosition());
-      if (isFirstTerm) {
-        termState.skipFP = termState.bytesReader.readVLong();
+      //System.out.println("   readSkip @ " + in.getPosition());
+      if (absolute) {
+        termState.skipFP = in.readVLong();
       } else {
-        termState.skipFP += termState.bytesReader.readVLong();
+        termState.skipFP += in.readVLong();
       }
       //System.out.println("  skipFP=" + termState.skipFP);
-    } else if (isFirstTerm) {
+    } else if (absolute) {
       termState.skipFP = 0;
     }
   }

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java Wed Jan 29 16:21:48 2014
@@ -18,18 +18,17 @@ package org.apache.lucene.codecs.sep;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
+import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
@@ -64,7 +63,6 @@ public final class SepPostingsWriter ext
   IndexOutput payloadOut;
 
   IndexOutput skipOut;
-  IndexOutput termsOut;
 
   final SepSkipListWriter skipListWriter;
   /** Expert: The fraction of TermDocs entries stored in skip tables,
@@ -98,8 +96,9 @@ public final class SepPostingsWriter ext
   int lastDocID;
   int df;
 
-  // Holds pending byte[] blob for the current terms block
-  private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
+  SepTermState lastState;
+  long lastPayloadFP;
+  long lastSkipFP;
 
   public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
     this(state, factory, DEFAULT_SKIP_INTERVAL);
@@ -116,9 +115,10 @@ public final class SepPostingsWriter ext
       this.skipInterval = skipInterval;
       this.skipMinimum = skipInterval; /* set to the same for now */
       final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
+
       docOut = factory.createOutput(state.directory, docFileName, state.context);
       docIndex = docOut.index();
-      
+
       if (state.fieldInfos.hasFreq()) {
         final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
         freqOut = factory.createOutput(state.directory, frqFileName, state.context);
@@ -134,7 +134,7 @@ public final class SepPostingsWriter ext
         final String payloadFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAYLOAD_EXTENSION);
         payloadOut = state.directory.createOutput(payloadFileName, state.context);
       }
-      
+
       final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SKIP_EXTENSION);
       skipOut = state.directory.createOutput(skipFileName, state.context);
       
@@ -155,8 +155,7 @@ public final class SepPostingsWriter ext
   }
 
   @Override
-  public void start(IndexOutput termsOut) throws IOException {
-    this.termsOut = termsOut;
+  public void init(IndexOutput termsOut) throws IOException {
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
     // TODO: -- just ask skipper to "start" here
     termsOut.writeInt(skipInterval);                // write skipInterval
@@ -165,6 +164,11 @@ public final class SepPostingsWriter ext
   }
 
   @Override
+  public BlockTermState newTermState() {
+    return new SepTermState();
+  }
+
+  @Override
   public void startTerm() throws IOException {
     docIndex.mark();
     //System.out.println("SEPW: startTerm docIndex=" + docIndex);
@@ -185,7 +189,7 @@ public final class SepPostingsWriter ext
   // Currently, this instance is re-used across fields, so
   // our parent calls setField whenever the field changes
   @Override
-  public void setField(FieldInfo fieldInfo) {
+  public int setField(FieldInfo fieldInfo) {
     this.fieldInfo = fieldInfo;
     this.indexOptions = fieldInfo.getIndexOptions();
     if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
@@ -193,6 +197,24 @@ public final class SepPostingsWriter ext
     }
     skipListWriter.setIndexOptions(indexOptions);
     storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
+    lastPayloadFP = 0;
+    lastSkipFP = 0;
+    lastState = setEmptyState();
+    return 0;
+  }
+
+  private SepTermState setEmptyState() {
+    SepTermState emptyState = new SepTermState();
+    emptyState.docIndex = docOut.index();
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      emptyState.freqIndex = freqOut.index();
+      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        emptyState.posIndex = posOut.index();
+      }
+    }
+    emptyState.payloadFP = 0;
+    emptyState.skipFP = 0;
+    return emptyState;
   }
 
   /** Adds a new doc in this term.  If this returns null
@@ -260,132 +282,86 @@ public final class SepPostingsWriter ext
     lastPosition = 0;
   }
 
-  private static class PendingTerm {
-    public final IntIndexOutput.Index docIndex;
-    public final IntIndexOutput.Index freqIndex;
-    public final IntIndexOutput.Index posIndex;
-    public final long payloadFP;
-    public final long skipFP;
-
-    public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
-      this.docIndex = docIndex;
-      this.freqIndex = freqIndex;
-      this.posIndex = posIndex;
-      this.payloadFP = payloadFP;
-      this.skipFP = skipFP;
-    }
+  private static class SepTermState extends BlockTermState {
+    public IntIndexOutput.Index docIndex;
+    public IntIndexOutput.Index freqIndex;
+    public IntIndexOutput.Index posIndex;
+    public long payloadFP;
+    public long skipFP;
   }
 
-  private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
-
   /** Called when we are done adding docs to this term */
   @Override
-  public void finishTerm(TermStats stats) throws IOException {
+  public void finishTerm(BlockTermState _state) throws IOException {
+    SepTermState state = (SepTermState)_state;
     // TODO: -- wasteful we are counting this in two places?
-    assert stats.docFreq > 0;
-    assert stats.docFreq == df;
-
-    final IntIndexOutput.Index docIndexCopy = docOut.index();
-    docIndexCopy.copyFrom(docIndex, false);
+    assert state.docFreq > 0;
+    assert state.docFreq == df;
 
-    final IntIndexOutput.Index freqIndexCopy;
-    final IntIndexOutput.Index posIndexCopy;
+    state.docIndex = docOut.index();
+    state.docIndex.copyFrom(docIndex, false);
     if (indexOptions != IndexOptions.DOCS_ONLY) {
-      freqIndexCopy = freqOut.index();
-      freqIndexCopy.copyFrom(freqIndex, false);
+      state.freqIndex = freqOut.index();
+      state.freqIndex.copyFrom(freqIndex, false);
       if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-        posIndexCopy = posOut.index();
-        posIndexCopy.copyFrom(posIndex, false);
+        state.posIndex = posOut.index();
+        state.posIndex.copyFrom(posIndex, false);
       } else {
-        posIndexCopy = null;
+        state.posIndex = null;
       }
     } else {
-      freqIndexCopy = null;
-      posIndexCopy = null;
+      state.freqIndex = null;
+      state.posIndex = null;
     }
 
-    final long skipFP;
     if (df >= skipMinimum) {
-      skipFP = skipOut.getFilePointer();
+      state.skipFP = skipOut.getFilePointer();
       //System.out.println("  skipFP=" + skipFP);
       skipListWriter.writeSkip(skipOut);
       //System.out.println("    numBytes=" + (skipOut.getFilePointer()-skipFP));
     } else {
-      skipFP = -1;
+      state.skipFP = -1;
     }
+    state.payloadFP = payloadStart;
 
     lastDocID = 0;
     df = 0;
-
-    pendingTerms.add(new PendingTerm(docIndexCopy,
-                                     freqIndexCopy,
-                                     posIndexCopy,
-                                     payloadStart,
-                                     skipFP));
   }
 
   @Override
-  public void flushTermsBlock(int start, int count) throws IOException {
-    //System.out.println("SEPW: flushTermsBlock: start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size() + " termsOut.fp=" + termsOut.getFilePointer());
-    assert indexBytesWriter.getFilePointer() == 0;
-    final int absStart = pendingTerms.size() - start;
-    final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
-
-    long lastPayloadFP = 0;
-    long lastSkipFP = 0;
-
-    if (count == 0) {
-      termsOut.writeByte((byte) 0);
-      return;
-    }
-
-    final PendingTerm firstTerm = slice.get(0);
-    final IntIndexOutput.Index docIndexFlush = firstTerm.docIndex;
-    final IntIndexOutput.Index freqIndexFlush = firstTerm.freqIndex;
-    final IntIndexOutput.Index posIndexFlush = firstTerm.posIndex;
-
-    for(int idx=0;idx<slice.size();idx++) {
-      final boolean isFirstTerm = idx == 0;
-      final PendingTerm t = slice.get(idx);
-      //System.out.println("  write idx=" + idx + " docIndex=" + t.docIndex);
-      docIndexFlush.copyFrom(t.docIndex, false);
-      docIndexFlush.write(indexBytesWriter, isFirstTerm);
-      if (indexOptions != IndexOptions.DOCS_ONLY) {
-        freqIndexFlush.copyFrom(t.freqIndex, false);
-        freqIndexFlush.write(indexBytesWriter, isFirstTerm);
-        //System.out.println("    freqIndex=" + t.freqIndex);
-        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-          posIndexFlush.copyFrom(t.posIndex, false);
-          posIndexFlush.write(indexBytesWriter, isFirstTerm);
-          //System.out.println("    posIndex=" + t.posIndex);
-          if (storePayloads) {
-            //System.out.println("    payloadFP=" + t.payloadFP);
-            if (isFirstTerm) {
-              indexBytesWriter.writeVLong(t.payloadFP);
-            } else {
-              indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
-            }
-            lastPayloadFP = t.payloadFP;
+  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+    SepTermState state = (SepTermState)_state;
+    if (absolute) {
+      lastSkipFP = 0;
+      lastPayloadFP = 0;
+      lastState = state;
+    }
+    lastState.docIndex.copyFrom(state.docIndex, false);
+    lastState.docIndex.write(out, absolute);
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      lastState.freqIndex.copyFrom(state.freqIndex, false);
+      lastState.freqIndex.write(out, absolute);
+      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        lastState.posIndex.copyFrom(state.posIndex, false);
+        lastState.posIndex.write(out, absolute);
+        if (storePayloads) {
+          if (absolute) {
+            out.writeVLong(state.payloadFP);
+          } else {
+            out.writeVLong(state.payloadFP - lastPayloadFP);
           }
+          lastPayloadFP = state.payloadFP;
         }
       }
-
-      if (t.skipFP != -1) {
-        if (isFirstTerm) {
-          indexBytesWriter.writeVLong(t.skipFP);
-        } else {
-          indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
-        }
-        lastSkipFP = t.skipFP;
-        //System.out.println("    skipFP=" + t.skipFP);
+    }
+    if (state.skipFP != -1) {
+      if (absolute) {
+        out.writeVLong(state.skipFP);
+      } else {
+        out.writeVLong(state.skipFP - lastSkipFP);
       }
+      lastSkipFP = state.skipFP;
     }
-
-    //System.out.println("  numBytes=" + indexBytesWriter.getFilePointer());
-    termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
-    indexBytesWriter.writeTo(termsOut);
-    indexBytesWriter.reset();
-    slice.clear();
   }
 
   @Override

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Wed Jan 29 16:21:48 2014
@@ -18,3 +18,7 @@ org.apache.lucene.codecs.simpletext.Simp
 org.apache.lucene.codecs.memory.MemoryPostingsFormat
 org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
 org.apache.lucene.codecs.memory.DirectPostingsFormat
+org.apache.lucene.codecs.memory.FSTPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
+org.apache.lucene.codecs.memory.FSTPostingsFormat
+org.apache.lucene.codecs.memory.FSTOrdPostingsFormat

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1562497&r1=1562496&r2=1562497&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Wed Jan 29 16:21:48 2014
@@ -34,6 +34,7 @@ public class BlockTermState extends OrdT
   /** the term's ord in the current block */
   public int termBlockOrd;
   /** fp into the terms dict primary file (_X.tim) that holds this term */
+  // TODO: update BTR to nuke this
   public long blockFilePointer;
 
   /** Sole constructor. (For invocation by subclass