Posted to java-commits@lucene.apache.org by mi...@apache.org on 2010/01/30 11:18:25 UTC
svn commit: r904750 [2/4] - in /lucene/java/branches/flex_1458: ./
contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/
contrib/misc/src/java/org/apache/lucene/index/
contrib/queries/src/java/org/apache/lucene/search/
src/java/org/apache/luce...
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Sat Jan 30 10:16:35 2010
@@ -24,19 +24,19 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.Codec;
-import org.apache.lucene.index.codecs.standard.StandardDocsConsumer;
-import org.apache.lucene.index.codecs.standard.StandardDocsProducer;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
-import org.apache.lucene.index.codecs.standard.StandardCodec;
-import org.apache.lucene.index.codecs.standard.StandardDocsReader;
-import org.apache.lucene.index.codecs.standard.StandardDocsWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -55,14 +55,14 @@
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- // We wrap StandardDocsWriter, but any DocsConsumer
+ // We wrap StandardPostingsWriterImpl, but any StandardPostingsWriter
// will work:
- StandardDocsConsumer docsWriter = new StandardDocsWriter(state);
+ StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
final int freqCutoff = 1;
- StandardDocsConsumer pulsingWriter = new PulsingDocsWriter(state, freqCutoff, docsWriter);
+ StandardPostingsWriter pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter);
// Terms dict index
StandardTermsIndexWriter indexWriter;
@@ -96,10 +96,10 @@
@Override
public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
- // We wrap StandardDocsReader, but any DocsProducer
+ // We wrap StandardPostingsReaderImpl, but any StandardPostingsReader
// will work:
- StandardDocsProducer docs = new StandardDocsReader(dir, si, readBufferSize);
- StandardDocsProducer docsReader = new PulsingDocsReader(dir, si, readBufferSize, docs);
+ StandardPostingsReader docsReader = new StandardPostingsReaderImpl(dir, si, readBufferSize);
+ StandardPostingsReader pulsingReader = new PulsingPostingsReaderImpl(docsReader);
// Terms dict index reader
StandardTermsIndexReader indexReader;
@@ -114,7 +114,7 @@
success = true;
} finally {
if (!success) {
- docs.close();
+ pulsingReader.close();
}
}
@@ -123,15 +123,16 @@
try {
FieldsProducer ret = new StandardTermsDictReader(indexReader,
dir, fieldInfos, si.name,
- docsReader,
+ pulsingReader,
readBufferSize,
- BytesRef.getUTF8SortedAsUTF16Comparator());
+ BytesRef.getUTF8SortedAsUTF16Comparator(),
+ StandardCodec.TERMS_CACHE_SIZE);
success = true;
return ret;
} finally {
if (!success) {
try {
- docs.close();
+ pulsingReader.close();
} finally {
indexReader.close();
}
@@ -141,7 +142,7 @@
@Override
public void files(Directory dir, SegmentInfo segmentInfo, Collection<String> files) throws IOException {
- StandardDocsReader.files(dir, segmentInfo, files);
+ StandardPostingsReaderImpl.files(dir, segmentInfo, files);
StandardTermsDictReader.files(dir, segmentInfo, files);
SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
}
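A condensed sketch of the wiring the two methods above now build: plain decorator chains around the standard postings implementations, restated from the diff itself (freqCutoff stays hard-wired to 1):

    // Write path: the pulsing writer wraps the standard writer.
    StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);
    StandardPostingsWriter pulsingWriter = new PulsingPostingsWriterImpl(1, docsWriter);

    // Read path: the pulsing reader wraps the standard reader. The error
    // paths now close pulsingReader, whose close() forwards to the wrapped
    // reader (see PulsingPostingsReaderImpl.close() below).
    StandardPostingsReader docsReader = new StandardPostingsReaderImpl(dir, si, readBufferSize);
    StandardPostingsReader pulsingReader = new PulsingPostingsReaderImpl(docsReader);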
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=904750&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Sat Jan 30 10:16:35 2010
@@ -0,0 +1,396 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.TermState;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Document;
+import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Position;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/** Concrete class that reads the current doc/freq/skip
+ * postings format */
+
+// nocommit -- should we switch "hasProx" higher up? and
+// create two separate docs readers, one that also reads
+// prox and one that doesn't?
+
+public class PulsingPostingsReaderImpl extends StandardPostingsReader {
+
+ // Fallback reader for non-pulsed terms:
+ final StandardPostingsReader wrappedPostingsReader;
+ int maxPulsingDocFreq;
+
+ public PulsingPostingsReaderImpl(StandardPostingsReader wrappedPostingsReader) throws IOException {
+ this.wrappedPostingsReader = wrappedPostingsReader;
+ }
+
+ @Override
+ public void init(IndexInput termsIn) throws IOException {
+ Codec.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC, PulsingPostingsWriterImpl.VERSION_START);
+ maxPulsingDocFreq = termsIn.readVInt();
+ wrappedPostingsReader.init(termsIn);
+ }
+
+ private static class PulsingTermState extends TermState {
+ private Document docs[];
+ private TermState wrappedTermState;
+ private boolean pendingIndexTerm;
+
+ public Object clone() {
+ PulsingTermState clone;
+ clone = (PulsingTermState) super.clone();
+ clone.docs = (Document[]) docs.clone();
+ for(int i=0;i<clone.docs.length;i++) {
+ final Document doc = clone.docs[i];
+ if (doc != null) {
+ clone.docs[i] = (Document) doc.clone();
+ }
+ }
+ clone.wrappedTermState = (TermState) wrappedTermState.clone();
+ return clone;
+ }
+
+ public void copy(TermState _other) {
+ super.copy(_other);
+ PulsingTermState other = (PulsingTermState) _other;
+ pendingIndexTerm = other.pendingIndexTerm;
+ wrappedTermState.copy(other.wrappedTermState);
+ for(int i=0;i<docs.length;i++) {
+ if (other.docs[i] != null) {
+ docs[i] = (Document) other.docs[i].clone();
+ }
+ }
+ }
+ }
+
+ @Override
+ public TermState newTermState() throws IOException {
+ PulsingTermState state = new PulsingTermState();
+ state.wrappedTermState = wrappedPostingsReader.newTermState();
+ state.docs = new Document[maxPulsingDocFreq];
+ return state;
+ }
+
+ @Override
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
+
+ PulsingTermState termState = (PulsingTermState) _termState;
+
+ if (Codec.DEBUG) {
+ System.out.println("pulsr.readTerm docFreq=" + termState.docFreq + " indexTerm=" + isIndexTerm);
+ }
+
+ termState.pendingIndexTerm |= isIndexTerm;
+
+ if (termState.docFreq <= maxPulsingDocFreq) {
+
+ if (Codec.DEBUG) {
+ System.out.println(" pulsed");
+ }
+
+ // Inlined into terms dict -- read everything in
+
+ // TODO: maybe only read everything in lazily? But
+ // then we'd need to store length so we could seek
+ // over it when docs/pos enum was not requested
+
+ // TODO: it'd be better to share this encoding logic
+ // in some inner codec that knows how to write a
+ // single doc / single position, etc. This way if a
+ // given codec wants to store other interesting
+ // stuff, it could use this pulsing codec to do so
+
+ int docID = 0;
+ for(int i=0;i<termState.docFreq;i++) {
+ Document doc = termState.docs[i];
+ if (doc == null) {
+ doc = termState.docs[i] = new Document();
+ }
+ final int code = termsIn.readVInt();
+ if (fieldInfo.omitTermFreqAndPositions) {
+ docID += code;
+ doc.numPositions = 1;
+ if (Codec.DEBUG) {
+ System.out.println(" doc=" + docID);
+ }
+ } else {
+ docID += code>>>1;
+ if ((code & 1) != 0) {
+ doc.numPositions = 1;
+ } else {
+ doc.numPositions = termsIn.readVInt();
+ }
+
+ if (Codec.DEBUG) {
+ System.out.println(" doc=" + docID + " numPos=" + doc.numPositions);
+ }
+
+ if (doc.numPositions > doc.positions.length) {
+ doc.reallocPositions(doc.numPositions);
+ }
+
+ int position = 0;
+ int payloadLength = -1;
+
+ for(int j=0;j<doc.numPositions;j++) {
+ final Position pos = doc.positions[j];
+ final int code2 = termsIn.readVInt();
+ if (fieldInfo.storePayloads) {
+ position += code2 >>> 1;
+ if ((code2 & 1) != 0) {
+ payloadLength = termsIn.readVInt();
+ }
+
+ if (payloadLength > 0) {
+ if (pos.payload == null) {
+ pos.payload = new BytesRef();
+ pos.payload.bytes = new byte[payloadLength];
+ } else if (payloadLength > pos.payload.bytes.length) {
+ pos.payload.grow(payloadLength);
+ }
+ pos.payload.length = payloadLength;
+ termsIn.readBytes(pos.payload.bytes, 0, payloadLength);
+ } else if (pos.payload != null) {
+ pos.payload.length = 0;
+ }
+ } else {
+ position += code2;
+ }
+ pos.pos = position;
+ }
+ }
+ doc.docID = docID;
+ }
+ } else {
+ if (Codec.DEBUG) {
+ System.out.println(" not pulsed pass isIndex=" + termState.pendingIndexTerm);
+ }
+ termState.wrappedTermState.docFreq = termState.docFreq;
+ wrappedPostingsReader.readTerm(termsIn, fieldInfo, termState.wrappedTermState, termState.pendingIndexTerm);
+ termState.pendingIndexTerm = false;
+ }
+ }
+
+ // nocommit -- not great that we can't always reuse
+ @Override
+ public DocsEnum docs(FieldInfo field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ PulsingTermState termState = (PulsingTermState) _termState;
+ if (termState.docFreq <= maxPulsingDocFreq) {
+ if (reuse instanceof PulsingDocsEnum) {
+ return ((PulsingDocsEnum) reuse).reset(skipDocs, termState);
+ } else {
+ PulsingDocsEnum docsEnum = new PulsingDocsEnum();
+ return docsEnum.reset(skipDocs, termState);
+ }
+ } else {
+ if (reuse instanceof PulsingDocsEnum) {
+ return wrappedPostingsReader.docs(field, termState.wrappedTermState, skipDocs, null);
+ } else {
+ return wrappedPostingsReader.docs(field, termState.wrappedTermState, skipDocs, reuse);
+ }
+ }
+ }
+
+ // nocommit -- not great that we can't always reuse
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ PulsingTermState termState = (PulsingTermState) _termState;
+ if (termState.docFreq <= maxPulsingDocFreq) {
+ if (reuse instanceof PulsingDocsAndPositionsEnum) {
+ return ((PulsingDocsAndPositionsEnum) reuse).reset(skipDocs, termState);
+ } else {
+ PulsingDocsAndPositionsEnum postingsEnum = new PulsingDocsAndPositionsEnum();
+ return postingsEnum.reset(skipDocs, termState);
+ }
+ } else {
+ if (reuse instanceof PulsingDocsAndPositionsEnum) {
+ return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, skipDocs, null);
+ } else {
+ return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, skipDocs, reuse);
+ }
+ }
+ }
+
+ static class PulsingDocsEnum extends DocsEnum {
+ private int nextRead;
+ private Bits skipDocs;
+ private Document doc;
+ private PulsingTermState state;
+
+ public void close() {}
+
+ PulsingDocsEnum reset(Bits skipDocs, PulsingTermState termState) {
+ // nocommit -- not great we have to clone here --
+ // merging is wasteful; TermRangeQuery too
+ state = (PulsingTermState) termState.clone();
+ this.skipDocs = skipDocs;
+ nextRead = 0;
+ return this;
+ }
+
+ @Override
+ public int nextDoc() {
+ while(true) {
+ if (nextRead >= state.docFreq) {
+ return NO_MORE_DOCS;
+ } else {
+ doc = state.docs[nextRead++];
+ if (skipDocs == null || !skipDocs.get(doc.docID)) {
+ return doc.docID;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int read(int[] docs, int[] freqs) {
+ int i=0;
+      // nocommit -- ob1?
+      while(i < docs.length && nextRead < state.docFreq) {
+ doc = state.docs[nextRead++];
+ if (skipDocs == null || !skipDocs.get(doc.docID)) {
+ docs[i] = doc.docID;
+ freqs[i] = doc.numPositions;
+ i++;
+ }
+ }
+ return i;
+ }
+
+ @Override
+ public int freq() {
+ return doc.numPositions;
+ }
+
+ @Override
+ public int docID() {
+ return doc.docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int doc;
+ while((doc=nextDoc()) != NO_MORE_DOCS) {
+ if (doc >= target)
+ return doc;
+ }
+ return NO_MORE_DOCS;
+ }
+ }
+
+ static class PulsingDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ private int nextRead;
+ private int nextPosRead;
+ private Bits skipDocs;
+ private Document doc;
+ private Position pos;
+ private PulsingTermState state;
+
+    // Only here to emulate a limitation of the standard codec,
+    // which allows retrieving the payload no more than once
+ private boolean payloadRetrieved;
+
+ public void close() {}
+
+ PulsingDocsAndPositionsEnum reset(Bits skipDocs, PulsingTermState termState) {
+ // nocommit -- not great we have to clone here --
+ // merging is wasteful; TermRangeQuery too
+ state = (PulsingTermState) termState.clone();
+ this.skipDocs = skipDocs;
+ nextRead = 0;
+ nextPosRead = 0;
+ return this;
+ }
+
+ @Override
+ public int nextDoc() {
+ while(true) {
+ if (nextRead >= state.docFreq) {
+ return NO_MORE_DOCS;
+ } else {
+ doc = state.docs[nextRead++];
+ if (skipDocs == null || !skipDocs.get(doc.docID)) {
+ nextPosRead = 0;
+ return doc.docID;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int freq() {
+ return doc.numPositions;
+ }
+
+ @Override
+ public int docID() {
+ return doc.docID;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int doc;
+ while((doc=nextDoc()) != NO_MORE_DOCS) {
+ if (doc >= target) {
+ return doc;
+ }
+ }
+ return NO_MORE_DOCS;
+ }
+
+ @Override
+ public int nextPosition() {
+ assert nextPosRead < doc.numPositions;
+ pos = doc.positions[nextPosRead++];
+ payloadRetrieved = false;
+ return pos.pos;
+ }
+
+ @Override
+ public int getPayloadLength() {
+ return payloadRetrieved || pos.payload == null ? 0 : pos.payload.length;
+ }
+
+ @Override
+ public boolean hasPayload() {
+ return !payloadRetrieved && pos.payload != null && pos.payload.length > 0;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ payloadRetrieved = true;
+ return pos.payload;
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ wrappedPostingsReader.close();
+ }
+}
Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
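The payloadRetrieved flag above emulates the standard codec's rule that a payload may be fetched at most once per position. A minimal consumption sketch against the flex enum API used in this file; it assumes a DocsAndPositionsEnum has already been obtained from the postings reader (NO_MORE_DOCS is inherited from DocIdSetIterator):

    // Illustrative sketch, not part of this commit: walks every doc,
    // position and payload of one term's postings.
    static void consume(DocsAndPositionsEnum postings) throws IOException {
      int doc;
      while ((doc = postings.nextDoc()) != DocsAndPositionsEnum.NO_MORE_DOCS) {
        final int freq = postings.freq();
        for (int i = 0; i < freq; i++) {
          final int position = postings.nextPosition();
          if (postings.hasPayload()) {
            final BytesRef payload = postings.getPayload(); // valid at most once
          }
        }
      }
    }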
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=904750&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Sat Jan 30 10:16:35 2010
@@ -0,0 +1,326 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+// TODO: we now pulse entirely according to docFreq of the
+// term; it might be better to eg pulse by "net bytes used"
+// so that a term that has only 1 doc but zillions of
+// positions would not be inlined. Though this is
+// presumably rare in practice...
+
+//nocommit: public
+public final class PulsingPostingsWriterImpl extends StandardPostingsWriter {
+
+ final static String CODEC = "PulsedPostings";
+
+ // To add a new version, increment from the last one, and
+ // change VERSION_CURRENT to point to your new version:
+ final static int VERSION_START = 0;
+
+ final static int VERSION_CURRENT = VERSION_START;
+
+ IndexOutput termsOut;
+
+ boolean omitTF;
+ boolean storePayloads;
+
+ // Starts a new term
+ FieldInfo fieldInfo;
+
+ // nocommit
+ String desc;
+
+ // nocommit: public
+ public static class Document {
+ int docID;
+ int termDocFreq;
+ int numPositions;
+ Position[] positions;
+ Document() {
+ positions = new Position[1];
+ positions[0] = new Position();
+ }
+
+ @Override
+ public Object clone() {
+ Document doc = new Document();
+ doc.docID = docID;
+ doc.termDocFreq = termDocFreq;
+ doc.numPositions = numPositions;
+ doc.positions = new Position[positions.length];
+ for(int i = 0; i < positions.length; i++) {
+ doc.positions[i] = (Position) positions[i].clone();
+ }
+
+ return doc;
+ }
+
+ void reallocPositions(int minSize) {
+ final Position[] newArray = new Position[ArrayUtil.getNextSize(minSize)];
+ System.arraycopy(positions, 0, newArray, 0, positions.length);
+ for(int i=positions.length;i<newArray.length;i++) {
+ newArray[i] = new Position();
+ }
+ positions = newArray;
+ }
+ }
+
+ final Document[] pendingDocs;
+ int pendingDocCount = 0;
+ Document currentDoc;
+ boolean pulsed; // false if we've seen > maxPulsingDocFreq docs
+
+ static class Position {
+ BytesRef payload;
+ int pos;
+
+ @Override
+ public Object clone() {
+ Position position = new Position();
+ position.pos = pos;
+ if (payload != null) {
+ position.payload = new BytesRef(payload);
+ }
+ return position;
+ }
+ }
+
+ // nocommit -- lazy init this? ie, if every single term
+ // was pulsed then we never need to use this fallback?
+ // Fallback writer for non-pulsed terms:
+ final StandardPostingsWriter wrappedPostingsWriter;
+
+ /** If docFreq <= maxPulsingDocFreq, its postings are
+ * inlined into terms dict */
+ public PulsingPostingsWriterImpl(int maxPulsingDocFreq, StandardPostingsWriter wrappedPostingsWriter) throws IOException {
+ super();
+
+ pendingDocs = new Document[maxPulsingDocFreq];
+ for(int i=0;i<maxPulsingDocFreq;i++) {
+ pendingDocs[i] = new Document();
+ }
+
+ // We simply wrap another postings writer, but only call
+ // on it when doc freq is higher than our cutoff
+ this.wrappedPostingsWriter = wrappedPostingsWriter;
+ }
+
+ @Override
+ public void start(IndexOutput termsOut) throws IOException {
+ this.termsOut = termsOut;
+ Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+ termsOut.writeVInt(pendingDocs.length);
+ wrappedPostingsWriter.start(termsOut);
+ }
+
+ @Override
+ public void startTerm() {
+ assert pendingDocCount == 0;
+ pulsed = false;
+ }
+
+ // nocommit -- should we NOT reuse across fields? would
+ // be cleaner
+
+ // Currently, this instance is re-used across fields, so
+ // our parent calls setField whenever the field changes
+ @Override
+ public void setField(FieldInfo fieldInfo) {
+ this.fieldInfo = fieldInfo;
+ omitTF = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = fieldInfo.storePayloads;
+ wrappedPostingsWriter.setField(fieldInfo);
+ }
+
+ @Override
+ public void addDoc(int docID, int termDocFreq) throws IOException {
+
+ assert docID >= 0: "got docID=" + docID;
+
+ if (Codec.DEBUG) {
+ System.out.println("PW.addDoc: docID=" + docID + " pendingDocCount=" + pendingDocCount + " vs " + pendingDocs.length + " pulsed=" + pulsed);
+ }
+
+ if (!pulsed && pendingDocCount == pendingDocs.length) {
+
+ // OK we just crossed the threshold, this term should
+ // now be written with our wrapped codec:
+ wrappedPostingsWriter.startTerm();
+
+ if (Codec.DEBUG) {
+ System.out.println(" now flush buffer");
+ }
+
+ // Flush all buffered docs
+ for(int i=0;i<pendingDocCount;i++) {
+ final Document doc = pendingDocs[i];
+ if (Codec.DEBUG)
+ System.out.println(" docID=" + doc.docID);
+
+ wrappedPostingsWriter.addDoc(doc.docID, doc.termDocFreq);
+
+ if (!omitTF) {
+ assert doc.termDocFreq == doc.numPositions;
+ for(int j=0;j<doc.termDocFreq;j++) {
+ final Position pos = doc.positions[j];
+ if (pos.payload != null && pos.payload.length > 0) {
+ assert storePayloads;
+ wrappedPostingsWriter.addPosition(pos.pos, pos.payload);
+ } else {
+ wrappedPostingsWriter.addPosition(pos.pos, null);
+ }
+ }
+ wrappedPostingsWriter.finishDoc();
+ }
+ }
+
+ pendingDocCount = 0;
+
+ pulsed = true;
+ }
+
+ if (pulsed) {
+ // We've already seen too many docs for this term --
+ // just forward to our fallback writer
+ wrappedPostingsWriter.addDoc(docID, termDocFreq);
+ } else {
+ currentDoc = pendingDocs[pendingDocCount++];
+ currentDoc.docID = docID;
+ // nocommit -- need not store in doc? only used for alloc & assert
+ currentDoc.termDocFreq = termDocFreq;
+ if (termDocFreq > currentDoc.positions.length) {
+ currentDoc.reallocPositions(termDocFreq);
+ }
+ currentDoc.numPositions = 0;
+ }
+ }
+
+ @Override
+ public void addPosition(int position, BytesRef payload) throws IOException {
+ if (pulsed) {
+ wrappedPostingsWriter.addPosition(position, payload);
+ } else {
+ // just buffer up
+ Position pos = currentDoc.positions[currentDoc.numPositions++];
+ pos.pos = position;
+ if (payload != null && payload.length > 0) {
+ if (pos.payload == null) {
+ pos.payload = new BytesRef(payload);
+ } else {
+ pos.payload.copy(payload);
+ }
+ } else if (pos.payload != null) {
+ pos.payload.length = 0;
+ }
+ }
+ }
+
+ @Override
+ public void finishDoc() {
+ assert currentDoc.numPositions == currentDoc.termDocFreq;
+ }
+
+ boolean pendingIsIndexTerm;
+
+ int pulsedCount;
+ int nonPulsedCount;
+
+ /** Called when we are done adding docs to this term */
+ @Override
+ public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+
+ if (Codec.DEBUG) {
+ System.out.println("PW: finishTerm pendingDocCount=" + pendingDocCount);
+ }
+
+ pendingIsIndexTerm |= isIndexTerm;
+
+ if (pulsed) {
+ wrappedPostingsWriter.finishTerm(docCount, pendingIsIndexTerm);
+ pendingIsIndexTerm = false;
+ pulsedCount++;
+ } else {
+ nonPulsedCount++;
+ // OK, there were few enough occurrences for this
+ // term, so we fully inline our postings data into
+ // terms dict, now:
+ int lastDocID = 0;
+ for(int i=0;i<pendingDocCount;i++) {
+ final Document doc = pendingDocs[i];
+ final int delta = doc.docID - lastDocID;
+ lastDocID = doc.docID;
+ if (omitTF) {
+ termsOut.writeVInt(delta);
+ } else {
+ assert doc.numPositions == doc.termDocFreq;
+ if (doc.numPositions == 1)
+ termsOut.writeVInt((delta<<1)|1);
+ else {
+ termsOut.writeVInt(delta<<1);
+ termsOut.writeVInt(doc.numPositions);
+ }
+
+ // TODO: we could do better in encoding
+ // payloadLength, eg, if it's always the same
+ // across all terms
+ int lastPosition = 0;
+ int lastPayloadLength = -1;
+
+ for(int j=0;j<doc.numPositions;j++) {
+ final Position pos = doc.positions[j];
+ final int delta2 = pos.pos - lastPosition;
+ lastPosition = pos.pos;
+ if (storePayloads) {
+ final int payloadLength = pos.payload == null ? 0 : pos.payload.length;
+ if (payloadLength != lastPayloadLength) {
+ termsOut.writeVInt((delta2 << 1)|1);
+ termsOut.writeVInt(payloadLength);
+ lastPayloadLength = payloadLength;
+ } else {
+ termsOut.writeVInt(delta2 << 1);
+ }
+
+ if (payloadLength > 0) {
+ termsOut.writeBytes(pos.payload.bytes, 0, pos.payload.length);
+ }
+ } else {
+ termsOut.writeVInt(delta2);
+ }
+ }
+ }
+ }
+ }
+
+ pendingDocCount = 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ wrappedPostingsWriter.close();
+ }
+}
Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
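For concreteness, the inlined encoding finishTerm writes (non-omitTF path, payloads off): each doc delta carries the freq==1 flag in its low bit, and positions follow as raw deltas. A self-contained, illustrative sketch using a hand-rolled Lucene-style vInt (not part of this commit):

    import java.io.ByteArrayOutputStream;

    public class PulsedEncodingSketch {
      // Lucene-style vInt: 7 data bits per byte, high bit means "more bytes".
      static void writeVInt(ByteArrayOutputStream out, int i) {
        while ((i & ~0x7F) != 0) {
          out.write((i & 0x7F) | 0x80);
          i >>>= 7;
        }
        out.write(i);
      }

      public static void main(String[] args) {
        // A term occurring in doc 5 (one position: 2) and doc 8 (positions 3, 7):
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeVInt(out, (5 << 1) | 1); // doc delta 5, low bit set: numPositions == 1
        writeVInt(out, 2);            // position 2 (no payloads, so raw delta)
        writeVInt(out, 3 << 1);       // doc delta 3, low bit clear...
        writeVInt(out, 2);            // ...so numPositions is written explicitly
        writeVInt(out, 3);            // position delta 3
        writeVInt(out, 4);            // position delta 7 - 3 = 4
        System.out.println(out.size() + " bytes inlined into the terms dict"); // 6
      }
    }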
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Sat Jan 30 10:16:35 2010
@@ -36,8 +36,7 @@
public abstract Index index() throws IOException;
- public class IndexState {};
-
+ // nocommit -- can we simplify this?
public abstract static class Index {
// nocommit
@@ -50,11 +49,7 @@
public abstract void set(Index other);
- // nocommit handle with set and/or clone?
- public abstract IndexState captureState();
-
- // nocommit handle with set and/or clone?
- public abstract void setState(IndexState state);
+ public abstract Object clone();
}
public abstract static class Reader {
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Sat Jan 30 10:16:35 2010
@@ -33,6 +33,7 @@
*
* @lucene.experimental */
public abstract class IntIndexOutput implements Closeable {
+
/** Write an int to the primary file */
public abstract void write(int v) throws IOException;
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java Sat Jan 30 10:16:35 2010
@@ -28,12 +28,13 @@
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
-import org.apache.lucene.index.codecs.standard.StandardDocsConsumer;
-import org.apache.lucene.index.codecs.standard.StandardDocsProducer;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -47,7 +48,7 @@
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- StandardDocsConsumer docsWriter = new SepDocsWriter(state, new SingleIntFactory());
+ StandardPostingsWriter postingsWriter = new SepPostingsWriterImpl(state, new SingleIntFactory());
boolean success = false;
StandardTermsIndexWriter indexWriter;
@@ -56,19 +57,19 @@
success = true;
} finally {
if (!success) {
- docsWriter.close();
+ postingsWriter.close();
}
}
success = false;
try {
- FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, docsWriter, BytesRef.getUTF8SortedAsUTF16Comparator());
+ FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUTF16Comparator());
success = true;
return ret;
} finally {
if (!success) {
try {
- docsWriter.close();
+ postingsWriter.close();
} finally {
indexWriter.close();
}
@@ -85,7 +86,7 @@
@Override
public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
- StandardDocsProducer docsReader = new SepDocsReader(dir, si, readBufferSize, new SingleIntFactory());
+ StandardPostingsReader postingsReader = new SepPostingsReaderImpl(dir, si, readBufferSize, new SingleIntFactory());
StandardTermsIndexReader indexReader;
boolean success = false;
@@ -98,7 +99,7 @@
success = true;
} finally {
if (!success) {
- docsReader.close();
+ postingsReader.close();
}
}
@@ -106,15 +107,16 @@
try {
FieldsProducer ret = new StandardTermsDictReader(indexReader,
dir, fieldInfos, si.name,
- docsReader,
+ postingsReader,
readBufferSize,
- BytesRef.getUTF8SortedAsUTF16Comparator());
+ BytesRef.getUTF8SortedAsUTF16Comparator(),
+ StandardCodec.TERMS_CACHE_SIZE);
success = true;
return ret;
} finally {
if (!success) {
try {
- docsReader.close();
+ postingsReader.close();
} finally {
indexReader.close();
}
@@ -124,7 +126,7 @@
@Override
public void files(Directory dir, SegmentInfo segmentInfo, Collection<String> files) {
- SepDocsReader.files(segmentInfo, files);
+ SepPostingsReaderImpl.files(segmentInfo, files);
StandardTermsDictReader.files(dir, segmentInfo, files);
SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
}
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=904750&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Sat Jan 30 10:16:35 2010
@@ -0,0 +1,814 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.standard.TermState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/** Concrete class that reads the current doc/freq/skip
+ * postings format.
+ *
+ * @lucene.experimental
+ */
+
+// nocommit -- should we switch "hasProx" higher up? and
+// create two separate docs readers, one that also reads
+// prox and one that doesn't?
+
+public class SepPostingsReaderImpl extends StandardPostingsReader {
+
+ final IntIndexInput freqIn;
+ final IntIndexInput docIn;
+ final IntIndexInput posIn;
+ final IndexInput payloadIn;
+ final IndexInput skipIn;
+
+ int skipInterval;
+ int maxSkipLevels;
+
+ public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory) throws IOException {
+
+ boolean success = false;
+ try {
+
+ final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.DOC_EXTENSION);
+ docIn = intFactory.openInput(dir, docFileName);
+
+ skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.SKIP_EXTENSION), readBufferSize);
+
+ if (segmentInfo.getHasProx()) {
+ freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.FREQ_EXTENSION));
+ posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.POS_EXTENSION), readBufferSize);
+ payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.PAYLOAD_EXTENSION), readBufferSize);
+ } else {
+ posIn = null;
+ payloadIn = null;
+ freqIn = null;
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ close();
+ }
+ }
+ }
+
+ public static void files(SegmentInfo segmentInfo, Collection<String> files) {
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.DOC_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.SKIP_EXTENSION));
+
+ if (segmentInfo.getHasProx()) {
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.FREQ_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.POS_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, SepCodec.PAYLOAD_EXTENSION));
+ }
+ }
+
+ @Override
+ public void init(IndexInput termsIn) throws IOException {
+ // Make sure we are talking to the matching past writer
+ Codec.checkHeader(termsIn, SepPostingsWriterImpl.CODEC, SepPostingsWriterImpl.VERSION_START);
+ skipInterval = termsIn.readInt();
+ maxSkipLevels = termsIn.readInt();
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ if (freqIn != null)
+ freqIn.close();
+ } finally {
+ try {
+ if (docIn != null)
+ docIn.close();
+ } finally {
+ try {
+ if (skipIn != null)
+ skipIn.close();
+ } finally {
+ try {
+ if (posIn != null) {
+ posIn.close();
+ }
+ } finally {
+ if (payloadIn != null) {
+ payloadIn.close();
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private static class SepTermState extends TermState {
+ IntIndexInput.Index docIndex;
+ IntIndexInput.Index freqIndex;
+ IntIndexInput.Index posIndex;
+ long skipOffset;
+ long payloadOffset;
+
+ public Object clone() {
+ SepTermState other = (SepTermState) super.clone();
+ other.docIndex = (IntIndexInput.Index) docIndex.clone();
+ if (freqIndex != null) {
+ other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
+ }
+ if (posIndex != null) {
+ other.posIndex = (IntIndexInput.Index) posIndex.clone();
+ }
+ return other;
+ }
+
+ public void copy(TermState _other) {
+ super.copy(_other);
+ SepTermState other = (SepTermState) _other;
+ docIndex.set(other.docIndex);
+ if (other.posIndex != null) {
+ if (posIndex == null) {
+ posIndex = (IntIndexInput.Index) other.posIndex.clone();
+ } else {
+ posIndex.set(other.posIndex);
+ }
+ }
+ if (other.freqIndex != null) {
+ if (freqIndex == null) {
+ freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
+ } else {
+ freqIndex.set(other.freqIndex);
+ }
+ }
+ skipOffset = other.skipOffset;
+ payloadOffset = other.payloadOffset;
+ }
+ }
+
+ @Override
+ public TermState newTermState() throws IOException {
+ final SepTermState state = new SepTermState();
+ state.docIndex = docIn.index();
+ return state;
+ }
+
+ @Override
+ public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+
+ if (Codec.DEBUG) {
+ System.out.println(" dr.readTerm termsFP=" + termsIn.getFilePointer() + " df=" + termState.docFreq + " isIndex=" + isIndexTerm);
+ System.out.println(" start freqFP=" + termState.freqIndex + " docFP=" + termState.docIndex + " skipFP=" + termState.skipOffset);
+ }
+
+ // read freq index
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ if (termState.freqIndex == null) {
+ assert isIndexTerm;
+ termState.freqIndex = freqIn.index();
+ termState.posIndex = posIn.index();
+ }
+ termState.freqIndex.read(termsIn, isIndexTerm);
+ }
+
+ // read doc index
+ termState.docIndex.read(termsIn, isIndexTerm);
+
+ // read skip index
+ if (isIndexTerm) {
+ termState.skipOffset = termsIn.readVLong();
+ } else if (termState.docFreq >= skipInterval) {
+ termState.skipOffset += termsIn.readVLong();
+ }
+
+ // read pos, payload index
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ termState.posIndex.read(termsIn, isIndexTerm);
+ final long v = termsIn.readVLong();
+ if (isIndexTerm) {
+ termState.payloadOffset = v;
+ } else {
+ termState.payloadOffset += v;
+ }
+ }
+
+ if (Codec.DEBUG) {
+ System.out.println(" freqFP=" + termState.freqIndex + " docFP=" + termState.docIndex + " skipFP=" + termState.skipOffset);
+ }
+ }
+
+ @Override
+ public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ final SepTermState termState = (SepTermState) _termState;
+ if (reuse == null) {
+ return (new SepDocsEnum()).init(fieldInfo, termState, skipDocs);
+ } else {
+ return ((SepDocsEnum) reuse).init(fieldInfo, termState, skipDocs);
+ }
+ }
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ assert !fieldInfo.omitTermFreqAndPositions;
+ final SepTermState termState = (SepTermState) _termState;
+ if (reuse == null) {
+ return (new SepDocsAndPositionsEnum()).init(fieldInfo, termState, skipDocs);
+ } else {
+ return ((SepDocsAndPositionsEnum) reuse).init(fieldInfo, termState, skipDocs);
+ }
+ }
+
+ class SepDocsEnum extends DocsEnum {
+ int docFreq;
+ int doc;
+ int count;
+ int freq;
+ long freqStart;
+
+ // nocommit -- should we do omitTF with 2 different enum classes?
+ private boolean omitTF;
+ private boolean storePayloads;
+ private Bits skipDocs;
+ private final IntIndexInput.Reader docReader;
+ private final IntIndexInput.Reader freqReader;
+ private long skipOffset;
+
+ private final IntIndexInput.Index docIndex;
+ private final IntIndexInput.Index freqIndex;
+ private final IntIndexInput.Index posIndex;
+
+ // nocommit -- should we do hasProx with 2 different enum classes?
+
+ boolean skipped;
+ SepSkipListReader skipper;
+
+ SepDocsEnum() throws IOException {
+ if (Codec.DEBUG) {
+ Codec.debug("sep: new DocsEnum");
+ }
+ docReader = docIn.reader();
+ docIndex = docIn.index();
+ if (freqIn != null) {
+ freqReader = freqIn.reader();
+ freqIndex = freqIn.index();
+ } else {
+ freqReader = null;
+ freqIndex = null;
+ }
+ if (posIn != null) {
+ posIndex = posIn.index(); // only init this so skipper can read it
+ } else {
+ posIndex = null;
+ }
+ }
+
+ SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
+ if (Codec.DEBUG) {
+ System.out.println("[" + desc + "] dr.init freqIn seek " + freqIndex + " this=" + this + " (in=" + freqIn + "; this=" + this + ")");
+ }
+ this.skipDocs = skipDocs;
+ omitTF = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = fieldInfo.storePayloads;
+
+ // nocommit: can't we only do this if consumer
+ // skipped consuming the previous docs?
+ docIndex.set(termState.docIndex);
+ docIndex.seek(docReader);
+
+ skipOffset = termState.skipOffset;
+
+ if (!omitTF) {
+ freqIndex.set(termState.freqIndex);
+ freqIndex.seek(freqReader);
+ } else {
+ freq = 1;
+ }
+ docFreq = termState.docFreq;
+ count = 0;
+ doc = 0;
+ skipped = false;
+
+ return this;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+
+ if (Codec.DEBUG) {
+ if (!omitTF) {
+ Codec.debug("sep.reader.docs.nextDoc count=" + count + " vs df=" + docFreq + " freqFP=" + freqReader.descFilePointer() + " docFP=" + docReader.descFilePointer() + " skipDocs?=" + (skipDocs != null), desc);
+ } else {
+ Codec.debug("sep.reader.docs.nextDoc count=" + count + " vs df=" + docFreq + " docFP=" + docReader.descFilePointer() + " skipDocs?=" + (skipDocs != null), desc);
+ }
+ }
+
+ while(true) {
+ if (count == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+
+ count++;
+
+ // Decode next doc
+ doc += docReader.next();
+
+ if (!omitTF) {
+ freq = freqReader.next();
+ }
+
+ if (Codec.DEBUG) {
+ System.out.println(" decode doc=" + doc + " freq=" + freq);
+ }
+
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ break;
+ } else if (Codec.DEBUG) {
+ System.out.println(" doc=" + doc + " is skipped");
+ }
+ }
+
+ // nocommit
+ if (Codec.DEBUG) {
+ System.out.println(" return doc=" + doc);
+ }
+ return doc;
+ }
+
+ @Override
+ public int read(int[] docs, int[] freqs) throws IOException {
+ // nocommit -- switch to bulk read api in IntIndexInput
+ int i = 0;
+ final int length = docs.length;
+ while (i < length && count < docFreq) {
+ count++;
+ // manually inlined call to next() for speed
+ doc += docReader.next();
+ if (!omitTF) {
+ freq = freqReader.next();
+ }
+
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ docs[i] = doc;
+ freqs[i] = freq;
+ i++;
+ }
+ }
+
+ return i;
+ }
+
+ @Override
+ public int freq() {
+ return freq;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+
+ // TODO: jump right to next() if target is < X away
+ // from where we are now?
+
+ if (Codec.DEBUG) {
+ Codec.debug("sep.reader.docs: advance target=" + target + " omitTF=" + omitTF, desc);
+ }
+
+ if (docFreq >= skipInterval) {
+
+ // There are enough docs in the posting to have
+ // skip data
+
+ if (skipper == null) {
+ // This DocsEnum has never done any skipping
+ if (Codec.DEBUG) {
+ System.out.println(" create skipper");
+ }
+
+ skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
+ freqIn,
+ docIn,
+ posIn,
+ maxSkipLevels, skipInterval);
+
+ }
+
+ if (!skipped) {
+ // We haven't yet skipped for this posting
+ skipper.init(skipOffset,
+ docIndex,
+ freqIndex,
+ posIndex,
+ 0,
+ docFreq,
+ storePayloads);
+ skipper.setOmitTF(omitTF);
+
+ if (Codec.DEBUG) {
+ System.out.println(" init skipper: base skipFP=" + skipOffset + " docFP=" + docIndex + " freqFP=" + freqIndex);
+ }
+
+ skipped = true;
+ }
+
+ final int newCount = skipper.skipTo(target);
+
+ if (newCount > count) {
+
+ // Skipper did move
+ if (Codec.DEBUG) {
+ System.out.println("sdr [" + desc + "]: skipper moved to newCount=" + newCount +
+ " docFP=" + skipper.getDocIndex() +
+ " freqFP=" + skipper.getFreqIndex() +
+ " doc=" + skipper.getDoc());
+ }
+
+ if (!omitTF) {
+ skipper.getFreqIndex().seek(freqReader);
+ }
+ skipper.getDocIndex().seek(docReader);
+ count = newCount;
+ doc = skipper.getDoc();
+ } else if (Codec.DEBUG) {
+ System.out.println(" no skipping to be done");
+ }
+ }
+
+ // Now, linear scan for the rest:
+ do {
+ if (nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ } while (target > doc);
+
+ if (Codec.DEBUG) {
+ Codec.debug(" skip return doc=" + doc);
+ }
+
+ return doc;
+ }
+ }
+
+ class SepDocsAndPositionsEnum extends DocsAndPositionsEnum {
+ int docFreq;
+ int doc;
+ int count;
+ int freq;
+ long freqStart;
+
+ // nocommit -- should we do omitTF with 2 different enum classes?
+ private boolean omitTF;
+ private boolean storePayloads;
+ private Bits skipDocs;
+ private final IntIndexInput.Reader docReader;
+ private final IntIndexInput.Reader freqReader;
+ private final IntIndexInput.Reader posReader;
+ private final IndexInput payloadIn;
+ private long skipOffset;
+
+ private final IntIndexInput.Index docIndex;
+ private final IntIndexInput.Index freqIndex;
+ private final IntIndexInput.Index posIndex;
+ private long payloadOffset;
+
+ private int pendingPosCount;
+ private int position;
+ private int payloadLength;
+ private long pendingPayloadBytes;
+
+ private boolean skipped;
+ private SepSkipListReader skipper;
+ private boolean payloadPending;
+ private boolean posSeekPending;
+
+ SepDocsAndPositionsEnum() throws IOException {
+ if (Codec.DEBUG) {
+ Codec.debug("sep: new DocsAndPositionsEnum");
+ }
+ docReader = docIn.reader();
+ docIndex = docIn.index();
+ freqReader = freqIn.reader();
+ freqIndex = freqIn.index();
+ posReader = posIn.reader();
+ posIndex = posIn.index();
+ payloadIn = (IndexInput) SepPostingsReaderImpl.this.payloadIn.clone();
+ }
+
+ SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits skipDocs) throws IOException {
+ if (Codec.DEBUG) {
+ Codec.debug("sep.reader.init freqIn seek " + termState.freqIndex);
+ }
+ this.skipDocs = skipDocs;
+ storePayloads = fieldInfo.storePayloads;
+
+ // nocommit: can't we only do this if consumer
+ // skipped consuming the previous docs?
+ docIndex.set(termState.docIndex);
+ docIndex.seek(docReader);
+
+ freqIndex.set(termState.freqIndex);
+ freqIndex.seek(freqReader);
+
+ posIndex.set(termState.posIndex);
+ posSeekPending = true;
+ //posIndex.seek(posReader);
+
+ skipOffset = termState.skipOffset;
+ payloadOffset = termState.payloadOffset;
+ //payloadIn.seek(payloadOffset);
+
+ docFreq = termState.docFreq;
+ count = 0;
+ doc = 0;
+ pendingPosCount = 0;
+ pendingPayloadBytes = 0;
+ skipped = false;
+
+ return this;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+
+ if (Codec.DEBUG) {
+ if (!omitTF) {
+ Codec.debug("sep.reader.nextDoc next count=" + count + " vs df=" + docFreq + " freqFP=" + freqReader.descFilePointer() + " docFP=" + docReader.descFilePointer() + " skipDocs?=" + (skipDocs != null), desc);
+ } else {
+ Codec.debug("sep.reader.nextDoc next count=" + count + " vs df=" + docFreq + " docFP=" + docReader.descFilePointer() + " skipDocs?=" + (skipDocs != null), desc);
+ }
+ }
+
+ while(true) {
+ if (count == docFreq) {
+ return doc = NO_MORE_DOCS;
+ }
+
+ count++;
+
+ // TODO: maybe we should do the 1-bit trick for encoding
+ // freq=1 case?
+
+ // Decode next doc
+ doc += docReader.next();
+
+ freq = freqReader.next();
+
+ pendingPosCount += freq;
+
+ if (Codec.DEBUG) {
+ System.out.println(" decode doc=" + doc + " freq=" + freq);
+ }
+
+ if (skipDocs == null || !skipDocs.get(doc)) {
+ break;
+ } else if (Codec.DEBUG) {
+ System.out.println(" doc=" + doc + " is skipped");
+ }
+ }
+
+ // nocommit
+ if (Codec.DEBUG) {
+ System.out.println(" return doc=" + doc);
+ }
+ position = 0;
+ return doc;
+ }
+
+ @Override
+ public int freq() {
+ return freq;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+
+ // TODO: jump right to next() if target is < X away
+ // from where we are now?
+
+ if (Codec.DEBUG) {
+ Codec.debug("sep.reader.advance current doc=" + doc + " target=" + target, desc);
+ }
+
+ if (docFreq >= skipInterval) {
+
+ // There are enough docs in the posting to have
+ // skip data
+
+ if (skipper == null) {
+ // This DocsEnum has never done any skipping
+ if (Codec.DEBUG) {
+ System.out.println(" create skipper");
+ }
+
+ skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
+ freqIn,
+ docIn,
+ posIn,
+ maxSkipLevels, skipInterval);
+ }
+
+ if (!skipped) {
+ // We haven't yet skipped for this posting
+ skipper.init(skipOffset,
+ docIndex,
+ freqIndex,
+ posIndex,
+ payloadOffset,
+ docFreq,
+ storePayloads);
+
+ if (Codec.DEBUG) {
+ System.out.println(" init skipper: base skipFP=" + skipOffset + " docFP=" + docIndex + " freqFP=" + freqIndex + " proxFP=" +
+ posIndex + " payloadFP=" + payloadOffset);
+ }
+
+ skipped = true;
+ }
+
+ final int newCount = skipper.skipTo(target);
+
+ if (newCount > count) {
+
+ // Skipper did move
+ if (Codec.DEBUG) {
+ Codec.debug(" skipper moved to newCount=" + newCount +
+ " docFP=" + skipper.getDocIndex() +
+ " freqFP=" + skipper.getFreqIndex() +
+ " doc=" + skipper.getDoc());
+ }
+
+ skipper.getFreqIndex().seek(freqReader);
+ skipper.getDocIndex().seek(docReader);
+ //skipper.getPosIndex().seek(posReader);
+ posIndex.set(skipper.getPosIndex());
+ posSeekPending = true;
+ count = newCount;
+ doc = skipper.getDoc();
+ //payloadIn.seek(skipper.getPayloadPointer());
+ payloadOffset = skipper.getPayloadPointer();
+ pendingPosCount = 0;
+ pendingPayloadBytes = 0;
+ payloadPending = false;
+ payloadLength = skipper.getPayloadLength();
+
+ } else if (Codec.DEBUG) {
+ System.out.println(" no skipping to be done");
+ }
+
+ } else {
+ if (Codec.DEBUG) {
+ Codec.debug("[" + desc + "]: no skip data");
+ }
+ }
+
+ // Now, linear scan for the rest:
+ do {
+ if (nextDoc() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ } while (target > doc);
+
+ if (Codec.DEBUG) {
+ Codec.debug("advance done return doc=" + doc, desc);
+ }
+ return doc;
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ if (Codec.DEBUG) {
+ Codec.debug("sep.reader.nextPos pendingPosCount=" + pendingPosCount + " freq=" + freq, desc);
+ }
+
+ if (posSeekPending) {
+ posIndex.seek(posReader);
+ payloadIn.seek(payloadOffset);
+ posSeekPending = false;
+ }
+
+ // scan over any docs that were iterated without their
+ // positions
+ while (pendingPosCount > freq) {
+ if (Codec.DEBUG) {
+ System.out.println(" skip position payloadBytesPending=" + pendingPayloadBytes);
+ }
+ final int code = posReader.next();
+ if (storePayloads) {
+ if ((code & 1) != 0) {
+ // Payload length has changed
+ payloadLength = posReader.next();
+ assert payloadLength >= 0;
+ if (Codec.DEBUG) {
+ System.out.println(" new payloadLen=" + payloadLength);
+ }
+ }
+ }
+ pendingPosCount--;
+ payloadPending = true;
+ position = 0;
+ pendingPayloadBytes += payloadLength;
+ }
+
+ final int code = posReader.next();
+ if (storePayloads) {
+ if ((code & 1) != 0) {
+ // Payload length has changed
+ payloadLength = posReader.next();
+ assert payloadLength >= 0;
+ if (Codec.DEBUG) {
+ System.out.println(" new payloadLen=" + payloadLength);
+ }
+ }
+ position += code >> 1;
+ } else {
+ position += code;
+ }
+
+    pendingPayloadBytes += payloadLength;
+    pendingPosCount--;
+    payloadPending = true;
+ assert pendingPosCount >= 0;
+
+ if (Codec.DEBUG) {
+ System.out.println(" return pos=" + position);
+ }
+ return position;
+ }
+
+ @Override
+ public int getPayloadLength() {
+ return payloadLength;
+ }
+
+ private BytesRef payload;
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ if (!payloadPending) {
+ throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+ }
+
+ if (Codec.DEBUG) {
+ Codec.debug(" getPayload payloadFP=" + payloadIn.getFilePointer() + " len=" + payloadLength + " pendingPayloadBytes=" + pendingPayloadBytes, desc);
+ }
+
+ assert pendingPayloadBytes >= payloadLength;
+
+ if (pendingPayloadBytes > payloadLength) {
+ payloadIn.seek(payloadIn.getFilePointer() + (pendingPayloadBytes - payloadLength));
+ }
+
+ if (payload == null) {
+ payload = new BytesRef();
+ payload.bytes = new byte[payloadLength];
+ } else if (payload.bytes.length < payloadLength) {
+ payload.grow(payloadLength);
+ }
+
+ payloadIn.readBytes(payload.bytes, 0, payloadLength);
+ payloadPending = false;
+ payload.length = payloadLength;
+ pendingPayloadBytes = 0;
+ return payload;
+ }
+
+ @Override
+ public boolean hasPayload() {
+ return payloadPending && payloadLength > 0;
+ }
+ }
+}
Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
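The position stream decoded above uses the same sticky payload-length convention as the standard codec: when a field stores payloads, each position delta is shifted left one bit and the low bit flags a payload-length change; the length stays in force until re-written. A self-contained, illustrative sketch of that decode loop (not part of this commit):

    public class SepPositionDecodeSketch {
      public static void main(String[] args) {
        // Simulated .pos ints for one doc with positions 3, 5, 10 and
        // payload lengths 4, 4, 0; the length is only encoded when it changes:
        final int[] stream = { (3 << 1) | 1, 4, 2 << 1, (5 << 1) | 1, 0 };
        int position = 0;
        int payloadLength = -1;
        int i = 0;
        while (i < stream.length) {
          final int code = stream[i++];
          if ((code & 1) != 0) {
            payloadLength = stream[i++]; // low bit set: payload length changed
          }
          position += code >>> 1;
          System.out.println("pos=" + position + " payloadLen=" + payloadLength);
        }
      }
    }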
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=904750&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Sat Jan 30 10:16:35 2010
@@ -0,0 +1,334 @@
+package org.apache.lucene.index.codecs.sep;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+
+/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
+ * to .pyl, skip data to .skp
+ *
+ * @lucene.experimental */
+public final class SepPostingsWriterImpl extends StandardPostingsWriter {
+ final static String CODEC = "SepDocFreqSkip";
+
+ // Increment version to change it:
+ final static int VERSION_START = 0;
+ final static int VERSION_CURRENT = VERSION_START;
+
+ final IntIndexOutput freqOut;
+ final IntIndexOutput.Index freqIndex;
+
+ final IntIndexOutput posOut;
+ final IntIndexOutput.Index posIndex;
+
+ final IntIndexOutput docOut;
+ final IntIndexOutput.Index docIndex;
+
+ final IndexOutput payloadOut;
+
+ final IndexOutput skipOut;
+ IndexOutput termsOut;
+
+ final SepSkipListWriter skipListWriter;
+ final int skipInterval;
+ final int maxSkipLevels;
+ final int totalNumDocs;
+
+ boolean storePayloads;
+ boolean omitTF;
+
+ // Starts a new term
+ long lastSkipStart;
+
+ FieldInfo fieldInfo;
+
+ int lastPayloadLength;
+ int lastPosition;
+ long payloadStart;
+ long lastPayloadStart;
+ int lastDocID;
+ int df;
+ int count;
+
+ public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
+ super();
+
+ final String docFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.DOC_EXTENSION);
+ state.flushedFiles.add(docFileName);
+ docOut = factory.createOutput(state.directory, docFileName);
+ docIndex = docOut.index();
+
+ if (state.fieldInfos.hasProx()) {
+ final String frqFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.FREQ_EXTENSION);
+ state.flushedFiles.add(frqFileName);
+ freqOut = factory.createOutput(state.directory, frqFileName);
+ freqIndex = freqOut.index();
+
+ final String posFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.POS_EXTENSION);
+ posOut = factory.createOutput(state.directory, posFileName);
+ state.flushedFiles.add(posFileName);
+ posIndex = posOut.index();
+
+ // nocommit -- only if at least one field stores payloads?
+ final String payloadFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.PAYLOAD_EXTENSION);
+ state.flushedFiles.add(payloadFileName);
+ payloadOut = state.directory.createOutput(payloadFileName);
+
+ } else {
+ freqOut = null;
+ freqIndex = null;
+ posOut = null;
+ posIndex = null;
+ payloadOut = null;
+ }
+
+ final String skipFileName = IndexFileNames.segmentFileName(state.segmentName, SepCodec.SKIP_EXTENSION);
+ state.flushedFiles.add(skipFileName);
+ skipOut = state.directory.createOutput(skipFileName);
+
+ totalNumDocs = state.numDocs;
+
+ // nocommit -- abstraction violation
+ skipListWriter = new SepSkipListWriter(state.skipInterval,
+ state.maxSkipLevels,
+ state.numDocs,
+ freqOut, docOut,
+ posOut, payloadOut);
+
+ skipInterval = state.skipInterval;
+ maxSkipLevels = state.maxSkipLevels;
+ }
+
+ @Override
+ public void start(IndexOutput termsOut) throws IOException {
+ this.termsOut = termsOut;
+ Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+ // nocommit -- just ask skipper to "start" here
+ termsOut.writeInt(skipInterval); // write skipInterval
+ termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ }
+
+ @Override
+ public void startTerm() throws IOException {
+ if (Codec.DEBUG) {
+ Codec.debug("sep.writer.startTerm");
+ }
+ docIndex.mark();
+ if (!omitTF) {
+ freqIndex.mark();
+ posIndex.mark();
+ payloadStart = payloadOut.getFilePointer();
+ lastPayloadLength = -1;
+ }
+ skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
+ }
+
+ // nocommit -- should we NOT reuse across fields? would
+ // be cleaner
+
+ // Currently, this instance is re-used across fields, so
+ // our parent calls setField whenever the field changes
+ @Override
+ public void setField(FieldInfo fieldInfo) {
+ this.fieldInfo = fieldInfo;
+ omitTF = fieldInfo.omitTermFreqAndPositions;
+ skipListWriter.setOmitTF(omitTF);
+ storePayloads = !omitTF && fieldInfo.storePayloads;
+ }
+
+
+ /** Adds a new doc in this term. */
+ @Override
+ public void addDoc(int docID, int termDocFreq) throws IOException {
+
+ final int delta = docID - lastDocID;
+
+ if (Codec.DEBUG) {
+ System.out.println(" dw.addDoc [" + desc + "] count=" + (count++) + " docID=" + docID + " lastDocID=" + lastDocID + " delta=" + delta + " omitTF=" + omitTF + " freq=" + termDocFreq);
+ }
+
+ if (docID < 0 || (df > 0 && delta <= 0)) {
+ throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
+ }
+
+ if ((++df % skipInterval) == 0) {
+ // nocommit -- awkward we have to make these two
+ // separate calls to skipper
+ skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
+ skipListWriter.bufferSkip(df);
+
+ if (Codec.DEBUG) {
+ System.out.println(" bufferSkip lastDocID=" + lastDocID +
+ " df=" + df +
+ " docFP=" + docOut.descFilePointer() +
+ " freqFP=" + freqOut.descFilePointer() +
+ " posFP=" + posOut.descFilePointer() +
+ " payloadFP=" + payloadOut.getFilePointer() +
+ " payloadLen=" + lastPayloadLength);
+ }
+ }
+
+ lastDocID = docID;
+ docOut.write(delta);
+ if (!omitTF) {
+ freqOut.write(termDocFreq);
+ }
+ }
+
+ /** Add a new position & payload */
+ @Override
+ public void addPosition(int position, BytesRef payload) throws IOException {
+ assert !omitTF;
+
+ if (Codec.DEBUG) {
+ if (payload != null && payload.length > 0) {
+ System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer() + " payload=" + payload.length + " bytes");
+ } else {
+ System.out.println("pw.addPos [" + desc + "]: pos=" + position + " posFP=" + posOut.descFilePointer() + " payloadFP=" + payloadOut.getFilePointer());
+ }
+ }
+
+ final int delta = position - lastPosition;
+ lastPosition = position;
+
+ if (storePayloads) {
+ final int payloadLength = payload == null ? 0 : payload.length;
+ if (Codec.DEBUG) {
+ System.out.println(" store payload len=" + payloadLength);
+ }
+ if (payloadLength != lastPayloadLength) {
+ if (Codec.DEBUG) {
+ System.out.println(" payload len change old=" + lastPayloadLength + " new=" + payloadLength);
+ }
+ lastPayloadLength = payloadLength;
+ // TODO: explore whether we get better compression
+ // by not storing payloadLength into prox stream?
+ posOut.write((delta<<1)|1);
+ posOut.write(payloadLength);
+ } else {
+ posOut.write(delta << 1);
+ }
+
+ if (payloadLength > 0) {
+ if (Codec.DEBUG) {
+ System.out.println(" write @ payloadFP=" + payloadOut.getFilePointer());
+ }
+ payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength);
+ }
+ } else {
+ posOut.write(delta);
+ }
+ }
+
+ /** Called when we are done adding positions & payloads */
+ @Override
+ public void finishDoc() {
+ lastPosition = 0;
+ }
+
+ /** Called when we are done adding docs to this term */
+ @Override
+ public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+
+ long skipPos = skipOut.getFilePointer();
+
+ // nocommit -- wasteful we are counting this in two places?
+ assert docCount == df;
+ if (Codec.DEBUG) {
+ System.out.println("dw.finishTerm termsFP=" + termsOut.getFilePointer() + " df=" + df + " skipPos=" + skipPos);
+ }
+
+ // nocommit -- only do this once (consolidate the
+ // conditional things that are written)
+ if (!omitTF) {
+ freqIndex.write(termsOut, isIndexTerm);
+ }
+ docIndex.write(termsOut, isIndexTerm);
+
+ if (df >= skipInterval) {
+ if (Codec.DEBUG) {
+ System.out.println(" writeSkip skipPos=" + skipPos + " lastSkipPos=" + lastSkipStart);
+ }
+
+ skipListWriter.writeSkip(skipOut);
+ }
+
+ if (isIndexTerm) {
+ termsOut.writeVLong(skipPos);
+ lastSkipStart = skipPos;
+ } else if (df >= skipInterval) {
+ termsOut.writeVLong(skipPos-lastSkipStart);
+ lastSkipStart = skipPos;
+ }
+
+ if (!omitTF) {
+ posIndex.write(termsOut, isIndexTerm);
+ if (isIndexTerm) {
+ // Write absolute at seek points
+ termsOut.writeVLong(payloadStart);
+ } else {
+ termsOut.writeVLong(payloadStart-lastPayloadStart);
+ }
+ lastPayloadStart = payloadStart;
+ }
+
+ lastDocID = 0;
+ df = 0;
+
+ // nocommit
+ count = 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (Codec.DEBUG) {
+ System.out.println("sep.writer.close skipFP=" + skipOut.getFilePointer());
+ }
+ try {
+ docOut.close();
+ } finally {
+ try {
+ skipOut.close();
+ } finally {
+ if (freqOut != null) {
+ try {
+ freqOut.close();
+ } finally {
+ try {
+ posOut.close();
+ } finally {
+ payloadOut.close();
+ }
+ }
+ }
+ }
+ }
+ }
+}
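Note the skip rhythm in addDoc() above: doc IDs are delta-encoded, and every
skipInterval-th document a skip entry is buffered with the state as of the
previous doc. A hedged sketch of just that logic (SkipBuffer stands in for
SepSkipListWriter; names are illustrative):

    import java.util.ArrayList;
    import java.util.List;

    class DocWriterSketch {
      interface SkipBuffer { void bufferSkip(int lastDocID, int df); }

      private final int skipInterval;
      private final SkipBuffer skips;
      private int lastDocID;
      private int df;                       // docs seen for the current term
      final List<Integer> docDeltas = new ArrayList<Integer>();

      DocWriterSketch(int skipInterval, SkipBuffer skips) {
        this.skipInterval = skipInterval;
        this.skips = skips;
      }

      void addDoc(int docID) {
        final int delta = docID - lastDocID;   // delta-encoded doc IDs
        if (docID < 0 || (df > 0 && delta <= 0)) {
          throw new IllegalStateException(
              "docs out of order: " + docID + " <= " + lastDocID);
        }
        if ((++df % skipInterval) == 0) {
          skips.bufferSkip(lastDocID, df);  // snapshot the previous doc's state
        }
        lastDocID = docID;
        docDeltas.add(delta);
      }
    }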
Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
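addPosition() in SepPostingsWriterImpl folds a "payload length changed" flag
into the low bit of each position delta, so a run of equal-length payloads
stores the length only once. A self-contained sketch with a worked example
(illustrative names; ints collected in a list instead of an IntIndexOutput):

    import java.util.ArrayList;
    import java.util.List;

    class PositionEncoderSketch {
      private int lastPosition;
      private int lastPayloadLength = -1;   // -1 forces the first length write
      final List<Integer> out = new ArrayList<Integer>();

      void addPosition(int position, int payloadLength) {
        final int delta = position - lastPosition;
        lastPosition = position;
        if (payloadLength != lastPayloadLength) {
          lastPayloadLength = payloadLength;
          out.add((delta << 1) | 1);  // low bit set: new length follows
          out.add(payloadLength);
        } else {
          out.add(delta << 1);        // low bit clear: reuse previous length
        }
      }
    }

For positions 3, 7, 12 with payload lengths 4, 4, 2 this emits
[7, 4, 8, 11, 2]: (3<<1)|1, 4, (4<<1), (5<<1)|1, 2.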
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Sat Jan 30 10:16:35 2010
@@ -97,6 +97,12 @@
}
}
+ boolean omitTF;
+
+ void setOmitTF(boolean v) {
+ omitTF = v;
+ }
+
void init(long skipPointer,
IntIndexInput.Index docBaseIndex,
IntIndexInput.Index freqBaseIndex,
@@ -199,14 +205,11 @@
} else {
delta = skipStream.readVInt();
}
- //System.out.println(" delta=" + delta + " level=" +
- //level);
- if (freqIndex != null) {
+ if (!omitTF) {
freqIndex[level].read(skipStream, false);
}
docIndex[level].read(skipStream, false);
- // nocommit -- make this explicit w/ omitTF, matching SepSkipListWriter
- if (posIndex != null) {
+ if (!omitTF) {
posIndex[level].read(skipStream, false);
payloadPointer[level] += skipStream.readVInt();
}
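The reader change above makes the skip-entry layout depend on an explicit
omitTF flag rather than on null checks, keeping it in lockstep with
SepSkipListWriter. Roughly, each per-level entry looks like this sketch
(DataInput stands in for the skip stream; the real code reads vInts and
IntIndexInput.Index objects, here replaced by plain longs):

    import java.io.DataInput;
    import java.io.IOException;

    class SkipEntrySketch {
      long freqIndex, docIndex, posIndex, payloadPointer;

      void readEntry(DataInput skipStream, boolean omitTF) throws IOException {
        int docDelta = skipStream.readInt();       // real code: vInt
        if (!omitTF) {
          freqIndex = skipStream.readLong();       // freqIndex[level].read(...)
        }
        docIndex = skipStream.readLong();
        if (!omitTF) {
          posIndex = skipStream.readLong();
          payloadPointer += skipStream.readInt();  // delta-encoded pointer
        }
        // ... docDelta advances the skip doc for this level ...
      }
    }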
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java Sat Jan 30 10:16:35 2010
@@ -77,9 +77,11 @@
posIndex = new IntIndexOutput.Index[numberOfSkipLevels];
for(int i=0;i<numberOfSkipLevels;i++) {
- freqIndex[i] = freqOutput.index();
- if (Codec.DEBUG) {
- freqIndex[i].desc = "sslw.freq.level" + i;
+ if (freqOutput != null) {
+ freqIndex[i] = freqOutput.index();
+ if (Codec.DEBUG) {
+ freqIndex[i].desc = "sslw.freq.level" + i;
+ }
}
docIndex[i] = docOutput.index();
if (Codec.DEBUG) {
@@ -142,7 +144,9 @@
Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
for(int i=0;i<numberOfSkipLevels;i++) {
docIndex[i].set(topDocIndex);
- freqIndex[i].set(topFreqIndex);
+ if (freqOutput != null) {
+ freqIndex[i].set(topFreqIndex);
+ }
if (posOutput != null) {
posIndex[i].set(topPosIndex);
}
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java Sat Jan 30 10:16:35 2010
@@ -69,13 +69,9 @@
}
}
- class State extends IndexState {
- long fp;
- boolean first;
- }
-
class Index extends IntIndexInput.Index {
private long fp;
+ // nocommit: only for asserts
boolean first = true;
@Override
@@ -110,24 +106,13 @@
return Long.toString(fp);
}
- // nocommit handle with set and/or clone?
- @Override
- public IndexState captureState() {
- State state = SingleIntIndexInput.this.new State();
- state.fp = fp;
- state.first = first;
- return state;
- }
-
- // nocommit handle with set and/or clone?
@Override
- public void setState(IndexState state) {
- State iState = (State) state;
- this.fp = iState.fp;
- this.first = iState.first;
-
+ public Object clone() {
+ Index other = new Index();
+ other.first = first;
+ other.fp = fp;
+ return other;
}
-
}
@Override
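The SingleIntIndexInput change replaces the captureState()/setState() pair
with clone(): a snapshot is simply a copy of the index's two fields. A sketch
of that capture-by-clone pattern (field names mirror the diff; the class
itself is illustrative):

    class IndexSketch {
      long fp;                 // file pointer of the indexed position
      boolean first = true;    // only used for asserts in the real code

      @Override
      public IndexSketch clone() {
        IndexSketch other = new IndexSketch();
        other.fp = fp;
        other.first = first;
        return other;
      }
    }

    // Usage: IndexSketch snapshot = current.clone();
    // ... later, restore by copying snapshot's fields back.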
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java Sat Jan 30 10:16:35 2010
@@ -24,7 +24,7 @@
import java.io.IOException;
/** Writes ints directly to the file (not in blocks) as
- * vInt
+ * vInt.
*
* @lucene.experimental
*/
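SingleIntIndexOutput writes each int as a plain vInt: 7 payload bits per byte
with the high bit set while more bytes follow, so small values cost a single
byte. A sketch of that encoding (writing into a ByteArrayOutputStream rather
than a Lucene IndexOutput):

    import java.io.ByteArrayOutputStream;

    final class VIntSketch {
      static void writeVInt(ByteArrayOutputStream out, int i) {
        while ((i & ~0x7F) != 0) {
          out.write((i & 0x7F) | 0x80);  // low 7 bits, continuation bit set
          i >>>= 7;
        }
        out.write(i);                    // final byte, high bit clear
      }
    }

    // E.g. 5 encodes as [0x05]; 300 encodes as [0xAC, 0x02].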
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java Sat Jan 30 10:16:35 2010
@@ -60,14 +60,14 @@
}
// nocommit -- made public
- public void setFreqOutput(IndexOutput freqOutput) {
- this.freqOutput = freqOutput;
- }
+ //public void setFreqOutput(IndexOutput freqOutput) {
+ //this.freqOutput = freqOutput;
+ //}
// nocommit -- made public
- public void setProxOutput(IndexOutput proxOutput) {
- this.proxOutput = proxOutput;
- }
+ //public void setProxOutput(IndexOutput proxOutput) {
+ //this.proxOutput = proxOutput;
+ //}
/**
* Sets the values for the current skip data.
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java Sat Jan 30 10:16:35 2010
@@ -73,16 +73,7 @@
this.termComp = termComp;
- // nocommit -- why was this needed?
- String file = IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_INDEX_EXTENSION);
- if (!dir.fileExists(file)) {
- indexInterval = 0;
- totalIndexInterval = 0;
- this.indexDivisor = indexDivisor;
- in = null;
- return;
- }
- IndexInput in = dir.openInput(file);
+ IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, StandardCodec.TERMS_INDEX_EXTENSION));
boolean success = false;
@@ -90,7 +81,7 @@
Codec.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START);
if (Codec.DEBUG) {
- System.out.println(" readDirStart @ " + in.getFilePointer());
+ Codec.debug(" sstir init: header tii.fp=" + in.getFilePointer());
}
final long dirOffset = in.readLong();
@@ -444,7 +435,7 @@
public final void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
if (Codec.DEBUG) {
- System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term + " indexLen = " + blockPointer.length + " numIndexTerms=" + fileOffset.length + " this=" + this + " numIndexedTerms=" + fileOffset.length);
+ System.out.println("getIndexOffset field=" + fieldInfo.name + " term=" + term + " indexLen = " + blockPointer.length + " numIndexTerms=" + fileOffset.length + " numIndexedTerms=" + fileOffset.length);
}
int lo = 0; // binary search
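getIndexOffset() binary-searches the in-memory index terms for the greatest
indexed term at or before the sought term; the terms dict is then scanned
forward from that block. A sketch over plain Strings (the real code compares
BytesRef values via the configured comparator):

    final class TermIndexSearchSketch {
      static int indexOffset(String[] indexTerms, String term) {
        int lo = 0, hi = indexTerms.length - 1, result = 0;
        while (lo <= hi) {
          int mid = (lo + hi) >>> 1;
          int cmp = indexTerms[mid].compareTo(term);
          if (cmp == 0) return mid;
          if (cmp < 0) { result = mid; lo = mid + 1; }
          else hi = mid - 1;
        }
        return result;    // greatest entry <= term (0 if term precedes all)
      }

      public static void main(String[] args) {
        String[] idx = {"apple", "dog", "moon", "tree"};
        System.out.println(indexOffset(idx, "cat"));  // 0: block at "apple"
      }
    }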
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java Sat Jan 30 10:16:35 2010
@@ -93,10 +93,8 @@
if (0 == (numTerms++ % termIndexInterval)) {
final long termsPointer = termsOut.getFilePointer();
if (Codec.DEBUG) {
- System.out.println("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text + " termsFP=" + termsPointer + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
+ Codec.debug("sstiw.checkIndexTerm write index field=" + fieldInfo.name + " term=" + text + " termsFP=" + termsPointer + " numIndexTerms=" + numIndexTerms + " outFP=" + out.getFilePointer());
}
- // mxx
- //System.out.println(Thread.currentThread().getName() + ": ii seg=" + segment + " term=" + fieldInfo.name + ":" + new String(term, 0, termLength, "UTF-8") + " numTerms=" + (numTerms-1) + " termFP=" + termsPointer);
termWriter.write(text);
out.writeVLong(termsPointer - lastTermsPointer);
lastTermsPointer = termsPointer;
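checkIndexTerm() above records every termIndexInterval-th term, writing the
term plus the delta of the terms-dict file pointer since the previous index
entry. A hedged sketch of that bookkeeping (collecting into lists instead of
writing terms and vLongs to an IndexOutput):

    import java.util.ArrayList;
    import java.util.List;

    class TermsIndexWriterSketch {
      private final int termIndexInterval;
      private long numTerms;
      private long lastTermsPointer;
      final List<String> indexedTerms = new ArrayList<String>();
      final List<Long> pointerDeltas = new ArrayList<Long>();

      TermsIndexWriterSketch(int termIndexInterval) {
        this.termIndexInterval = termIndexInterval;
      }

      boolean checkIndexTerm(String term, long termsPointer) {
        if (0 == (numTerms++ % termIndexInterval)) {
          indexedTerms.add(term);
          pointerDeltas.add(termsPointer - lastTermsPointer);  // delta-encoded
          lastTermsPointer = termsPointer;
          return true;   // this term is an index term
        }
        return false;
      }
    }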
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=904750&r1=904749&r2=904750&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Sat Jan 30 10:16:35 2010
@@ -38,7 +38,7 @@
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- StandardDocsConsumer docs = new StandardDocsWriter(state);
+ StandardPostingsWriter docs = new StandardPostingsWriterImpl(state);
StandardTermsIndexWriter indexWriter;
boolean success = false;
@@ -67,9 +67,11 @@
}
}
+ public final static int TERMS_CACHE_SIZE = 1024;
+
@Override
public FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException {
- StandardDocsReader docs = new StandardDocsReader(dir, si, readBufferSize);
+ StandardPostingsReader postings = new StandardPostingsReaderImpl(dir, si, readBufferSize);
StandardTermsIndexReader indexReader;
// nocommit -- not clean that every codec must deal w/
@@ -84,7 +86,7 @@
success = true;
} finally {
if (!success) {
- docs.close();
+ postings.close();
}
}
@@ -92,15 +94,16 @@
try {
FieldsProducer ret = new StandardTermsDictReader(indexReader,
dir, fieldInfos, si.name,
- docs,
+ postings,
readBufferSize,
- BytesRef.getUTF8SortedAsUTF16Comparator());
+ BytesRef.getUTF8SortedAsUTF16Comparator(),
+ TERMS_CACHE_SIZE);
success = true;
return ret;
} finally {
if (!success) {
try {
- docs.close();
+ postings.close();
} finally {
indexReader.close();
}
@@ -122,7 +125,7 @@
@Override
public void files(Directory dir, SegmentInfo segmentInfo, Collection<String> files) throws IOException {
- StandardDocsReader.files(dir, segmentInfo, files);
+ StandardPostingsReaderImpl.files(dir, segmentInfo, files);
StandardTermsDictReader.files(dir, segmentInfo, files);
SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
}
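StandardCodec now passes TERMS_CACHE_SIZE through to StandardTermsDictReader
so recently looked-up terms can be served from memory. The actual cache class
is not part of this diff; as an assumption, a bounded LRU along these lines
captures the idea (an access-ordered LinkedHashMap that evicts past the cap):

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Hypothetical stand-in for the terms cache, not Lucene's actual class.
    class TermsCacheSketch<K, V> extends LinkedHashMap<K, V> {
      private final int maxSize;

      TermsCacheSketch(int maxSize) {
        super(16, 0.75f, true);        // true = access order, i.e. LRU
        this.maxSize = maxSize;
      }

      @Override
      protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > maxSize;
      }
    }

    // Usage: new TermsCacheSketch<String, Long>(1024) holds up to 1024
    // looked-up terms (e.g. term -> terms-dict pointer), matching
    // TERMS_CACHE_SIZE above.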