Posted to commits@lucene.apache.org by bu...@apache.org on 2011/01/13 20:53:39 UTC

svn commit: r1058718 [4/18] - in /lucene/dev/branches/realtime_search: ./ lucene/ lucene/contrib/ lucene/contrib/ant/src/java/org/apache/lucene/ant/ lucene/contrib/ant/src/test/org/apache/lucene/ant/ lucene/contrib/benchmark/ lucene/contrib/demo/src/ja...

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java Thu Jan 13 19:53:21 2011
@@ -18,14 +18,11 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -35,10 +32,9 @@ import org.apache.lucene.util.ReaderUtil
  *  their content. */
 public class MultiReader extends IndexReader implements Cloneable {
   protected IndexReader[] subReaders;
+  private final ReaderContext topLevelContext;
   private int[] starts;                           // 1st docno for each segment
-  private final Map<IndexReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<IndexReader,ReaderUtil.Slice>();
   private boolean[] decrefOnClose;                // remember which subreaders to decRef on close
-  private Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
   private int maxDoc = 0;
   private int numDocs = -1;
   private boolean hasDeletions = false;
@@ -51,7 +47,7 @@ public class MultiReader extends IndexRe
   * @param subReaders set of (sub)readers
   */
   public MultiReader(IndexReader... subReaders) throws IOException {
-    initialize(subReaders, true);
+    topLevelContext = initialize(subReaders, true);
   }
 
   /**
@@ -63,14 +59,13 @@ public class MultiReader extends IndexRe
    * @param subReaders set of (sub)readers
    */
   public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
-    initialize(subReaders, closeSubReaders);
+    topLevelContext = initialize(subReaders, closeSubReaders);
   }
   
-  private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
+  private ReaderContext initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
     this.subReaders =  subReaders.clone();
     starts = new int[subReaders.length + 1];    // build starts array
     decrefOnClose = new boolean[subReaders.length];
-
     for (int i = 0; i < subReaders.length; i++) {
       starts[i] = maxDoc;
       maxDoc += subReaders[i].maxDoc();      // compute maxDocs
@@ -85,14 +80,9 @@ public class MultiReader extends IndexRe
       if (subReaders[i].hasDeletions()) {
         hasDeletions = true;
       }
-
-      final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i],
-                                                          subReaders[i].maxDoc(),
-                                                          i);
-      subReaderToSlice.put(subReaders[i], slice);
     }
-
     starts[subReaders.length] = maxDoc;
+    return ReaderUtil.buildReaderContext(this);
   }
 
   @Override
@@ -101,11 +91,6 @@ public class MultiReader extends IndexRe
   }
 
   @Override
-  public int getSubReaderDocBase(IndexReader subReader) {
-    return subReaderToSlice.get(subReader).start;
-  }
-
-  @Override
   public Fields fields() throws IOException {
     throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
   }
@@ -316,45 +301,12 @@ public class MultiReader extends IndexRe
   
   @Override
   public synchronized byte[] norms(String field) throws IOException {
-    ensureOpen();
-    byte[] bytes = normsCache.get(field);
-    if (bytes != null)
-      return bytes;          // cache hit
-    if (!hasNorms(field))
-      return null;
-
-    bytes = new byte[maxDoc()];
-    for (int i = 0; i < subReaders.length; i++)
-      subReaders[i].norms(field, bytes, starts[i]);
-    normsCache.put(field, bytes);      // update cache
-    return bytes;
-  }
-
-  @Override
-  public synchronized void norms(String field, byte[] result, int offset)
-    throws IOException {
-    ensureOpen();
-    byte[] bytes = normsCache.get(field);
-    for (int i = 0; i < subReaders.length; i++)      // read from segments
-      subReaders[i].norms(field, result, offset + starts[i]);
-
-    if (bytes==null && !hasNorms(field)) {
-      Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
-    } else if (bytes != null) {                         // cache hit
-      System.arraycopy(bytes, 0, result, offset, maxDoc());
-    } else {
-      for (int i = 0; i < subReaders.length; i++) {     // read from segments
-        subReaders[i].norms(field, result, offset + starts[i]);
-      }
-    }
+    throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
   }
 
   @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
-    synchronized (normsCache) {
-      normsCache.remove(field);                         // clear cache
-    }
     int i = readerIndex(n);                           // find segment num
     subReaders[i].setNorm(n-starts[i], field, value); // dispatch
   }
@@ -433,4 +385,8 @@ public class MultiReader extends IndexRe
   public IndexReader[] getSequentialSubReaders() {
     return subReaders;
   }
+  
+  public ReaderContext getTopReaderContext() {
+    return topLevelContext;
+  }
 }
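
A minimal caller-side migration sketch (not from the patch itself): since
norms(String) on this composite reader now throws UnsupportedOperationException,
top-level norms come from MultiNorms or from SlowMultiReaderWrapper, as both are
used elsewhere in this commit. Assumes MultiNorms is publicly accessible the way
MultiFields is; the reader and field arguments are hypothetical.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiNorms;
    import org.apache.lucene.index.SlowMultiReaderWrapper;

    class TopLevelNorms {
      // compute norms across all sub-readers on demand (no caching):
      static byte[] norms(IndexReader multiReader, String field) throws IOException {
        return MultiNorms.norms(multiReader, field);
      }

      // or wrap once; the wrapper caches the merged norms per field:
      static byte[] cachedNorms(IndexReader multiReader, String field) throws IOException {
        return new SlowMultiReaderWrapper(multiReader).norms(field);
      }
    }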

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Jan 13 19:53:21 2011
@@ -91,13 +91,6 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public void cacheCurrentTerm() throws IOException {
-    for(int i=0;i<numTop;i++) {
-      top[i].terms.cacheCurrentTerm();
-    }
-  }
-
-  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriter.java Thu Jan 13 19:53:21 2011
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.Map;
 
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.IndexOutput;
 
 // TODO FI: norms could actually be stored as doc store
@@ -33,7 +32,6 @@ import org.apache.lucene.store.IndexOutp
 
 final class NormsWriter extends InvertedDocEndConsumer {
 
-  private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
   private FieldInfos fieldInfos;
 
   @Override
@@ -51,9 +49,11 @@ final class NormsWriter extends Inverted
    *  not disabled */
   @Override
   public void flush(Map<FieldInfo,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
+    if (!fieldInfos.hasNorms()) {
+      return;
+    }
 
     final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
-    state.flushedFiles.add(normsFileName);
     IndexOutput normsOut = state.directory.createOutput(normsFileName);
 
     try {
@@ -79,7 +79,7 @@ final class NormsWriter extends Inverted
               normsOut.writeByte(toWrite.norms[upto]);
               upto++;
             } else {
-              normsOut.writeByte(defaultNorm);
+              normsOut.writeByte((byte) 0);
             }
           }
 
@@ -91,7 +91,7 @@ final class NormsWriter extends Inverted
           normCount++;
           // Fill entire field with default norm:
           for(;upto<state.numDocs;upto++)
-            normsOut.writeByte(defaultNorm);
+            normsOut.writeByte((byte) 0);
         }
 
         assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Thu Jan 13 19:53:21 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.search.Similarity;
 
 /** Taps into DocInverter, as an InvertedDocEndConsumer,
  *  which is called at the end of inverting each field.  We
@@ -71,7 +70,7 @@ final class NormsWriterPerField extends 
         norms = ArrayUtil.grow(norms, 1+upto);
       }
       final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
-      norms[upto] = Similarity.getDefault().encodeNormValue(norm);
+      norms[upto] = docState.similarity.encodeNormValue(norm);
       docIDs[upto] = docState.docID;
       upto++;
     }
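
Norm encoding now flows through the per-writer Similarity rather than the static
Similarity.getDefault(). A hedged sketch of what that enables, assuming
encodeNormValue is overridable as the instance call above suggests; the coarse
7-bit encoding below is made up for illustration only:

    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.search.Similarity;

    class CustomNormEncoding {
      // once installed on the writer, this encoding is honored at flush time,
      // where previously Similarity.getDefault() was hard-wired:
      static final Similarity SIM = new DefaultSimilarity() {
        @Override
        public byte encodeNormValue(float f) {
          return (byte) (f >= 1.0f ? 127 : (int) (f * 127)); // hypothetical encoding
        }
      };
    }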

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java Thu Jan 13 19:53:21 2011
@@ -53,7 +53,8 @@ public class ParallelReader extends Inde
   private SortedMap<String,IndexReader> fieldToReader = new TreeMap<String,IndexReader>();
   private Map<IndexReader,Collection<String>> readerToFields = new HashMap<IndexReader,Collection<String>>();
   private List<IndexReader> storedFieldReaders = new ArrayList<IndexReader>();
-
+  private Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
+  private final ReaderContext topLevelReaderContext = new AtomicReaderContext(this);
   private int maxDoc;
   private int numDocs;
   private boolean hasDeletions;
@@ -88,7 +89,7 @@ public class ParallelReader extends Inde
     buffer.append(')');
     return buffer.toString();
   }
-
+  
  /** Add an IndexReader.
   * @throws IOException if there is a low-level IO error
   */
@@ -141,6 +142,9 @@ public class ParallelReader extends Inde
       reader.incRef();
     }
     decrefOnClose.add(Boolean.valueOf(incRefReaders));
+    synchronized(normsCache) {
+      normsCache.clear(); // TODO: don't need to clear this for all fields really?
+    }
   }
 
   private class ParallelFieldsEnum extends FieldsEnum {
@@ -278,6 +282,7 @@ public class ParallelReader extends Inde
 
     if (reopened) {
       List<Boolean> newDecrefOnClose = new ArrayList<Boolean>();
+      // TODO: maybe add a special reopen-ctor for norm-copying?
       ParallelReader pr = new ParallelReader();
       for (int i = 0; i < readers.size(); i++) {
         IndexReader oldReader = readers.get(i);
@@ -419,27 +424,36 @@ public class ParallelReader extends Inde
   }
 
   @Override
-  public byte[] norms(String field) throws IOException {
+  public synchronized byte[] norms(String field) throws IOException {
     ensureOpen();
     IndexReader reader = fieldToReader.get(field);
-    return reader==null ? null : reader.norms(field);
-  }
 
-  @Override
-  public void norms(String field, byte[] result, int offset)
-    throws IOException {
-    ensureOpen();
-    IndexReader reader = fieldToReader.get(field);
-    if (reader!=null)
-      reader.norms(field, result, offset);
+    if (reader==null)
+      return null;
+    
+    byte[] bytes = normsCache.get(field);
+    if (bytes != null)
+      return bytes;
+    if (!hasNorms(field))
+      return null;
+    if (normsCache.containsKey(field)) // cached omitNorms, not missing key
+      return null;
+
+    bytes = MultiNorms.norms(reader, field);
+    normsCache.put(field, bytes);
+    return bytes;
   }
 
   @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
     IndexReader reader = fieldToReader.get(field);
-    if (reader!=null)
+    if (reader!=null) {
+      synchronized(normsCache) {
+        normsCache.remove(field);
+      }
       reader.doSetNorm(n, field, value);
+    }
   }
 
   @Override
@@ -529,6 +543,11 @@ public class ParallelReader extends Inde
     }
     return fieldSet;
   }
+  @Override
+  public ReaderContext getTopReaderContext() {
+    return topLevelReaderContext;
+  }
+
 }
 
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Thu Jan 13 19:53:21 2011
@@ -68,7 +68,8 @@ public final class SegmentInfo {
   private List<String> files;                     // cached list of files that this segment uses
                                                   // in the Directory
 
-  long sizeInBytes = -1;                          // total byte size of all of our files (computed on demand)
+  private long sizeInBytesNoStore = -1;           // total byte size of all but the store files (computed on demand)
+  private long sizeInBytesWithStore = -1;         // total byte size of all of our files (computed on demand)
 
   @Deprecated private int docStoreOffset;                     // if this segment shares stored fields & vectors, this
                                                   // offset is where in that file this segment's docs begin
@@ -220,25 +221,33 @@ public final class SegmentInfo {
     }
   }
 
-  /** Returns total size in bytes of all of files used by
-   *  this segment. */
+  /**
+   * Returns total size in bytes of all of files used by this segment (if
+   * {@code includeDocStores} is true), or the size of all files except the
+   * store files otherwise.
+   */
   public long sizeInBytes(boolean includeDocStores) throws IOException {
-    if (sizeInBytes == -1) {
-      List<String> files = files();
-      final int size = files.size();
-      sizeInBytes = 0;
-      for(int i=0;i<size;i++) {
-        final String fileName = files.get(i);
-        if (!includeDocStores && IndexFileNames.isDocStoreFile(fileName)) {
+    if (includeDocStores) {
+      if (sizeInBytesWithStore != -1) return sizeInBytesWithStore;
+      sizeInBytesWithStore = 0;
+      for (final String fileName : files()) {
+        // We don't count bytes used by a shared doc store against this segment
+        if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName)) {
+          sizeInBytesWithStore += dir.fileLength(fileName);
+        }
+      }
+      return sizeInBytesWithStore;
+    } else {
+      if (sizeInBytesNoStore != -1) return sizeInBytesNoStore;
+      sizeInBytesNoStore = 0;
+      for (final String fileName : files()) {
+        if (IndexFileNames.isDocStoreFile(fileName)) {
           continue;
         }
-        // We don't count bytes used by a shared doc store
-        // against this segment:
-        if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))
-          sizeInBytes += dir.fileLength(fileName);
+        sizeInBytesNoStore += dir.fileLength(fileName);
       }
+      return sizeInBytesNoStore;
     }
-    return sizeInBytes;
   }
 
   public boolean getHasVectors() throws IOException {
@@ -549,7 +558,8 @@ public final class SegmentInfo {
    * files this segment has. */
   private void clearFiles() {
     files = null;
-    sizeInBytes = -1;
+    sizeInBytesNoStore = -1;
+    sizeInBytesWithStore = -1;
   }
 
   /** {@inheritDoc} */
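
With the split caches, both flavors can be queried cheaply and are invalidated
together by clearFiles(). A small usage sketch (the SegmentInfo instance is
hypothetical):

    static void logSizes(SegmentInfo info) throws IOException {
      long withStores = info.sizeInBytes(true);   // counts this segment's own doc
                                                  // stores, never a shared doc store
      long noStores   = info.sizeInBytes(false);  // skips every doc-store file
      System.out.println("size=" + withStores + " sizeNoStore=" + noStores);
    }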

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Thu Jan 13 19:53:21 2011
@@ -19,9 +19,8 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
-import java.util.Set;
-import java.util.HashSet;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
@@ -127,41 +126,11 @@ final class SegmentMerger {
     return mergedDocs;
   }
 
-  final Collection<String> getMergedFiles(final SegmentInfo info) throws IOException {
-    Set<String> fileSet = new HashSet<String>();
-
-    // Basic files
-    for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) {
-      fileSet.add(IndexFileNames.segmentFileName(segment, "", ext));
-    }
-
-    segmentWriteState.segmentCodecs.files(directory, info, fileSet);
-
-    // Fieldable norm files
-    int numFIs = fieldInfos.size();
-    for (int i = 0; i < numFIs; i++) {
-      FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed && !fi.omitNorms) {
-        fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
-        break;
-      }
-    }
-
-    // Vector files
-    if (fieldInfos.hasVectors()) {
-      for (String ext : IndexFileNames.VECTOR_EXTENSIONS) {
-        fileSet.add(IndexFileNames.segmentFileName(segment, "", ext));
-      }
-    }
-
-    return fileSet;
-  }
-
   final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
           throws IOException {
 
     // Now merge all added files
-    Collection<String> files = getMergedFiles(info);
+    Collection<String> files = info.files();
     CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
     for (String file : files) {
       cfsWriter.addFile(file);
@@ -602,13 +571,6 @@ final class SegmentMerger {
   }
 
   private void mergeNorms() throws IOException {
-    // get needed buffer size by finding the largest segment
-    int bufferSize = 0;
-    for (IndexReader reader : readers) {
-      bufferSize = Math.max(bufferSize, reader.maxDoc());
-    }
-
-    byte[] normBuffer = null;
     IndexOutput output = null;
     try {
       for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) {
@@ -618,12 +580,15 @@ final class SegmentMerger {
             output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
             output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
           }
-          if (normBuffer == null) {
-            normBuffer = new byte[bufferSize];
-          }
           for (IndexReader reader : readers) {
             final int maxDoc = reader.maxDoc();
-            reader.norms(fi.name, normBuffer, 0);
+            byte normBuffer[] = reader.norms(fi.name);
+            if (normBuffer == null) {
+              // Can be null if this segment doesn't have
+              // any docs with this field
+              normBuffer = new byte[maxDoc];
+              Arrays.fill(normBuffer, (byte)0);
+            }
             if (!reader.hasDeletions()) {
               //optimized case for segments without deleted docs
               output.writeBytes(normBuffer, maxDoc);

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java Thu Jan 13 19:53:21 2011
@@ -33,7 +33,7 @@ public class SegmentReadState {
  // terms index on init (preflex is the only one currently
   // that must do so), then it should negate this value to
   // get the app's terms divisor:
-  public final int termsIndexDivisor;
+  public int termsIndexDivisor;
   public final String codecId;
 
   public SegmentReadState(Directory dir, SegmentInfo info,

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java Thu Jan 13 19:53:21 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -31,7 +30,6 @@ import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -51,7 +49,7 @@ public class SegmentReader extends Index
 
   private SegmentInfo si;
   private int readBufferSize;
-
+  private final ReaderContext readerContext = new AtomicReaderContext(this);
   CloseableThreadLocal<FieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
   CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();
 
@@ -991,22 +989,6 @@ public class SegmentReader extends Index
     norm.copyOnWrite()[doc] = value;                    // set the value
   }
 
-  /** Read norms into a pre-allocated array. */
-  @Override
-  public synchronized void norms(String field, byte[] bytes, int offset)
-    throws IOException {
-
-    ensureOpen();
-    Norm norm = norms.get(field);
-    if (norm == null) {
-      Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
-      return;
-    }
-  
-    norm.bytes(bytes, offset, maxDoc());
-  }
-
-
   private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
     long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
     int maxDoc = maxDoc();
@@ -1183,6 +1165,11 @@ public class SegmentReader extends Index
     buffer.append(si.toString(core.dir, pendingDeleteCount));
     return buffer.toString();
   }
+  
+  @Override
+  public ReaderContext getTopReaderContext() {
+    return readerContext;
+  }
 
   /**
    * Return the name of the segment this reader is reading.

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Thu Jan 13 19:53:21 2011
@@ -18,8 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.PrintStream;
-import java.util.Collection;
-import java.util.HashSet;
 
 import org.apache.lucene.store.Directory;
 
@@ -33,7 +31,6 @@ public class SegmentWriteState {
   public final FieldInfos fieldInfos;
   public final int numDocs;
   public boolean hasVectors;
-  public final Collection<String> flushedFiles;
 
   final SegmentCodecs segmentCodecs;
   public final String codecId;
@@ -43,7 +40,7 @@ public class SegmentWriteState {
    * faster, while larger values use less memory and make searching slightly
    * slower.  Searching is typically not dominated by dictionary lookup, so
    * tweaking this is rarely useful.*/
-  public final int termIndexInterval;
+  public int termIndexInterval;                   // TODO: this should be private to the codec, not settable here or in IWC
 
   /** Expert: The fraction of TermDocs entries stored in skip tables,
    * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
@@ -68,7 +65,6 @@ public class SegmentWriteState {
     this.numDocs = numDocs;
     this.termIndexInterval = termIndexInterval;
     this.segmentCodecs = segmentCodecs;
-    flushedFiles = new HashSet<String>();
     codecId = "";
   }
 
@@ -83,7 +79,6 @@ public class SegmentWriteState {
     numDocs = state.numDocs;
     termIndexInterval = state.termIndexInterval;
     segmentCodecs = state.segmentCodecs;
-    flushedFiles = state.flushedFiles;
     this.codecId = codecId;
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java Thu Jan 13 19:53:21 2011
@@ -18,6 +18,9 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ReaderUtil; // javadoc
 
@@ -48,8 +51,12 @@ import org.apache.lucene.index.MultiRead
 
 public final class SlowMultiReaderWrapper extends FilterIndexReader {
 
+  private final ReaderContext readerContext;
+  private final Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
+  
   public SlowMultiReaderWrapper(IndexReader other) {
     super(other);
+    readerContext = new AtomicReaderContext(this); // emulate atomic reader!
   }
 
   @Override
@@ -62,9 +69,39 @@ public final class SlowMultiReaderWrappe
     return MultiFields.getDeletedDocs(in);
   }
 
+  
   @Override
   public IndexReader[] getSequentialSubReaders() {
     return null;
   }
+
+  @Override
+  public synchronized byte[] norms(String field) throws IOException {
+    ensureOpen();
+    byte[] bytes = normsCache.get(field);
+    if (bytes != null)
+      return bytes;
+    if (!hasNorms(field))
+      return null;
+    if (normsCache.containsKey(field)) // cached omitNorms, not missing key
+      return null;
+    
+    bytes = MultiNorms.norms(in, field);
+    normsCache.put(field, bytes);
+    return bytes;
+  }
   
+  @Override
+  public ReaderContext getTopReaderContext() {
+    return readerContext;
+  }
+  
+  @Override
+  protected void doSetNorm(int n, String field, byte value)
+      throws CorruptIndexException, IOException {
+    synchronized(normsCache) {
+      normsCache.remove(field);
+    }
+    in.doSetNorm(n, field, value);
+  }
 }
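
A usage sketch for the wrapper after this change (the Directory variable is
hypothetical): it makes a composite reader look atomic, with a single
AtomicReaderContext and cached top-level norms.

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.SlowMultiReaderWrapper;
    import org.apache.lucene.store.Directory;

    class AtomicView {
      static IndexReader open(Directory directory) throws IOException {
        IndexReader composite = IndexReader.open(directory); // may have sub-readers
        // getSequentialSubReaders() returns null and getTopReaderContext() is an
        // AtomicReaderContext, so consumers treat the wrapper as a single segment:
        return new SlowMultiReaderWrapper(composite);
      }
    }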

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java Thu Jan 13 19:53:21 2011
@@ -70,11 +70,7 @@ final class StoredFieldsWriter {
       fieldsWriter = null;
       lastDocID = 0;
 
-      String fieldsName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELDS_EXTENSION);
       String fieldsIdxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
-      state.flushedFiles.add(fieldsName);
-      state.flushedFiles.add(fieldsIdxName);
-
       if (4 + ((long) state.numDocs) * 8 != state.directory.fileLength(fieldsIdxName)) {
         throw new RuntimeException("after flush: fdx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(fieldsIdxName) + " length in bytes of " + fieldsIdxName + " file exists?=" + state.directory.fileExists(fieldsIdxName));
       }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Thu Jan 13 19:53:21 2011
@@ -54,9 +54,6 @@ final class TermVectorsTermsWriter exten
       fill(state.numDocs);
       assert state.segmentName != null;
       String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
-      String fldName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_FIELDS_EXTENSION);
-      String docName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
-
       tvx.close();
       tvf.close();
       tvd.close();
@@ -64,10 +61,6 @@ final class TermVectorsTermsWriter exten
       if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName))
         throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
 
-      state.flushedFiles.add(idxName);
-      state.flushedFiles.add(fldName);
-      state.flushedFiles.add(docName);
-
       lastDocID = 0;
       state.hasVectors = hasVectors;
       hasVectors = false;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java Thu Jan 13 19:53:21 2011
@@ -80,11 +80,57 @@ public abstract class Terms {
     }
   }
 
+  /**
+   * Expert: Get {@link DocsEnum} for the specified {@link TermState}.
+   * This method may return <code>null</code> if the term does not exist.
+   * 
+   * @see TermsEnum#termState()
+   * @see TermsEnum#seek(BytesRef, TermState) */
+  public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
+    final TermsEnum termsEnum = getThreadTermsEnum();
+    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+      return termsEnum.docs(skipDocs, reuse);
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Get {@link DocsAndPositionsEnum} for the specified {@link TermState}. This
+   * method may return <code>null</code> if the term does not exist, or positions
+   * were not indexed.
+   * 
+   * @see TermsEnum#termState()
+   * @see TermsEnum#seek(BytesRef, TermState) */
+  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
+    final TermsEnum termsEnum = getThreadTermsEnum();
+    if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
+      return termsEnum.docsAndPositions(skipDocs, reuse);
+    } else {
+      return null;
+    }
+  }
+
   public long getUniqueTermCount() throws IOException {
     throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
   }
 
-  protected TermsEnum getThreadTermsEnum() throws IOException {
+  /**
+   * Returns a thread-private {@link TermsEnum} instance. Obtaining
+   * a {@link TermsEnum} from this method might be more efficient than using
+   * {@link #iterator()} directly, since this method doesn't necessarily create a
+   * new {@link TermsEnum} instance.
+   * <p>
+   * NOTE: {@link TermsEnum} instances obtained from this method must not be
+   * shared across threads. The enum should only be used within a local context
+   * where other threads can't access it.
+   * 
+   * @return a thread-private {@link TermsEnum} instance
+   * @throws IOException
+   *           if an IOException occurs
+   * @lucene.internal
+   */
+  public TermsEnum getThreadTermsEnum() throws IOException {
     TermsEnum termsEnum = threadEnums.get();
     if (termsEnum == null) {
       termsEnum = iterator();
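
A sketch of the call pattern these expert methods enable, with hypothetical
variable names and assumed import locations: capture the TermState after a
successful seek, then re-obtain a DocsEnum later without a second dictionary
lookup.

    import java.io.IOException;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.TermState;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    class DocsViaTermState {
      static DocsEnum docs(Terms terms, BytesRef term, Bits skipDocs) throws IOException {
        final TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seek(term) != TermsEnum.SeekStatus.FOUND) {
          return null;
        }
        final TermState state = termsEnum.termState();  // remember the dict position
        // ... the enum may seek elsewhere in the meantime ...
        return terms.docs(skipDocs, term, state, null); // no dictionary re-seek
      }
    }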

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java Thu Jan 13 19:53:21 2011
@@ -73,7 +73,34 @@ public abstract class TermsEnum {
    *  may be before or after the current ord.  See {@link
    *  #seek(BytesRef)}. */
   public abstract SeekStatus seek(long ord) throws IOException;
-  
+
+  /**
+   * Expert: Seeks a specific position by {@link TermState} previously obtained
+   * from {@link #termState()}. Callers should maintain the {@link TermState} to
+   * use this method. Low-level implementations may position the TermsEnum
+   * without re-seeking the term dictionary.
+   * <p>
+   * Seeking by {@link TermState} should only be used iff the enum the state was
+   * obtained from and the enum the state is used for seeking were obtained from
+   * the same {@link IndexReader}; otherwise a {@link #seek(BytesRef, TermState)}
+   * call can leave the enum in an undefined state.
+   * <p>
+   * NOTE: Using this method with an incompatible {@link TermState} might leave
+   * this {@link TermsEnum} in an undefined state. At the segment level,
+   * {@link TermState} instances are compatible only iff the source and the
+   * target {@link TermsEnum} operate on the same field. When operating at the
+   * segment level, TermState instances must not be used across segments.
+   * <p>
+   * NOTE: A seek by {@link TermState} might not restore the
+   * {@link AttributeSource}'s state. {@link AttributeSource} states must be
+   * maintained separately if this method is used.
+   * @param term the term the TermState corresponds to
+   * @param state the {@link TermState}
+   */
+  public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+    return seek(term);
+  }
+
   /** Increments the enumeration to the next element.
    *  Returns the resulting term, or null if the end was
    *  hit.  The returned BytesRef may be re-used across calls
@@ -98,7 +125,7 @@ public abstract class TermsEnum {
    *  first time, after next() returns null or seek returns
    *  {@link SeekStatus#END}.*/
   public abstract int docFreq();
-
+  
   /** Get {@link DocsEnum} for the current term.  Do not
    *  call this before calling {@link #next} or {@link
    *  #seek} for the first time.  This method will not
@@ -116,6 +143,25 @@ public abstract class TermsEnum {
    *  the postings by this codec. */
   public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
 
+  /**
+   * Expert: Returns the TermsEnum's internal state to position the TermsEnum
+   * without re-seeking the term dictionary.
+   * <p>
+   * NOTE: A seek by {@link TermState} might not capture the
+   * {@link AttributeSource}'s state. Callers must maintain the
+   * {@link AttributeSource} state separately.
+   * 
+   * @see TermState
+   * @see #seek(BytesRef, TermState)
+   */
+  public TermState termState() throws IOException {
+    return new TermState() {
+      @Override
+      public void copyFrom(TermState other) {
+      }
+    };
+  }
+  
   /** Return the {@link BytesRef} Comparator used to sort
    *  terms provided by the iterator.  This may return
    *  null if there are no terms.  Callers may invoke this
@@ -123,10 +169,6 @@ public abstract class TermsEnum {
    *  instance & reuse it. */
   public abstract Comparator<BytesRef> getComparator() throws IOException;
 
-  /** Optional optimization hint: informs the codec that the
-   *  current term is likely to be re-seek'd-to soon.  */
-  public abstract void cacheCurrentTerm() throws IOException;
-
   /** An empty TermsEnum for quickly returning an empty instance e.g.
    * in {@link org.apache.lucene.search.MultiTermQuery}
    * <p><em>Please note:</em> This enum should be unmodifiable,
@@ -142,9 +184,6 @@ public abstract class TermsEnum {
     public SeekStatus seek(long ord) { return SeekStatus.END; }
     
     @Override
-    public void cacheCurrentTerm() {}
-    
-    @Override
     public BytesRef term() {
       throw new IllegalStateException("this method should never be called");
     }
@@ -183,5 +222,15 @@ public abstract class TermsEnum {
     public synchronized AttributeSource attributes() {
       return super.attributes();
     }
+
+    @Override
+    public TermState termState() throws IOException {
+      throw new IllegalStateException("this method should never be called");
+    }
+
+    @Override
+    public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+      throw new IllegalStateException("this method should never be called");
+    }
   };
 }
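
A companion sketch for seek(BytesRef, TermState), imports as in the previous
sketch and names again hypothetical; per the javadoc above, the state may only
be handed to an enum obtained from the same IndexReader and field.

    class SeekByState {
      static int docFreq(Terms terms, BytesRef term, TermState state) throws IOException {
        // thread-private enum from the same reader/field the state came from:
        final TermsEnum termsEnum = terms.getThreadTermsEnum();
        termsEnum.seek(term, state); // positions the enum without a dictionary lookup
        return termsEnum.docFreq();  // valid, as if seek(term) had just found the term
      }
    }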

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java Thu Jan 13 19:53:21 2011
@@ -36,13 +36,17 @@ final class DeltaBytesReader {
     term.copy(text);
   }
 
-  void read() throws IOException {
+  boolean read() throws IOException {
     final int start = in.readVInt();
+    if (start == DeltaBytesWriter.TERM_EOF) {
+      return false;
+    }
     final int suffix = in.readVInt();
     assert start <= term.length: "start=" + start + " length=" + term.length;
     final int newLength = start+suffix;
     term.grow(newLength);
     in.readBytes(term.bytes, start, suffix);
     term.length = newLength;
+    return true;
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java Thu Jan 13 19:53:21 2011
@@ -20,11 +20,18 @@ package org.apache.lucene.index.codecs;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 
 import java.io.IOException;
 
 final class DeltaBytesWriter {
 
+  // Must be bigger than
+  // DocumentsWriter.MAX_TERM_LENGTH_UTF8.  If you change
+  // this it's an index format change, so that change must be
+  // versioned:
+  final static int TERM_EOF = BYTE_BLOCK_SIZE;
+
   private byte[] lastBytes = new byte[10];
   private int lastLength;
   final IndexOutput out;
@@ -45,8 +52,9 @@ final class DeltaBytesWriter {
 
     final int limit = length < lastLength ? length : lastLength;
     while(start < limit) {
-      if (bytes[upto] != lastBytes[start])
+      if (bytes[upto] != lastBytes[start]) {
         break;
+      }
       start++;
       upto++;
     }
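
Taken together, these two changes replace count-based iteration with an in-band
sentinel: the writer emits TERM_EOF (a start value no real term can produce,
since it exceeds the maximum term length) and read() returns false on decoding
it. A consuming-loop sketch, as if written inside the codecs package and
assuming the reader is positioned at the start of a term block:

    // package org.apache.lucene.index.codecs; -- DeltaBytesReader is package-private
    static int countTerms(DeltaBytesReader bytesReader) throws IOException {
      int count = 0;
      while (bytesReader.read()) {   // false once the TERM_EOF sentinel is decoded
        count++;                     // bytesReader.term now holds the full term bytes
      }
      return count;
    }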

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Thu Jan 13 19:53:21 2011
@@ -33,29 +33,6 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.io.IOException;
 
-/**
- * Uses a simplistic format to record terms dict index
- * information.  Limititations:
- *
- *   - Index for all fields is loaded entirely into RAM up
- *     front 
- *   - Index is stored in RAM using shared byte[] that
- *     wastefully expand every term.  Using FST to share
- *     common prefix & suffix would save RAM.
- *   - Index is taken at regular numTerms (every 128 by
- *     default); might be better to do it by "net docFreqs"
- *     encountered, so that for spans of low-freq terms we
- *     take index less often.
- *
- * A better approach might be something similar to how
- * postings are encoded, w/ multi-level skips.  Ie, load all
- * terms index data into memory, as a single large compactly
- * encoded stream (eg delta bytes + delta offset).  Index
- * that w/ multi-level skipper.  Then to look up a term is
- * the equivalent binary search, using the skipper instead,
- * while data remains compressed in memory.
- */
-
 import org.apache.lucene.index.IndexFileNames;
 
 /** @lucene.experimental */
@@ -74,7 +51,7 @@ public class FixedGapTermsIndexReader ex
   final private int indexInterval;
 
   // Closed if indexLoaded is true:
-  final private IndexInput in;
+  private IndexInput in;
   private volatile boolean indexLoaded;
 
   private final Comparator<BytesRef> termComp;
@@ -85,7 +62,7 @@ public class FixedGapTermsIndexReader ex
   private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
   private PagedBytes.Reader termBytesReader;
 
-  final HashMap<FieldInfo,FieldIndexReader> fields = new HashMap<FieldInfo,FieldIndexReader>();
+  final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<FieldInfo,FieldIndexData>();
   
   // start of the field info data
   protected long dirOffset;
@@ -95,7 +72,7 @@ public class FixedGapTermsIndexReader ex
 
     this.termComp = termComp;
 
-    IndexInput in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
     
     boolean success = false;
 
@@ -116,49 +93,137 @@ public class FixedGapTermsIndexReader ex
       seekDir(in, dirOffset);
 
       // Read directory
-      final int numFields = in.readInt();
-
+      final int numFields = in.readVInt();      
       for(int i=0;i<numFields;i++) {
-        final int field = in.readInt();
-        final int numIndexTerms = in.readInt();
-        final long termsStart = in.readLong();
-        final long indexStart = in.readLong();
-        final long packedIndexStart = in.readLong();
-        final long packedOffsetsStart = in.readLong();
+        final int field = in.readVInt();
+        final int numIndexTerms = in.readVInt();
+        final long termsStart = in.readVLong();
+        final long indexStart = in.readVLong();
+        final long packedIndexStart = in.readVLong();
+        final long packedOffsetsStart = in.readVLong();
         assert packedIndexStart >= indexStart: "packedStart=" + packedIndexStart + " indexStart=" + indexStart + " numIndexTerms=" + numIndexTerms + " seg=" + segment;
-        if (numIndexTerms > 0) {
-          final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-          fields.put(fieldInfo, new FieldIndexReader(in, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
-        }
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
       }
       success = true;
     } finally {
       if (indexDivisor > 0) {
         in.close();
-        this.in = null;
+        in = null;
         if (success) {
           indexLoaded = true;
         }
         termBytesReader = termBytes.freeze(true);
-      } else {
-        this.in = in;
       }
     }
   }
   
+  @Override
+  public int getDivisor() {
+    return indexDivisor;
+  }
+
   protected void readHeader(IndexInput input) throws IOException {
     CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
       FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
     dirOffset = input.readLong();
   }
 
-  private final class FieldIndexReader extends FieldReader {
+  private class IndexEnum extends FieldIndexEnum {
+    private final FieldIndexData.CoreFieldIndex fieldIndex;
+    private final BytesRef term = new BytesRef();
+    private final BytesRef nextTerm = new BytesRef();
+    private long ord;
 
-    final private FieldInfo fieldInfo;
+    public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
+      this.fieldIndex = fieldIndex;
+    }
+
+    @Override
+    public BytesRef term() {
+      return term;
+    }
+
+    @Override
+    public long seek(BytesRef target) {
+      int lo = 0;				  // binary search
+      int hi = fieldIndex.numIndexTerms - 1;
+      assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
+
+      while (hi >= lo) {
+        int mid = (lo + hi) >>> 1;
+
+        final long offset = fieldIndex.termOffsets.get(mid);
+        final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
+        termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+        int delta = termComp.compare(target, term);
+        if (delta < 0) {
+          hi = mid - 1;
+        } else if (delta > 0) {
+          lo = mid + 1;
+        } else {
+          assert mid >= 0;
+          ord = mid*totalIndexInterval;
+          return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
+        }
+      }
 
-    private volatile CoreFieldIndex coreIndex;
+      if (hi < 0) {
+        assert hi == -1;
+        hi = 0;
+      }
 
-    private final IndexInput in;
+      final long offset = fieldIndex.termOffsets.get(hi);
+      final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+      ord = hi*totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
+    }
+
+    @Override
+    public long next() {
+      final int idx = 1 + (int) (ord / totalIndexInterval);
+      if (idx >= fieldIndex.numIndexTerms) {
+        return -1;
+      }
+      ord += totalIndexInterval;
+
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(nextTerm, fieldIndex.termBytesStart + offset, length);
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+
+    @Override
+    public long ord() {
+      return ord;
+    }
+
+    @Override
+    public long seek(long ord) {
+      int idx = (int) (ord / totalIndexInterval);
+      // caller must ensure ord is in bounds
+      assert idx < fieldIndex.numIndexTerms;
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+      this.ord = idx * totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+  }
+
+  @Override
+  public boolean supportsOrd() {
+    return true;
+  }
+
+  private final class FieldIndexData {
+
+    final private FieldInfo fieldInfo;
+
+    volatile CoreFieldIndex coreIndex;
 
     private final long indexStart;
     private final long termsStart;
@@ -167,11 +232,10 @@ public class FixedGapTermsIndexReader ex
 
     private final int numIndexTerms;
 
-    public FieldIndexReader(IndexInput in, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
-                            long packedOffsetsStart) throws IOException {
+    public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+                          long packedOffsetsStart) throws IOException {
 
       this.fieldInfo = fieldInfo;
-      this.in = in;
       this.termsStart = termsStart;
       this.indexStart = indexStart;
       this.packedIndexStart = packedIndexStart;
@@ -182,12 +246,7 @@ public class FixedGapTermsIndexReader ex
       // is -1, so that PrefixCodedTermsReader can call
       // isIndexTerm for each field:
       if (indexDivisor > 0) {
-        coreIndex = new CoreFieldIndex(indexStart,
-                                       termsStart,
-                                       packedIndexStart,
-                                       packedOffsetsStart,
-                                       numIndexTerms);
-      
+        loadTermsIndex();
       }
     }
 
@@ -197,46 +256,11 @@ public class FixedGapTermsIndexReader ex
       }
     }
 
-    @Override
-    public boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) {
-      if (onlyLoaded) {
-        return ord % totalIndexInterval == 0;
-      } else {
-        return ord % indexInterval == 0;
-      }
-    }
-
-    @Override
-    public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      } else {
-        return coreIndex.nextIndexTerm(ord, result);
-      }
-    }
-
-    @Override
-    public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
-      // You must call loadTermsIndex if you had specified -1 for indexDivisor
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      }
-      coreIndex.getIndexOffset(term, result);
-    }
-
-    @Override
-    public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
-      // You must call loadTermsIndex if you had specified
-      // indexDivisor < 0 to ctor
-      if (coreIndex == null) {
-        throw new IllegalStateException("terms index was not loaded");
-      }
-      coreIndex.getIndexOffset(ord, result);
-    }
-
     private final class CoreFieldIndex {
 
-      final private long termBytesStart;
+      // where this field's terms begin in the packed byte[]
+      // data
+      final long termBytesStart;
 
       // offset into index termBytes
       final PackedInts.Reader termOffsets;
@@ -245,7 +269,6 @@ public class FixedGapTermsIndexReader ex
       final PackedInts.Reader termsDictOffsets;
 
       final int numIndexTerms;
-
       final long termsStart;
 
       public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms) throws IOException {
@@ -315,7 +338,6 @@ public class FixedGapTermsIndexReader ex
               termsDictOffsetsM.set(upto, termsDictOffsetsIter.next());
 
               termOffsetsM.set(upto, termOffsetUpto);
-              upto++;
 
               long termOffset = termOffsetsIter.next();
               long nextTermOffset = termOffsetsIter.next();
@@ -328,6 +350,11 @@ public class FixedGapTermsIndexReader ex
               termBytes.copy(clone, numTermBytes);
               termOffsetUpto += numTermBytes;
 
+              upto++;
+              if (upto == this.numIndexTerms) {
+                break;
+              }
+
               // skip terms:
               termsDictOffsetsIter.next();
               for(int i=0;i<indexDivisor-2;i++) {
@@ -344,71 +371,10 @@ public class FixedGapTermsIndexReader ex
           }
         }
       }
-
-      public boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException {
-        int idx = 1 + (int) (ord / totalIndexInterval);
-        if (idx < numIndexTerms) {
-          fillResult(idx, result);
-          return true;
-        } else {
-          return false;
-        }
-      }
-
-      private void fillResult(int idx, TermsIndexResult result) {
-        final long offset = termOffsets.get(idx);
-        final int length = (int) (termOffsets.get(1+idx) - offset);
-        termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-        result.position = idx * totalIndexInterval;
-        result.offset = termsStart + termsDictOffsets.get(idx);
-      }
-
-      public void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException {
-        int lo = 0;					  // binary search
-        int hi = numIndexTerms - 1;
-        assert totalIndexInterval > 0 : "totalIndexInterval=" + totalIndexInterval;
-
-        while (hi >= lo) {
-          int mid = (lo + hi) >>> 1;
-
-          final long offset = termOffsets.get(mid);
-          final int length = (int) (termOffsets.get(1+mid) - offset);
-          termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
-          int delta = termComp.compare(term, result.term);
-          if (delta < 0) {
-            hi = mid - 1;
-          } else if (delta > 0) {
-            lo = mid + 1;
-          } else {
-            assert mid >= 0;
-            result.position = mid*totalIndexInterval;
-            result.offset = termsStart + termsDictOffsets.get(mid);
-            return;
-          }
-        }
-        if (hi < 0) {
-          assert hi == -1;
-          hi = 0;
-        }
-
-        final long offset = termOffsets.get(hi);
-        final int length = (int) (termOffsets.get(1+hi) - offset);
-        termBytesReader.fillSlice(result.term, termBytesStart + offset, length);
-
-        result.position = hi*totalIndexInterval;
-        result.offset = termsStart + termsDictOffsets.get(hi);
-      }
-
-      public void getIndexOffset(long ord, TermsIndexResult result) throws IOException {
-        int idx = (int) (ord / totalIndexInterval);
-        // caller must ensure ord is in bounds
-        assert idx < numIndexTerms;
-        fillResult(idx, result);
-      }
     }
   }
 
+  // Externally synced in IndexWriter
   @Override
   public void loadTermsIndex(int indexDivisor) throws IOException {
     if (!indexLoaded) {
@@ -420,7 +386,7 @@ public class FixedGapTermsIndexReader ex
       }
       this.totalIndexInterval = indexInterval * this.indexDivisor;
 
-      Iterator<FieldIndexReader> it = fields.values().iterator();
+      Iterator<FieldIndexData> it = fields.values().iterator();
       while(it.hasNext()) {
         it.next().loadTermsIndex();
       }
@@ -432,8 +398,13 @@ public class FixedGapTermsIndexReader ex
   }
 
   @Override
-  public FieldReader getField(FieldInfo fieldInfo) {
-    return fields.get(fieldInfo);
+  public FieldIndexEnum getFieldEnum(FieldInfo fieldInfo) {
+    final FieldIndexData fieldData = fields.get(fieldInfo);
+    if (fieldData.coreIndex == null) {
+      return null;
+    } else {
+      return new IndexEnum(fieldData.coreIndex);
+    }
   }
 
   public static void files(Directory dir, SegmentInfo info, String id, Collection<String> files) {

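The getIndexOffset binary search removed above lives on, conceptually, behind
the new FieldIndexEnum.seek API: find the largest indexed term <= the target
and return its file pointer into the terms dict.  A minimal standalone sketch
of that lookup, with plain Strings and arrays standing in for the packed term
bytes and offsets (illustrative names, not the reader's actual internals):

    // Sketch: binary search for the largest indexed term <= target,
    // returning its terms-dict file pointer, as in the removed
    // getIndexOffset.  Assumes indexTerms is sorted and non-empty.
    class IndexSeekSketch {
      final String[] indexTerms;      // every totalIndexInterval-th term
      final long[] termsDictOffsets;  // parallel file pointers

      IndexSeekSketch(String[] indexTerms, long[] termsDictOffsets) {
        this.indexTerms = indexTerms;
        this.termsDictOffsets = termsDictOffsets;
      }

      long seek(String target) {
        int lo = 0, hi = indexTerms.length - 1;
        while (hi >= lo) {
          final int mid = (lo + hi) >>> 1;
          final int delta = target.compareTo(indexTerms[mid]);
          if (delta < 0) {
            hi = mid - 1;
          } else if (delta > 0) {
            lo = mid + 1;
          } else {
            return termsDictOffsets[mid];  // exact index term hit
          }
        }
        // hi is now the largest entry < target; clamp at 0, matching
        // the removed code's "if (hi < 0) hi = 0":
        return termsDictOffsets[Math.max(hi, 0)];
      }
    }
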
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Thu Jan 13 19:53:21 2011
@@ -31,7 +31,14 @@ import java.util.List;
 import java.util.ArrayList;
 import java.io.IOException;
 
-/** @lucene.experimental */
+/**
+ * Selects every Nth term as an index term, and holds term
+ * bytes fully expanded in memory.  This terms index
+ * supports seeking by ord.  See {@link
+ * VariableGapTermsIndexWriter} for a more memory-efficient
+ * terms index that does not support seeking by ord.
+ *
+ * @lucene.experimental */
 public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
   protected final IndexOutput out;
 
@@ -50,7 +57,6 @@ public class FixedGapTermsIndexWriter ex
 
   public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
     final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
-    state.flushedFiles.add(indexFileName);
     termIndexInterval = state.termIndexInterval;
     out = state.directory.createOutput(indexFileName);
     fieldInfos = state.fieldInfos;
@@ -203,15 +209,25 @@ public class FixedGapTermsIndexWriter ex
     final long dirStart = out.getFilePointer();
     final int fieldCount = fields.size();
 
-    out.writeInt(fieldCount);
+    int nonNullFieldCount = 0;
     for(int i=0;i<fieldCount;i++) {
       SimpleFieldWriter field = fields.get(i);
-      out.writeInt(field.fieldInfo.number);
-      out.writeInt(field.numIndexTerms);
-      out.writeLong(field.termsStart);
-      out.writeLong(field.indexStart);
-      out.writeLong(field.packedIndexStart);
-      out.writeLong(field.packedOffsetsStart);
+      if (field.numIndexTerms > 0) {
+        nonNullFieldCount++;
+      }
+    }
+
+    out.writeVInt(nonNullFieldCount);
+    for(int i=0;i<fieldCount;i++) {
+      SimpleFieldWriter field = fields.get(i);
+      if (field.numIndexTerms > 0) {
+        out.writeVInt(field.fieldInfo.number);
+        out.writeVInt(field.numIndexTerms);
+        out.writeVLong(field.termsStart);
+        out.writeVLong(field.indexStart);
+        out.writeVLong(field.packedIndexStart);
+        out.writeVLong(field.packedOffsetsStart);
+      }
     }
     writeTrailer(dirStart);
     out.close();

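The fixed-gap policy the new javadoc describes reduces to a modulus check
against termIndexInterval.  A hedged sketch of just that selection step
(hypothetical class, not the real SimpleFieldWriter):

    // Sketch: every termIndexInterval-th term written to the terms
    // dict also becomes an index term; term 0 is always indexed.
    class FixedGapSelection {
      private final int termIndexInterval;  // e.g. 32
      private long numTerms;

      FixedGapSelection(int termIndexInterval) {
        this.termIndexInterval = termIndexInterval;
      }

      // Called once per term, in sorted term order:
      boolean checkIndexTerm() {
        return numTerms++ % termIndexInterval == 0;
      }
    }
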
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java Thu Jan 13 19:53:21 2011
@@ -30,9 +30,9 @@ import org.apache.lucene.util.BytesRef;
 
 public abstract class PostingsConsumer {
 
-  /** Adds a new doc in this term.  Return null if this
-   *  consumer doesn't need to see the positions for this
-   *  doc. */
+  /** Adds a new doc in this term.  If this field omits term
+   *  freqs & positions then termDocFreq should be ignored,
+   *  and finishDoc will not be called. */
   public abstract void startDoc(int docID, int termDocFreq) throws IOException;
 
   public static class PostingsMergeState {
@@ -49,7 +49,8 @@ public abstract class PostingsConsumer {
   public abstract void addPosition(int position, BytesRef payload) throws IOException;
 
   /** Called when we are done adding positions & payloads
-   * for each doc */
+   *  for each doc.  Not called when the field omits term
+   *  freq and positions. */
   public abstract void finishDoc() throws IOException;
 
   /** Default merge impl: append documents, mapping around

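The clarified contract above is easiest to see from the caller's side: when a
field omits term freqs and positions, only startDoc is called (with a
meaningless termDocFreq), and finishDoc never fires.  A hedged sketch of a
driving loop under that contract (illustrative helper, not the actual
flush/merge code):

    import java.io.IOException;
    import org.apache.lucene.index.DocsAndPositionsEnum;
    import org.apache.lucene.search.DocIdSetIterator;

    // Sketch: drive a PostingsConsumer for one term, honoring the
    // omitTermFreqAndPositions contract described above.
    class PostingsConsumerUsage {
      static void writeTerm(PostingsConsumer consumer,
                            DocsAndPositionsEnum postings,
                            boolean omitTermFreqAndPositions) throws IOException {
        int docID;
        while ((docID = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          if (omitTermFreqAndPositions) {
            // termDocFreq is ignored; finishDoc is never called:
            consumer.startDoc(docID, 0);
          } else {
            final int freq = postings.freq();
            consumer.startDoc(docID, freq);
            for (int i = 0; i < freq; i++) {
              consumer.addPosition(postings.nextPosition(), null);  // no payload here
            }
            consumer.finishDoc();
          }
        }
      }
    }
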
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Thu Jan 13 19:53:21 2011
@@ -42,17 +42,17 @@ public abstract class PostingsReaderBase
   public abstract void init(IndexInput termsIn) throws IOException;
 
   /** Return a newly created empty TermState */
-  public abstract TermState newTermState() throws IOException;
+  public abstract PrefixCodedTermState newTermState() throws IOException;
 
-  public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState state, boolean isIndexTerm) throws IOException;
+  public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
+  public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
 
   /** Must fully consume state, since after this call that
    *  TermState may be reused. */
-  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+  public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
 
   public abstract void close() throws IOException;
 }

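One practical payoff of the PrefixCodedTermState plumbing is cheap re-seeking:
a TermsEnum can snapshot its state after a successful seek and later restore
it without touching the terms index.  A hedged sketch, using the
termState()/seek(term, state) pair shown in the PrefixCodedTermsReader diff
below (illustrative helper, not library code):

    import java.io.IOException;
    import org.apache.lucene.index.TermState;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    // Sketch: snapshot a term's state once, then restore it later
    // without another index seek or scan.
    class TermStateReuse {
      static TermState remember(TermsEnum te, BytesRef term) throws IOException {
        if (te.seek(term) == TermsEnum.SeekStatus.FOUND) {
          return te.termState();  // clone of internal state + file pointer
        }
        return null;
      }

      static void restore(TermsEnum te, BytesRef term, TermState state) throws IOException {
        te.seek(term, state);     // positions the enum directly; no scan
      }
    }
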
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Thu Jan 13 19:53:21 2011
@@ -31,6 +31,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
@@ -68,7 +69,7 @@ public class PrefixCodedTermsReader exte
   private final Comparator<BytesRef> termComp;
 
   // Caches the most recently looked-up field + terms:
-  private final DoubleBarrelLRUCache<FieldAndTerm,TermState> termsCache;
+  private final DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState> termsCache;
 
   // Reads the terms index
   private TermsIndexReaderBase indexReader;
@@ -84,11 +85,6 @@ public class PrefixCodedTermsReader exte
     public FieldAndTerm() {
     }
 
-    public FieldAndTerm(String field, BytesRef term) {
-      this.field = field;
-      this.term = new BytesRef(term);
-    }
-
     public FieldAndTerm(FieldAndTerm other) {
       field = other.field;
       term = new BytesRef(other.term);
@@ -116,7 +112,7 @@ public class PrefixCodedTermsReader exte
     throws IOException {
     
     this.postingsReader = postingsReader;
-    termsCache = new DoubleBarrelLRUCache<FieldAndTerm,TermState>(termsCacheSize);
+    termsCache = new DoubleBarrelLRUCache<FieldAndTerm,PrefixCodedTermState>(termsCacheSize);
 
     this.termComp = termComp;
     
@@ -140,12 +136,10 @@ public class PrefixCodedTermsReader exte
         final long numTerms = in.readLong();
         assert numTerms >= 0;
         final long termsStartPointer = in.readLong();
-        final TermsIndexReaderBase.FieldReader fieldIndexReader;
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        fieldIndexReader = indexReader.getField(fieldInfo);
         if (numTerms > 0) {
           assert !fields.containsKey(fieldInfo.name);
-          fields.put(fieldInfo.name, new FieldReader(fieldIndexReader, fieldInfo, numTerms, termsStartPointer));
+          fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer));
         }
       }
       success = true;
@@ -251,14 +245,12 @@ public class PrefixCodedTermsReader exte
     final long numTerms;
     final FieldInfo fieldInfo;
     final long termsStartPointer;
-    final TermsIndexReaderBase.FieldReader fieldIndexReader;
 
-    FieldReader(TermsIndexReaderBase.FieldReader fieldIndexReader, FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
       this.termsStartPointer = termsStartPointer;
-      this.fieldIndexReader = fieldIndexReader;
     }
 
     @Override
@@ -281,18 +273,25 @@ public class PrefixCodedTermsReader exte
       return numTerms;
     }
 
-    // Iterates through terms in this field
-    private class SegmentTermsEnum extends TermsEnum {
+    // Iterates through terms in this field, not supporting ord()
+    private final class SegmentTermsEnum extends TermsEnum {
       private final IndexInput in;
       private final DeltaBytesReader bytesReader;
-      private final TermState state;
+      private final PrefixCodedTermState state;
       private boolean seekPending;
-      private final TermsIndexReaderBase.TermsIndexResult indexResult = new TermsIndexReaderBase.TermsIndexResult();
       private final FieldAndTerm fieldTerm = new FieldAndTerm();
+      private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
+      private boolean positioned;
+      private boolean didIndexNext;
+      private BytesRef nextIndexTerm;
+      private boolean isIndexTerm;
+      private final boolean doOrd;
 
       SegmentTermsEnum() throws IOException {
         in = (IndexInput) PrefixCodedTermsReader.this.in.clone();
         in.seek(termsStartPointer);
+        indexEnum = indexReader.getFieldEnum(fieldInfo);
+        doOrd = indexReader.supportsOrd();
         bytesReader = new DeltaBytesReader(in);
         fieldTerm.field = fieldInfo.name;
         state = postingsReader.newTermState();
@@ -304,12 +303,26 @@ public class PrefixCodedTermsReader exte
         return termComp;
       }
 
-      @Override
-      public void cacheCurrentTerm() {
-        TermState stateCopy = (TermState) state.clone();
-        stateCopy.filePointer = in.getFilePointer();
-        termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term),
-                       stateCopy);
+      // called only from assert
+      private boolean first;
+      private int indexTermCount;
+
+      private boolean startSeek() {
+        first = true;
+        indexTermCount = 0;
+        return true;
+      }
+
+      private boolean checkSeekScan() {
+        if (!first && isIndexTerm) {
+          indexTermCount++;
+          if (indexTermCount >= indexReader.getDivisor()) {
+            //System.out.println("now fail count=" + indexTermCount);
+            return false;
+          }
+        }
+        first = false;
+        return true;
       }
 
       /** Seeks until the first term that's >= the provided
@@ -317,16 +330,24 @@ public class PrefixCodedTermsReader exte
        *  is found, SeekStatus.NOT_FOUND if a different term
        *  was found, SeekStatus.END if we hit EOF */
       @Override
-      public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
+      public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException {
+
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+        
+        //System.out.println("te.seek term=" + fieldInfo.name + ":" + term.utf8ToString() + " current=" + term().utf8ToString() + " useCache=" + useCache + " this="  + this);
+
         // Check cache
         fieldTerm.term = term;
         TermState cachedState;
         if (useCache) {
           cachedState = termsCache.get(fieldTerm);
           if (cachedState != null) {
-            state.copy(cachedState);
-            seekPending = true;
-            bytesReader.term.copy(term);
+            state.copyFrom(cachedState);
+            setTermState(term, state);
+            positioned = false;
+            //System.out.println("  cached!");
             return SeekStatus.FOUND;
           }
         } else {
@@ -335,36 +356,54 @@ public class PrefixCodedTermsReader exte
 
         boolean doSeek = true;
 
-        if (state.ord != -1) {
-          // we are positioned
+        if (positioned) {
 
           final int cmp = termComp.compare(bytesReader.term, term);
 
           if (cmp == 0) {
             // already at the requested term
             return SeekStatus.FOUND;
-          }
+          } else if (cmp < 0) {
+
+            if (seekPending) {
+              seekPending = false;
+              in.seek(state.filePointer);
+              indexEnum.seek(bytesReader.term);
+              didIndexNext = false;
+            }
+
+            // Target term is after current term
+            if (!didIndexNext) {
+              if (indexEnum.next() == -1) {
+                nextIndexTerm = null;
+              } else {
+                nextIndexTerm = indexEnum.term();
+              }
+              //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
+              didIndexNext = true;
+            }
 
-          if (cmp < 0 &&
-              fieldIndexReader.nextIndexTerm(state.ord, indexResult) &&
-              termComp.compare(indexResult.term, term) > 0) {
-            // Optimization: requested term is within the
-            // same index block we are now in; skip seeking
-            // (but do scanning):
-            doSeek = false;
+            if (nextIndexTerm == null || termComp.compare(term, nextIndexTerm) < 0) {
+              // Optimization: requested term is within the
+              // same index block we are now in; skip seeking
+              // (but do scanning):
+              doSeek = false;
+              //System.out.println("  skip seek: nextIndexTerm=" + nextIndexTerm);
+            }
           }
         }
 
-        // Used only for assert:
-        final long startOrd;
-
         if (doSeek) {
 
-          // As index to find biggest index term that's <=
-          // our text:
-          fieldIndexReader.getIndexOffset(term, indexResult);
+          positioned = true;
 
-          in.seek(indexResult.offset);
+          // Ask terms index to find biggest index term that's <=
+          // our text:
+          in.seek(indexEnum.seek(term));
+          didIndexNext = false;
+          if (doOrd) {
+            state.ord = indexEnum.ord()-1;
+          }
           seekPending = false;
 
           // NOTE: the first next() after an index seek is
@@ -373,78 +412,56 @@ public class PrefixCodedTermsReader exte
           // those bytes in the primary file, but then when
           // scanning over an index term we'd have to
           // special case it:
-          bytesReader.reset(indexResult.term);
-          
-          state.ord = indexResult.position-1;
-          assert state.ord >= -1: "ord=" + state.ord + " pos=" + indexResult.position;
-
-          startOrd = indexResult.position;
+          bytesReader.reset(indexEnum.term());
+          //System.out.println("  doSeek term=" + indexEnum.term().utf8ToString() + " vs target=" + term.utf8ToString());
         } else {
-          startOrd = -1;
+          //System.out.println("  skip seek");
         }
 
+        assert startSeek();
+
         // Now scan:
-        while(next() != null) {
+        while (next() != null) {
           final int cmp = termComp.compare(bytesReader.term, term);
           if (cmp == 0) {
-
-            if (doSeek && useCache) {
-              // Store in cache
-              FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
-              cachedState = (TermState) state.clone();
-              // this is fp after current term
-              cachedState.filePointer = in.getFilePointer();
-              termsCache.put(entryKey, cachedState);
+            // Done!
+            if (useCache) {
+              cacheTerm(fieldTerm);
             }
-              
+
             return SeekStatus.FOUND;
           } else if (cmp > 0) {
             return SeekStatus.NOT_FOUND;
           }
+
           // The purpose of the terms dict index is to seek
           // the enum to the closest index term before the
           // term we are looking for.  So, we should never
           // cross another index term (besides the first
           // one) while we are scanning:
-          assert state.ord == startOrd || !fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true): "state.ord=" + state.ord + " startOrd=" + startOrd + " ir.isIndexTerm=" + fieldIndexReader.isIndexTerm(state.ord, state.docFreq, true) + " state.docFreq=" + state.docFreq;
+          assert checkSeekScan();
         }
 
+        positioned = false;
         return SeekStatus.END;
       }
 
-      @Override
-      public SeekStatus seek(long ord) throws IOException {
-
-        // TODO: should we cache term lookup by ord as well...?
-
-        if (ord >= numTerms) {
-          state.ord = numTerms-1;
-          return SeekStatus.END;
-        }
-
-        fieldIndexReader.getIndexOffset(ord, indexResult);
-        in.seek(indexResult.offset);
-        seekPending = false;
-
-        // NOTE: the first next() after an index seek is
-        // wasteful, since it redundantly reads the same
-        // bytes into the buffer
-        bytesReader.reset(indexResult.term);
-
-        state.ord = indexResult.position-1;
-        assert state.ord >= -1: "ord=" + state.ord;
-
-        // Now, scan:
-        int left = (int) (ord - state.ord);
-        while(left > 0) {
-          final BytesRef term = next();
-          assert term != null;
-          left--;
-        }
-
-        // always found
-        return SeekStatus.FOUND;
+      private final void setTermState(BytesRef term, final TermState termState) {
+        assert termState != null && termState instanceof PrefixCodedTermState;
+        state.copyFrom(termState);
+        seekPending = true;
+        bytesReader.term.copy(term);
+      }
+
+      private final void cacheTerm(FieldAndTerm other) {
+        // Store in cache
+        final FieldAndTerm entryKey = new FieldAndTerm(other);
+        final PrefixCodedTermState cachedState = (PrefixCodedTermState) state.clone();
+        // this is fp after current term
+        cachedState.filePointer = in.getFilePointer();
+        termsCache.put(entryKey, cachedState);
       }
+      
 
       @Override
       public BytesRef term() {
@@ -452,38 +469,40 @@ public class PrefixCodedTermsReader exte
       }
 
       @Override
-      public long ord() {
-        return state.ord;
-      }
-
-      @Override
       public BytesRef next() throws IOException {
 
         if (seekPending) {
           seekPending = false;
           in.seek(state.filePointer);
+          indexEnum.seek(bytesReader.term);
+          didIndexNext = false;
         }
         
-        if (state.ord >= numTerms-1) {
+        if (!bytesReader.read()) {
+          //System.out.println("te.next end!");
+          positioned = false;
           return null;
         }
 
-        bytesReader.read();
-        state.docFreq = in.readVInt();
+        final byte b = in.readByte();
+        isIndexTerm = (b & 0x80) != 0;
+
+        if ((b & 0x40) == 0) {
+          // Fast case -- docFreq fits in 6 bits
+          state.docFreq = b & 0x3F;
+        } else {
+          state.docFreq = (in.readVInt() << 6) | (b & 0x3F);
+        }
 
-        // TODO: would be cleaner, but space-wasting, to
-        // simply record a bit into each index entry as to
-        // whether it's an index entry or not, rather than
-        // re-compute that information... or, possibly store
-        // a "how many terms until next index entry" in each
-        // index entry, but that'd require some tricky
-        // lookahead work when writing the index
         postingsReader.readTerm(in,
                                 fieldInfo, state,
-                                fieldIndexReader.isIndexTerm(1+state.ord, state.docFreq, false));
-
-        state.ord++;
+                                isIndexTerm);
+        if (doOrd) {
+          state.ord++;
+        }
+        positioned = true;
 
+        //System.out.println("te.next term=" + bytesReader.term.utf8ToString());
         return bytesReader.term;
       }
 
@@ -494,7 +513,7 @@ public class PrefixCodedTermsReader exte
 
       @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
-        DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
+        final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
         assert docsEnum != null;
         return docsEnum;
       }
@@ -507,6 +526,66 @@ public class PrefixCodedTermsReader exte
           return postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse);
         }
       }
+
+      @Override
+      public SeekStatus seek(BytesRef term, TermState otherState) throws IOException {
+        assert otherState != null && otherState instanceof PrefixCodedTermState;
+        assert otherState.getClass() == this.state.getClass() : "Illegal TermState type " + otherState.getClass();
+        assert ((PrefixCodedTermState)otherState).ord < numTerms;
+        setTermState(term, otherState);
+        positioned = false;
+        return SeekStatus.FOUND;
+      }
+      
+      @Override
+      public TermState termState() throws IOException {
+        final PrefixCodedTermState newTermState = (PrefixCodedTermState) state.clone();
+        newTermState.filePointer = in.getFilePointer();
+        return newTermState;
+      }
+
+      @Override
+      public SeekStatus seek(long ord) throws IOException {
+
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+
+        if (ord >= numTerms) {
+          state.ord = numTerms-1;
+          return SeekStatus.END;
+        }
+
+        in.seek(indexEnum.seek(ord));
+        seekPending = false;
+        positioned = true;
+
+        // NOTE: the first next() after an index seek is
+        // wasteful, since it redundantly reads the same
+        // bytes into the buffer
+        bytesReader.reset(indexEnum.term());
+
+        state.ord = indexEnum.ord()-1;
+        assert state.ord >= -1: "ord=" + state.ord;
+
+        // Now, scan:
+        int left = (int) (ord - state.ord);
+        while(left > 0) {
+          final BytesRef term = next();
+          assert term != null;
+          left--;
+        }
+
+        // always found
+        return SeekStatus.FOUND;
+      }
+
+      public long ord() {
+        if (!doOrd) {
+          throw new UnsupportedOperationException();
+        }
+        return state.ord;
+      }
     }
   }
 }

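The reworked seek above adds a fast path: if the enum is already positioned
before the target, and the target still sorts before the next index term, the
index seek is skipped entirely and the enum just scans forward within the
current block.  A hedged sketch of that membership test, with a TreeMap
standing in for the terms index (illustrative names):

    import java.util.Map;
    import java.util.TreeMap;

    // Sketch: decide whether a seek target can be reached by scanning
    // in place, or needs a fresh index lookup.
    class SeekFastPathSketch {
      final TreeMap<String, Long> index = new TreeMap<String, Long>();  // index term -> terms-dict fp
      String currentTerm;    // term the enum is positioned on, or null
      String nextIndexTerm;  // first index term after the current block, or null

      // True when target is after the current term but still inside
      // the current index block -- scan forward, no index seek:
      boolean canScanInPlace(String target) {
        return currentTerm != null
            && currentTerm.compareTo(target) < 0
            && (nextIndexTerm == null || target.compareTo(nextIndexTerm) < 0);
      }

      // Otherwise: file pointer of the largest index term <= target
      // (assumes the index is non-empty):
      long seekViaIndex(String target) {
        final Map.Entry<String, Long> e = index.floorEntry(target);
        return e == null ? index.firstEntry().getValue() : e.getValue();
      }
    }
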
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Thu Jan 13 19:53:21 2011
@@ -74,7 +74,6 @@ public class PrefixCodedTermsWriter exte
     this.termComp = termComp;
     out = state.directory.createOutput(termsFileName);
     termsIndexWriter.setTermsOutput(out);
-    state.flushedFiles.add(termsFileName);
 
     fieldInfos = state.fieldInfos;
     writeHeader(out);
@@ -93,7 +92,7 @@ public class PrefixCodedTermsWriter exte
   }
 
   @Override
-  public TermsConsumer addField(FieldInfo field) {
+  public TermsConsumer addField(FieldInfo field) throws IOException {
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
     TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
@@ -173,12 +172,25 @@ public class PrefixCodedTermsWriter exte
     public void finishTerm(BytesRef text, int numDocs) throws IOException {
 
       assert numDocs > 0;
+      //System.out.println("finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " fp="  + out.getFilePointer());
 
       final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs);
 
       termWriter.write(text);
-      out.writeVInt(numDocs);
+      final int highBit = isIndexTerm ? 0x80 : 0;
+      //System.out.println("  isIndex=" + isIndexTerm);
 
+      // This is a vInt, except we steal the top bit to record
+      // whether this was an indexed term:
+      if ((numDocs & ~0x3F) == 0) {
+        // Fast case -- docFreq fits in 6 bits
+        out.writeByte((byte) (highBit | numDocs));
+      } else {
+        // Write bottom 6 bits of docFreq, then write the
+        // remainder as vInt:
+        out.writeByte((byte) (highBit | 0x40 | (numDocs & 0x3F)));
+        out.writeVInt(numDocs >>> 6);
+      }
       postingsWriter.finishTerm(numDocs, isIndexTerm);
       numTerms++;
     }
@@ -186,6 +198,8 @@ public class PrefixCodedTermsWriter exte
     // Finishes all terms in this field
     @Override
     public void finish() throws IOException {
+      // EOF marker:
+      out.writeVInt(DeltaBytesWriter.TERM_EOF);
       fieldIndexWriter.finish();
     }
   }

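The "vInt with a stolen top bit" format written above, and decoded in the
PrefixCodedTermsReader diff earlier, packs three things into the first byte:
bit 0x80 marks an index term, bit 0x40 marks overflow, and the low 6 bits
carry docFreq (or its low 6 bits).  A standalone round-trip sketch that
assumes nothing about Lucene's own IO classes:

    import java.io.*;

    // Sketch: round-trip the docFreq byte format introduced above.
    // 0x80 = index term, 0x40 = docFreq overflowed 6 bits, low 6
    // bits = docFreq (or its low 6 bits, remainder as a vInt).
    public class StolenBitDocFreq {
      static void write(DataOutput out, int docFreq, boolean isIndexTerm) throws IOException {
        final int highBit = isIndexTerm ? 0x80 : 0;
        if ((docFreq & ~0x3F) == 0) {
          out.writeByte(highBit | docFreq);                  // fits in 6 bits
        } else {
          out.writeByte(highBit | 0x40 | (docFreq & 0x3F));  // low 6 bits + flag
          writeVInt(out, docFreq >>> 6);                     // remainder
        }
      }

      static int read(DataInput in, boolean[] isIndexTerm) throws IOException {
        final byte b = in.readByte();
        isIndexTerm[0] = (b & 0x80) != 0;
        return (b & 0x40) == 0 ? (b & 0x3F) : (readVInt(in) << 6) | (b & 0x3F);
      }

      // Plain 7-bits-per-byte vInt, as in Lucene's file formats:
      static void writeVInt(DataOutput out, int i) throws IOException {
        while ((i & ~0x7F) != 0) {
          out.writeByte((i & 0x7F) | 0x80);
          i >>>= 7;
        }
        out.writeByte(i);
      }

      static int readVInt(DataInput in) throws IOException {
        byte b = in.readByte();
        int i = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7) {
          b = in.readByte();
          i |= (b & 0x7F) << shift;
        }
        return i;
      }

      public static void main(String[] args) throws IOException {
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        write(new DataOutputStream(bytes), 100000, true);
        final boolean[] isIndex = new boolean[1];
        final int df = read(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray())), isIndex);
        System.out.println("docFreq=" + df + " isIndexTerm=" + isIndex[0]);
        // prints: docFreq=100000 isIndexTerm=true
      }
    }
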
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1058718&r1=1058717&r2=1058718&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Thu Jan 13 19:53:21 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
+import java.io.Closeable;
 import java.util.Collection;
 
 
@@ -35,42 +36,40 @@ import java.util.Collection;
  * indexed terms (many pairs of CharSequence text + long
  * fileOffset), and then this reader must be able to
  * retrieve the nearest index term to a provided term
- * text. 
+ * text.
  * @lucene.experimental */
 
-public abstract class TermsIndexReaderBase {
+public abstract class TermsIndexReaderBase implements Closeable {
 
-  static class TermsIndexResult {
-    long position;
-    final BytesRef term = new BytesRef();
-    long offset;
-  };
-
-  public abstract class FieldReader {
-    /** Returns position of "largest" index term that's <=
-     *  text.  Returned TermsIndexResult may be reused
-     *  across calls.  This resets internal state, and
-     *  expects that you'll then scan the file and
-     *  sequentially call isIndexTerm for each term
-     *  encountered. */
-    public abstract void getIndexOffset(BytesRef term, TermsIndexResult result) throws IOException;
-
-    public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
-
-    /** Call this sequentially for each term encoutered,
-     *  after calling {@link #getIndexOffset}. */
-    public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
-
-    /** Finds the next index term, after the specified
-     *  ord.  Returns true if one exists.  */
-    public abstract boolean nextIndexTerm(long ord, TermsIndexResult result) throws IOException;
-  }
-
-  public abstract FieldReader getField(FieldInfo fieldInfo);
+  public abstract FieldIndexEnum getFieldEnum(FieldInfo fieldInfo);
 
   public abstract void loadTermsIndex(int indexDivisor) throws IOException;
 
   public abstract void close() throws IOException;
 
   public abstract void getExtensions(Collection<String> extensions);
-}
\ No newline at end of file
+
+  public abstract boolean supportsOrd();
+
+  public abstract int getDivisor();
+
+  // Similar to TermsEnum, except the only "metadata" it
+  // reports for a given indexed term is the long fileOffset
+  // into the main terms dict (_X.tis) file:
+  public static abstract class FieldIndexEnum {
+
+    /** Seeks to the "largest" indexed term that's <=
+     *  term; returns the file pointer (into the main
+     *  terms dict file) for that term */
+    public abstract long seek(BytesRef term) throws IOException;
+
+    /** Returns -1 at end */
+    public abstract long next() throws IOException;
+
+    public abstract BytesRef term();
+
+    // Only impl'd if supportsOrd() returns true!
+    public abstract long seek(long ord) throws IOException;
+    public abstract long ord();
+  }
+}
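
How a terms dictionary is expected to drive this enum, per the
SegmentTermsEnum changes earlier: one index seek, then a bounded sequential
scan of the terms dict.  A hedged sketch (illustrative helper, not the real
caller):

    import java.io.IOException;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.util.BytesRef;

    // Sketch: position the terms-dict input via the index enum, as
    // PrefixCodedTermsReader.SegmentTermsEnum does with
    // in.seek(indexEnum.seek(term)).
    class FieldIndexEnumUsage {
      static long position(TermsIndexReaderBase.FieldIndexEnum indexEnum,
                           IndexInput termsDictIn,
                           BytesRef target) throws IOException {
        final long fp = indexEnum.seek(target);  // largest indexed term <= target
        termsDictIn.seek(fp);
        // From fp, the caller scans at most indexInterval * getDivisor()
        // terms before it must meet or pass the target.
        return fp;
      }
    }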