Posted to commits@lucene.apache.org by bu...@apache.org on 2011/02/22 02:01:11 UTC

svn commit: r1073192 [8/32] - in /lucene/dev/branches/realtime_search: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/demo/ dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/c...

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInfos.java Tue Feb 22 01:00:39 2011
@@ -17,6 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.store.Directory;
@@ -24,9 +34,6 @@ import org.apache.lucene.store.IndexInpu
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.StringHelper;
 
-import java.io.IOException;
-import java.util.*;
-
 /** Access to the Fieldable Info file that describes document fields and whether or
  *  not they are indexed. Each segment has a separate Fieldable Info file. Objects
  *  of this class are thread-safe for multiple readers, but only one thread can
@@ -34,7 +41,72 @@ import java.util.*;
  *  accessing this object.
  *  @lucene.experimental
  */
-public final class FieldInfos {
+public final class FieldInfos implements Iterable<FieldInfo> {
+  private static final class FieldNumberBiMap {
+    private final Map<Integer,String> numberToName;
+    private final Map<String,Integer> nameToNumber;
+
+    private FieldNumberBiMap() {
+      this.nameToNumber = new HashMap<String, Integer>();
+      this.numberToName = new HashMap<Integer, String>();
+    }
+
+    synchronized int addOrGet(String fieldName, FieldInfoBiMap fieldInfoMap, int preferredFieldNumber) {
+      Integer fieldNumber = nameToNumber.get(fieldName);
+      if (fieldNumber == null) {
+        if (!numberToName.containsKey(preferredFieldNumber)) {
+          // cool - we can use this number globally
+          fieldNumber = preferredFieldNumber;
+        } else {
+          fieldNumber = findNextAvailableFieldNumber(preferredFieldNumber + 1, numberToName.keySet());
+        }
+
+        numberToName.put(fieldNumber, fieldName);
+        nameToNumber.put(fieldName, fieldNumber);
+      }
+
+      return fieldNumber;
+    }
+
+    synchronized void setIfNotSet(int fieldNumber, String fieldName) {
+      if (!numberToName.containsKey(fieldNumber) && !nameToNumber.containsKey(fieldName)) {
+        numberToName.put(fieldNumber, fieldName);
+        nameToNumber.put(fieldName, fieldNumber);
+      }
+    }
+  }
+
+  private static final class FieldInfoBiMap implements Iterable<FieldInfo> {
+    private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
+    private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+    private int nextAvailableNumber = 0;
+
+    public void put(FieldInfo fi) {
+      assert !byNumber.containsKey(fi.number);
+      assert !byName.containsKey(fi.name);
+
+      byNumber.put(fi.number, fi);
+      byName.put(fi.name, fi);
+    }
+
+    public FieldInfo get(String fieldName) {
+      return byName.get(fieldName);
+    }
+
+    public FieldInfo get(int fieldNumber) {
+      return byNumber.get(fieldNumber);
+    }
+
+    public int size() {
+      assert byNumber.size() == byName.size();
+      return byNumber.size();
+    }
+
+    @Override
+    public Iterator<FieldInfo> iterator() {
+      return byNumber.values().iterator();
+    }
+  }
 
   // First used in 2.9; prior to 2.9 there was no format header
   public static final int FORMAT_START = -2;
@@ -53,11 +125,18 @@ public final class FieldInfos {
   static final byte STORE_PAYLOADS = 0x20;
   static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
 
-  private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
-  private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+  private final FieldNumberBiMap globalFieldNumbers;
+  private final FieldInfoBiMap localFieldInfos;
+
   private int format;
 
   public FieldInfos() {
+    this(new FieldNumberBiMap());
+  }
+
+  private FieldInfos(FieldNumberBiMap globalFieldNumbers) {
+    this.globalFieldNumbers = globalFieldNumbers;
+    this.localFieldInfos = new FieldInfoBiMap();
   }
 
   /**
@@ -68,6 +147,7 @@ public final class FieldInfos {
    * @throws IOException
    */
   public FieldInfos(Directory d, String name) throws IOException {
+    this(new FieldNumberBiMap());
     IndexInput input = d.openInput(name);
     try {
       read(input, name);
@@ -76,17 +156,27 @@ public final class FieldInfos {
     }
   }
 
+  private static final int findNextAvailableFieldNumber(int nextPreferredNumber, Set<Integer> unavailableNumbers) {
+    while (unavailableNumbers.contains(nextPreferredNumber)) {
+      nextPreferredNumber++;
+    }
+
+    return nextPreferredNumber;
+  }
+
+  public FieldInfos newFieldInfosWithGlobalFieldNumberMap() {
+    return new FieldInfos(this.globalFieldNumbers);
+  }
+
   /**
    * Returns a deep clone of this FieldInfos instance.
    */
   @Override
   synchronized public Object clone() {
-    FieldInfos fis = new FieldInfos();
-    final int numField = byNumber.size();
-    for(int i=0;i<numField;i++) {
-      FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
-      fis.byNumber.add(fi);
-      fis.byName.put(fi.name, fi);
+    FieldInfos fis = new FieldInfos(globalFieldNumbers);
+    for (FieldInfo fi : this) {
+      FieldInfo clone = (FieldInfo) (fi).clone();
+      fis.localFieldInfos.put(clone);
     }
     return fis;
   }
@@ -102,9 +192,7 @@ public final class FieldInfos {
 
   /** Returns true if any fields do not omitTermFreqAndPositions */
   public boolean hasProx() {
-    final int numFields = byNumber.size();
-    for(int i=0;i<numFields;i++) {
-      final FieldInfo fi = fieldInfo(i);
+    for (FieldInfo fi : this) {
       if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
         return true;
       }
@@ -215,9 +303,28 @@ public final class FieldInfos {
   synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
                        boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
                        boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+    return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
+                               storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+  }
+
+  synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
+      boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+      boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
-      return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      if (preferredFieldNumber == -1) {
+        preferredFieldNumber = findNextAvailableFieldNumber(localFieldInfos.nextAvailableNumber, localFieldInfos.byNumber.keySet());
+        localFieldInfos.nextAvailableNumber = preferredFieldNumber;
+      }
+
+      // get a global number for this field
+      int fieldNumber = globalFieldNumbers.addOrGet(name, localFieldInfos, preferredFieldNumber);
+      if (localFieldInfos.get(fieldNumber) != null) {
+        // fall back if the global number is already taken
+        fieldNumber = preferredFieldNumber;
+      }
+      return addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     } else {
       fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     }
@@ -225,27 +332,27 @@ public final class FieldInfos {
   }
 
   synchronized public FieldInfo add(FieldInfo fi) {
-    return add(fi.name, fi.isIndexed, fi.storeTermVector,
+    int preferredFieldNumber = fi.number;
+    FieldInfo other = localFieldInfos.get(preferredFieldNumber);
+    if (other == null || !other.name.equals(fi.name)) {
+      preferredFieldNumber = -1;
+    }
+    return addOrUpdateInternal(fi.name, preferredFieldNumber, fi.isIndexed, fi.storeTermVector,
                fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
                fi.omitNorms, fi.storePayloads,
                fi.omitTermFreqAndPositions);
   }
 
-  synchronized public void update(FieldInfos otherInfos) {
-    int numFields = otherInfos.size();
-    for (int i = 0; i < numFields; i++) {
-      add(otherInfos.fieldInfo(i));
-    }
-  }
-
-  private FieldInfo addInternal(String name, boolean isIndexed,
+  private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector,
                                 boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
     name = StringHelper.intern(name);
-    FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+    globalFieldNumbers.setIfNotSet(fieldNumber, name);
+    FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
                                  storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
-    byNumber.add(fi);
-    byName.put(name, fi);
+
+    assert localFieldInfos.get(fi.number) == null;
+    localFieldInfos.put(fi);
     return fi;
   }
 
@@ -255,7 +362,7 @@ public final class FieldInfos {
   }
 
   public FieldInfo fieldInfo(String fieldName) {
-    return  byName.get(fieldName);
+    return localFieldInfos.get(fieldName);
   }
 
   /**
@@ -277,16 +384,20 @@ public final class FieldInfos {
    * doesn't exist.
    */
   public FieldInfo fieldInfo(int fieldNumber) {
-	return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
+	return (fieldNumber >= 0) ? localFieldInfos.get(fieldNumber) : null;
+  }
+
+  public Iterator<FieldInfo> iterator() {
+    return localFieldInfos.iterator();
   }
 
   public int size() {
-    return byNumber.size();
+    return localFieldInfos.size();
   }
 
   public boolean hasVectors() {
-    for (int i = 0; i < size(); i++) {
-      if (fieldInfo(i).storeTermVector) {
+    for (FieldInfo fi : this) {
+      if (fi.storeTermVector) {
         return true;
       }
     }
@@ -294,8 +405,8 @@ public final class FieldInfos {
   }
 
   public boolean hasNorms() {
-    for (int i = 0; i < size(); i++) {
-      if (!fieldInfo(i).omitNorms) {
+    for (FieldInfo fi : this) {
+      if (!fi.omitNorms) {
         return true;
       }
     }
@@ -314,8 +425,7 @@ public final class FieldInfos {
   public void write(IndexOutput output) throws IOException {
     output.writeVInt(FORMAT_CURRENT);
     output.writeVInt(size());
-    for (int i = 0; i < size(); i++) {
-      FieldInfo fi = fieldInfo(i);
+    for (FieldInfo fi : this) {
       byte bits = 0x0;
       if (fi.isIndexed) bits |= IS_INDEXED;
       if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
@@ -325,7 +435,8 @@ public final class FieldInfos {
       if (fi.storePayloads) bits |= STORE_PAYLOADS;
       if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
       output.writeString(fi.name);
-      output.writeInt(fi.codecId);
+      output.writeInt(fi.number);
+      output.writeInt(fi.getCodecId());
       output.writeByte(bits);
     }
   }
@@ -345,6 +456,7 @@ public final class FieldInfos {
     for (int i = 0; i < size; i++) {
       String name = StringHelper.intern(input.readString());
       // if this is a previous format codec 0 will be preflex!
+      final int fieldNumber = format <= FORMAT_PER_FIELD_CODEC ? input.readInt() : i;
      final int codecId = format <= FORMAT_PER_FIELD_CODEC ? input.readInt() : 0;
       byte bits = input.readByte();
       boolean isIndexed = (bits & IS_INDEXED) != 0;
@@ -354,8 +466,8 @@ public final class FieldInfos {
       boolean omitNorms = (bits & OMIT_NORMS) != 0;
       boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
       boolean omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;
-      final FieldInfo addInternal = addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
-      addInternal.codecId = codecId;
+      final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      addInternal.setCodecId(codecId);
     }
 
     if (input.getFilePointer() != input.length()) {

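For readers skimming the FieldInfos hunks above: the commit replaces the
old ArrayList/HashMap pair with a FieldNumberBiMap that is shared by every
FieldInfos created through newFieldInfosWithGlobalFieldNumberMap(), so a
field name keeps one number across segments. A minimal standalone sketch
of the allocation logic (illustrative names, not the committed class):

    import java.util.HashMap;
    import java.util.Map;

    // Global name<->number bijection: honor the preferred number when it
    // is globally unused, otherwise probe upward for the first free one,
    // as addOrGet/findNextAvailableFieldNumber do in the diff above.
    class FieldNumberMapSketch {
      private final Map<Integer,String> numberToName = new HashMap<Integer,String>();
      private final Map<String,Integer> nameToNumber = new HashMap<String,Integer>();

      synchronized int addOrGet(String fieldName, int preferredFieldNumber) {
        Integer fieldNumber = nameToNumber.get(fieldName);
        if (fieldNumber == null) {
          int candidate = preferredFieldNumber;
          while (numberToName.containsKey(candidate)) {
            candidate++;  // linear probe for a free number
          }
          fieldNumber = candidate;
          numberToName.put(fieldNumber, fieldName);
          nameToNumber.put(fieldName, fieldNumber);
        }
        return fieldNumber;
      }
    }
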
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Tue Feb 22 01:00:39 2011
@@ -44,14 +44,12 @@ final class FieldsWriter {
  // If null - we were supplied with streams, if not null - we manage them ourselves
   private Directory directory;
   private String segment;
-  private FieldInfos fieldInfos;
   private IndexOutput fieldsStream;
   private IndexOutput indexStream;
 
-  FieldsWriter(Directory directory, String segment, FieldInfos fn) throws IOException {
+  FieldsWriter(Directory directory, String segment) throws IOException {
     this.directory = directory;
     this.segment = segment;
-    fieldInfos = fn;
 
     boolean success = false;
     try {
@@ -69,10 +67,9 @@ final class FieldsWriter {
     }
   }
 
-  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
+  FieldsWriter(IndexOutput fdx, IndexOutput fdt) {
     directory = null;
     segment = null;
-    fieldInfos = fn;
     fieldsStream = fdt;
     indexStream = fdx;
   }
@@ -164,7 +161,7 @@ final class FieldsWriter {
     assert fieldsStream.getFilePointer() == position;
   }
 
-  final void addDocument(Document doc) throws IOException {
+  final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
     indexStream.writeLong(fieldsStream.getFilePointer());
 
     int storedCount = 0;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Tue Feb 22 01:00:39 2011
@@ -22,13 +22,14 @@ import org.apache.lucene.document.FieldS
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MapBackedSet;
 
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Map;
 import java.util.Comparator;
+import java.util.concurrent.ConcurrentHashMap;
 
 /**  A <code>FilterIndexReader</code> contains another IndexReader, which it
  * uses as its basic source of data, possibly transforming the data along the
@@ -286,6 +287,7 @@ public class FilterIndexReader extends I
   public FilterIndexReader(IndexReader in) {
     super();
     this.in = in;
+    readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
   }
 
   @Override
@@ -391,11 +393,6 @@ public class FilterIndexReader extends I
   @Override
   protected void doClose() throws IOException {
     in.close();
-
-    // NOTE: only needed in case someone had asked for
-    // FieldCache for top-level reader (which is generally
-    // not a good idea):
-    FieldCache.DEFAULT.purge(this);
   }
 
 
@@ -454,4 +451,16 @@ public class FilterIndexReader extends I
     buffer.append(')');
     return buffer.toString();
   }
-}
\ No newline at end of file
+
+  @Override
+  public void addReaderFinishedListener(ReaderFinishedListener listener) {
+    super.addReaderFinishedListener(listener);
+    in.addReaderFinishedListener(listener);
+  }
+
+  @Override
+  public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+    super.removeReaderFinishedListener(listener);
+    in.removeReaderFinishedListener(listener);
+  }
+}

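The two overrides above register a listener on both the filter reader and
the wrapped reader, so eviction fires no matter which of the two finishes
first. A hedged usage sketch (the cache map is hypothetical application
state, not a Lucene API):

    import java.util.Map;
    import org.apache.lucene.index.FilterIndexReader;
    import org.apache.lucene.index.IndexReader;

    class ListenerWiringSketch {
      // 'cache' stands in for any app-side cache keyed on getCoreCacheKey();
      // assumes 'inner' was opened normally so its listener set is non-null.
      static IndexReader wrap(IndexReader inner, final Map<Object,Object> cache) {
        IndexReader filtered = new FilterIndexReader(inner);
        filtered.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
          public void finished(IndexReader reader) {
            cache.remove(reader.getCoreCacheKey());  // purge the finished core
          }
        });
        return filtered;
      }
    }
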
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Tue Feb 22 01:00:39 2011
@@ -79,7 +79,7 @@ final class FreqProxTermsWriter extends 
 
       // If this field has postings then add them to the
       // segment
-      fieldWriter.flush(consumer, state);
+      fieldWriter.flush(fieldInfo.name, consumer, state);
 
       TermsHashPerField perField = fieldWriter.termsHashPerField;
       assert termsHash == null || termsHash == perField.termsHash;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Tue Feb 22 01:00:39 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Map;
 
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Fieldable;
@@ -26,6 +27,7 @@ import org.apache.lucene.index.codecs.Fi
 import org.apache.lucene.index.codecs.PostingsConsumer;
 import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 
@@ -200,6 +202,7 @@ final class FreqProxTermsWriterPerField 
       return new FreqProxPostingsArray(size);
     }
 
+    @Override
     void copyTo(ParallelPostingsArray toArray, int numToCopy) {
       assert toArray instanceof FreqProxPostingsArray;
       FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
@@ -225,14 +228,23 @@ final class FreqProxTermsWriterPerField 
   /* Walk through all unique text tokens (Posting
    * instances) found in this field and serialize them
    * into a single RAM segment. */
-  void flush(FieldsConsumer consumer,  final SegmentWriteState state)
+  void flush(String fieldName, FieldsConsumer consumer,  final SegmentWriteState state)
     throws CorruptIndexException, IOException {
 
     final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
     final Comparator<BytesRef> termComp = termsConsumer.getComparator();
 
+    final Term protoTerm = new Term(fieldName);
+
     final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 
+    final Map<Term,Integer> segDeletes;
+    if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+      segDeletes = state.segDeletes.terms;
+    } else {
+      segDeletes = null;
+    }
+
     final int[] termIDs = termsHashPerField.sortPostings(termComp);
     final int numTerms = termsHashPerField.bytesHash.size();
     final BytesRef text = new BytesRef();
@@ -260,6 +272,18 @@ final class FreqProxTermsWriterPerField 
 
       final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
 
+      final int delDocLimit;
+      if (segDeletes != null) {
+        final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
+        if (docIDUpto != null) {
+          delDocLimit = docIDUpto;
+        } else {
+          delDocLimit = 0;
+        }
+      } else {
+        delDocLimit = 0;
+      }
+
       // Now termStates has numToMerge FieldMergeStates
       // which all share the same term.  Now we must
       // interleave the docID streams.
@@ -300,7 +324,28 @@ final class FreqProxTermsWriterPerField 
         numDocs++;
         assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
         final int termDocFreq = termFreq;
+
+        // NOTE: we could check here if the docID was
+        // deleted, and skip it.  However, this is somewhat
+        // dangerous because it can yield non-deterministic
+        // behavior since we may see the docID before we see
+        // the term that caused it to be deleted.  This
+        // would mean some (but not all) of its postings may
+        // make it into the index, which'd alter the docFreq
+        // for those terms.  We could fix this by doing two
+        // passes, ie first sweep marks all del docs, and
+        // 2nd sweep does the real flush, but I suspect
+        // that'd add too much time to flush.
         postingsConsumer.startDoc(docID, termDocFreq);
+        if (docID < delDocLimit) {
+          // Mark it deleted.  TODO: we could also skip
+          // writing its postings; this would be
+          // deterministic (just for this Term's docs).
+          if (state.deletedDocs == null) {
+            state.deletedDocs = new BitVector(state.numDocs);
+          }
+          state.deletedDocs.set(docID);
+        }
 
         // Carefully copy over the prox + payload info,
         // changing the format to match Lucene's segment

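The segDeletes/delDocLimit logic above applies buffered delete-by-term
operations during flush: each buffered Term records the docID up to which
it deletes, and matching docs below that limit are set in
state.deletedDocs rather than skipped (skipping would make docFreq
non-deterministic, per the NOTE in the hunk). A standalone sketch of just
the limit check, assuming a plain map from term text to docIDUpto
(illustrative, not the committed code):

    import java.util.HashMap;
    import java.util.Map;

    class SegDeletesSketch {
      // term text -> docIDUpto recorded when the delete was buffered
      private final Map<String,Integer> segDeletes = new HashMap<String,Integer>();

      void bufferDelete(String term, int docIDUpto) {
        segDeletes.put(term, docIDUpto);
      }

      // While flushing postings for 'term': docs added before the delete
      // (docID < delDocLimit) are marked deleted; later docs survive.
      boolean deletedAtFlush(String term, int docID) {
        Integer docIDUpto = segDeletes.get(term);
        final int delDocLimit = (docIDUpto != null) ? docIDUpto : 0;
        return docID < delDocLimit;
      }
    }
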
Added: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1073192&view=auto
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (added)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Tue Feb 22 01:00:39 2011
@@ -0,0 +1,145 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
+
+/** Holds buffered deletes by term or query, once pushed.
+ *  Pushed deletes are write-once, so we shift to a more
+ *  memory-efficient data structure to hold them.  We don't
+ *  hold docIDs because these are applied on flush. */
+
+class FrozenBufferedDeletes {
+
+  /* Rough logic: Term is object w/
+     String field and String text (OBJ_HEADER + 2*POINTER).
+     We don't count Term's field since it's interned.
+     Term's text is String (OBJ_HEADER + 4*INT + POINTER +
+     OBJ_HEADER + string.length*CHAR). */
+  final static int BYTES_PER_DEL_TERM = 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 4*RamUsageEstimator.NUM_BYTES_INT;
+
+  /* Query we often undercount (say 24 bytes), plus int. */
+  final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24;
+
+  // Terms, in sorted order:
+  // TODO: would be more RAM efficient to store BytesRef[],
+  // per field:
+  final Term[] terms;
+
+  // Parallel array of deleted query, and the docIDUpto for
+  // each
+  final Query[] queries;
+  final int[] queryLimits;
+  final int bytesUsed;
+  final int numTermDeletes;
+  final long gen;
+
+  public FrozenBufferedDeletes(BufferedDeletes deletes, long gen) {
+    terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
+    queries = new Query[deletes.queries.size()];
+    queryLimits = new int[deletes.queries.size()];
+    int upto = 0;
+    for(Map.Entry<Query,Integer> ent : deletes.queries.entrySet()) {
+      queries[upto] = ent.getKey();
+      queryLimits[upto] = ent.getValue();
+      upto++;
+    }
+    bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY;
+    numTermDeletes = deletes.numTermDeletes.get();
+    this.gen = gen;
+  }
+
+  public Iterable<Term> termsIterable() {
+    return new Iterable<Term>() {
+      // @Override -- not until Java 1.6
+      public Iterator<Term> iterator() {
+        return new Iterator<Term>() {
+          private int upto;
+
+          // @Override -- not until Java 1.6
+          public boolean hasNext() {
+            return upto < terms.length;
+          }
+
+          // @Override -- not until Java 1.6
+          public Term next() {
+            return terms[upto++];
+          }
+
+          // @Override -- not until Java 1.6
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    };
+  }
+
+  public Iterable<QueryAndLimit> queriesIterable() {
+    return new Iterable<QueryAndLimit>() {
+      // @Override -- not until Java 1.6
+      public Iterator<QueryAndLimit> iterator() {
+        return new Iterator<QueryAndLimit>() {
+          private int upto;
+
+          // @Override -- not until Java 1.6
+          public boolean hasNext() {
+            return upto < queries.length;
+          }
+
+          // @Override -- not until Java 1.6
+          public QueryAndLimit next() {
+            QueryAndLimit ret = new QueryAndLimit(queries[upto], queryLimits[upto]);
+            upto++;
+            return ret;
+          }
+
+          // @Override -- not until Java 1.6
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    };
+  }
+
+  @Override
+  public String toString() {
+    String s = "";
+    if (numTermDeletes != 0) {
+      s += " " + numTermDeletes + " deleted terms (unique count=" + terms.length + ")";
+    }
+    if (queries.length != 0) {
+      s += " " + queries.length + " deleted queries";
+    }
+    if (bytesUsed != 0) {
+      s += " bytesUsed=" + bytesUsed;
+    }
+
+    return s;
+  }
+  
+  boolean any() {
+    return terms.length > 0 || queries.length > 0;
+  }
+}

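FrozenBufferedDeletes compacts the pushed deletes into parallel arrays and
exposes them only through the two iterables. A hedged consumption sketch
(package-private types, so assume this lives in org.apache.lucene.index;
the loop bodies are placeholders for the real apply logic):

    class FrozenDeletesConsumerSketch {
      static void apply(FrozenBufferedDeletes frozen) {
        for (Term term : frozen.termsIterable()) {
          // look up postings for 'term' and mark matching docs deleted
        }
        for (BufferedDeletesStream.QueryAndLimit ql : frozen.queriesIterable()) {
          // run the query and delete matches below its recorded docID limit
        }
      }
    }
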
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Tue Feb 22 01:00:39 2011
@@ -204,7 +204,7 @@ public final class IndexFileNames {
   
   /**
    * Returns true if the given filename ends with the given extension. One
-   * should provide a <i>pure</i> extension, withouth '.'.
+   * should provide a <i>pure</i> extension, without '.'.
    */
   public static boolean matchesExtension(String filename, String ext) {
     // It doesn't make a difference whether we allocate a StringBuilder ourself

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java Tue Feb 22 01:00:39 2011
@@ -21,7 +21,7 @@ import java.io.FileNotFoundException;
 
 /**
  * Signals that no index was found in the Directory. Possibly because the
- * directory is empty, however can slso indicate an index corruption.
+ * directory is empty, however can also indicate an index corruption.
  */
 public final class IndexNotFoundException extends FileNotFoundException {
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java Tue Feb 22 01:00:39 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.FieldCache; // javadocs
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
@@ -82,6 +83,62 @@ import java.util.concurrent.atomic.Atomi
 public abstract class IndexReader implements Cloneable,Closeable {
 
   /**
+   * A custom listener that's invoked when the IndexReader
+   * is finished.
+   *
+   * <p>For a SegmentReader, this listener is called only
+   * once all SegmentReaders sharing the same core are
+   * closed.  At this point it is safe for apps to evict
+   * this reader from any caches keyed on {@link
+   * #getCoreCacheKey}.  This is the same interface that
+   * {@link FieldCache} uses, internally, to evict
+   * entries.</p>
+   *
+   * <p>For other readers, this listener is called when they
+   * are closed.</p>
+   *
+   * @lucene.experimental
+   */
+  public static interface ReaderFinishedListener {
+    public void finished(IndexReader reader);
+  }
+
+  // Impls must set this if they may call add/removeReaderFinishedListener:
+  protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
+
+  /** Expert: adds a {@link ReaderFinishedListener}.  The
+   * provided listener is also added to any sub-readers, if
+   * this is a composite reader.  Also, any reader reopened
+   * or cloned from this one will also copy the listeners at
+   * the time of reopen.
+   *
+   * @lucene.experimental */
+  public void addReaderFinishedListener(ReaderFinishedListener listener) {
+    readerFinishedListeners.add(listener);
+  }
+
+  /** Expert: remove a previously added {@link ReaderFinishedListener}.
+   *
+   * @lucene.experimental */
+  public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+    readerFinishedListeners.remove(listener);
+  }
+
+  protected void notifyReaderFinishedListeners() {
+    // Defensive (should never be null -- all impls must set
+    // this):
+    if (readerFinishedListeners != null) {
+      for(ReaderFinishedListener listener : readerFinishedListeners) {
+        listener.finished(this);
+      }
+    }
+  }
+
+  protected void readerFinished() {
+    notifyReaderFinishedListeners();
+  }
+
+  /**
    * Constants describing field properties, for example used for
    * {@link IndexReader#getFieldNames(FieldOption)}.
    */
@@ -195,6 +252,7 @@ public abstract class IndexReader implem
           refCount.incrementAndGet();
         }
       }
+      readerFinished();
     }
   }
   
@@ -238,24 +296,26 @@ public abstract class IndexReader implem
   /**
    * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
    *
-   *
    * @param writer The IndexWriter to open from
+   * @param applyAllDeletes If true, all buffered deletes will
+   * be applied (made visible) in the returned reader.  If
+   * false, the deletes are not applied but remain buffered
+   * (in IndexWriter) so that they will be applied in the
+   * future.  Applying deletes can be costly, so if your app
+   * can tolerate deleted documents being returned you might
+   * gain some performance by passing false.
    * @return The new IndexReader
    * @throws CorruptIndexException
    * @throws IOException if there is a low-level IO error
    *
-   * @see #reopen(IndexWriter)
+   * @see #reopen(IndexWriter,boolean)
    *
    * @lucene.experimental
    */
-  public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
-    return writer.getReader();
+  public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+    return writer.getReader(applyAllDeletes);
   }
 
-  
-
-
-
   /** Expert: returns an IndexReader reading the index in the given
    *  {@link IndexCommit}.  You should pass readOnly=true, since it
    *  gives much better concurrent performance, unless you
@@ -358,7 +418,10 @@ public abstract class IndexReader implem
    *  memory.  By setting this to a value > 1 you can reduce
    *  memory usage, at the expense of higher latency when
    *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
+   *  to -1 to skip loading the terms index entirely. This is only useful in 
+   *  advanced situations when you will only .next() through all terms; 
+   *  attempts to seek will hit an exception.
+   *  
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
@@ -557,18 +620,26 @@ public abstract class IndexReader implem
    * if you attempt to reopen any of those readers, you'll
    * hit an {@link AlreadyClosedException}.</p>
    *
-   * @lucene.experimental
-   *
    * @return IndexReader that covers entire index plus all
    * changes made so far by this IndexWriter instance
    *
+   * @param writer The IndexWriter to open from
+   * @param applyAllDeletes If true, all buffered deletes will
+   * be applied (made visible) in the returned reader.  If
+   * false, the deletes are not applied but remain buffered
+   * (in IndexWriter) so that they will be applied in the
+   * future.  Applying deletes can be costly, so if your app
+   * can tolerate deleted documents being returned you might
+   * gain some performance by passing false.
+   *
    * @throws IOException
+   *
+   * @lucene.experimental
    */
-  public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
-    return writer.getReader();
+  public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+    return writer.getReader(applyAllDeletes);
   }
 
-
   /**
    * Efficiently clones the IndexReader (sharing most
    * internal state).
@@ -933,8 +1004,8 @@ public abstract class IndexReader implem
 
   /** Expert: Resets the normalization factor for the named field of the named
    * document.  The norm represents the product of the field's {@link
-   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
-   * int) length normalization}.  Thus, to preserve the length normalization
+   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
+   * length normalization.  Thus, to preserve the length normalization
    * values when resetting this, one should base the new value upon the old.
    *
    * <b>NOTE:</b> If this field does not store norms, then
@@ -1163,7 +1234,16 @@ public abstract class IndexReader implem
     return n;
   }
 
-  /** Undeletes all documents currently marked as deleted in this index.
+  /** Undeletes all documents currently marked as deleted in
+   * this index.
+   *
+   * <p>NOTE: this method can only recover documents marked
+   * for deletion but not yet removed from the index; when
+   * and how Lucene removes deleted documents is an
+   * implementation detail, subject to change from release
+   * to release.  However, you can use {@link
+   * #numDeletedDocs} on the current IndexReader instance to
+   * see how many documents will be un-deleted.
    *
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened

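The new signatures documented above let callers trade delete visibility
against reopen cost. A hedged sketch of the intended NRT pattern, assuming
an already-open IndexWriter:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;

    class NrtOpenSketch {
      // Cheap open: buffered deletes stay buffered, so the reader may
      // still return documents already deleted through the writer.
      static IndexReader openFast(IndexWriter writer) throws IOException {
        return IndexReader.open(writer, false);
      }

      // Refresh making all buffered deletes visible; reopen may return
      // the same instance when nothing has changed.
      static IndexReader refresh(IndexReader old, IndexWriter writer) throws IOException {
        IndexReader fresh = old.reopen(writer, true);
        if (fresh != old) {
          old.close();
        }
        return fresh;
      }
    }
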
Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriter.java Tue Feb 22 01:00:39 2011
@@ -31,22 +31,24 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
 import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.MapBackedSet;
 
 /**
   An <code>IndexWriter</code> creates and maintains an index.
@@ -214,12 +216,12 @@ public class IndexWriter implements Clos
   private long lastCommitChangeCount; // last changeCount that was committed
 
   private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
-  private HashMap<SegmentInfo,Integer> rollbackSegments;
 
   volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
   volatile long pendingCommitChangeCount;
 
   final SegmentInfos segmentInfos;       // the segments
+  final FieldInfos fieldInfos;
 
   private DocumentsWriter docWriter;
   final IndexFileDeleter deleter;
@@ -250,7 +252,7 @@ public class IndexWriter implements Clos
   private final AtomicInteger flushDeletesCount = new AtomicInteger();
 
   final ReaderPool readerPool = new ReaderPool();
-  final BufferedDeletes bufferedDeletes;
+  final BufferedDeletesStream bufferedDeletesStream;
 
   // This is a "write once" variable (like the organic dye
   // on a DVD-R that may or may not be heated by a laser and
@@ -273,6 +275,10 @@ public class IndexWriter implements Clos
   // for testing
   boolean anyNonBulkMerges;
 
+  IndexReader getReader() throws IOException {
+    return getReader(true);
+  }
+
   /**
    * Expert: returns a readonly reader, covering all
    * committed as well as un-committed changes to the index.
@@ -332,7 +338,7 @@ public class IndexWriter implements Clos
    *
    * @throws IOException
    */
-  IndexReader getReader() throws IOException {
+  IndexReader getReader(boolean applyAllDeletes) throws IOException {
     ensureOpen();
 
     final long tStart = System.currentTimeMillis();
@@ -351,8 +357,8 @@ public class IndexWriter implements Clos
     // just like we do when loading segments_N
     IndexReader r;
     synchronized(this) {
-      flush(false, true);
-      r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+      flush(false, applyAllDeletes);
+      r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
       if (infoStream != null) {
         message("return reader version=" + r.getVersion() + " reader=" + r);
       }
@@ -365,6 +371,13 @@ public class IndexWriter implements Clos
     return r;
   }
 
+  // Used for all SegmentReaders we open
+  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());
+
+  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
+    return readerFinishedListeners;
+  }
+
   /** Holds shared SegmentReader instances. IndexWriter uses
    *  SegmentReaders for 1) applying deletes, 2) doing
    *  merges, 3) handing out a real-time reader.  This pool
@@ -376,8 +389,7 @@ public class IndexWriter implements Clos
 
     private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
 
-    /** Forcefully clear changes for the specified segments,
-     *  and remove from the pool.   This is called on successful merge. */
+    /** Forcefully clear changes for the specified segments.  This is called on successful merge. */
     synchronized void clear(SegmentInfos infos) throws IOException {
       if (infos == null) {
         for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
@@ -385,8 +397,9 @@ public class IndexWriter implements Clos
         }
       } else {
         for (final SegmentInfo info: infos) {
-          if (readerMap.containsKey(info)) {
-            readerMap.get(info).hasChanges = false;
+          final SegmentReader r = readerMap.get(info);
+          if (r != null) {
+            r.hasChanges = false;
           }
         }
       }
@@ -395,8 +408,8 @@ public class IndexWriter implements Clos
     // used only by asserts
     public synchronized boolean infoIsLive(SegmentInfo info) {
       int idx = segmentInfos.indexOf(info);
-      assert idx != -1;
-      assert segmentInfos.get(idx) == info;
+      assert idx != -1: "info=" + info + " isn't in pool";
+      assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
       return true;
     }
 
@@ -467,6 +480,21 @@ public class IndexWriter implements Clos
       return false;
     }
 
+    public synchronized void drop(SegmentInfos infos) throws IOException {
+      for(SegmentInfo info : infos) {
+        drop(info);
+      }
+    }
+
+    public synchronized void drop(SegmentInfo info) throws IOException {
+      final SegmentReader sr = readerMap.get(info);
+      if (sr != null) {
+        sr.hasChanges = false;
+        readerMap.remove(info);
+        sr.close();
+      }
+    }
+
     /** Remove all our references to readers, and commits
      *  any pending changes. */
     synchronized void close() throws IOException {
@@ -504,19 +532,18 @@ public class IndexWriter implements Clos
      * Commit all segment reader in the pool.
      * @throws IOException
      */
-    synchronized void commit() throws IOException {
+    synchronized void commit(SegmentInfos infos) throws IOException {
 
       // We invoke deleter.checkpoint below, so we must be
       // sync'd on IW:
       assert Thread.holdsLock(IndexWriter.this);
 
-      for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {
+      for (SegmentInfo info : infos) {
 
-        SegmentReader sr = ent.getValue();
-        if (sr.hasChanges) {
-          assert infoIsLive(sr.getSegmentInfo());
+        final SegmentReader sr = readerMap.get(info);
+        if (sr != null && sr.hasChanges) {
+          assert infoIsLive(info);
           sr.doCommit(null);
-
           // Must checkpoint w/ deleter, because this
           // segment reader will have created new _X_N.del
           // file.
@@ -574,6 +601,7 @@ public class IndexWriter implements Clos
         // synchronized
         // Returns a ref, which we xfer to readerMap:
         sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+        sr.readerFinishedListeners = readerFinishedListeners;
 
         if (info.dir == directory) {
           // Only pool if reader is not external
@@ -676,7 +704,7 @@ public class IndexWriter implements Clos
    *          according <code>conf.getOpenMode()</code>.
    * @param conf
    *          the configuration settings according to which IndexWriter should
-   *          be initalized.
+   *          be initialized.
    * @throws CorruptIndexException
    *           if the index is corrupt
    * @throws LockObtainFailedException
@@ -701,8 +729,8 @@ public class IndexWriter implements Clos
     mergedSegmentWarmer = conf.getMergedSegmentWarmer();
     codecs = conf.getCodecProvider();
 
-    bufferedDeletes = new BufferedDeletes(messageID);
-    bufferedDeletes.setInfoStream(infoStream);
+    bufferedDeletesStream = new BufferedDeletesStream(messageID);
+    bufferedDeletesStream.setInfoStream(infoStream);
     poolReaders = conf.getReaderPooling();
 
     OpenMode mode = conf.getOpenMode();
@@ -767,7 +795,10 @@ public class IndexWriter implements Clos
 
       setRollbackSegmentInfos(segmentInfos);
 
-      docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getIndexerThreadPool(), getCurrentFieldInfos(), bufferedDeletes);
+      // start with previous field numbers, but new FieldInfos
+      fieldInfos = getCurrentFieldInfos();
+      docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getIndexerThreadPool(),
+          fieldInfos.newFieldInfosWithGlobalFieldNumberMap(), bufferedDeletesStream);
       docWriter.setInfoStream(infoStream);
 
       // Default deleter (for backwards compatibility) is
@@ -830,23 +861,14 @@ public class IndexWriter implements Clos
   private FieldInfos getCurrentFieldInfos() throws IOException {
     final FieldInfos fieldInfos;
     if (segmentInfos.size() > 0) {
-      if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
-        // Pre-4.0 index.  In this case we sweep all
-        // segments, merging their FieldInfos:
         fieldInfos = new FieldInfos();
         for(SegmentInfo info : segmentInfos) {
           final FieldInfos segFieldInfos = getFieldInfos(info);
-          final int fieldCount = segFieldInfos.size();
-          for(int fieldNumber=0;fieldNumber<fieldCount;fieldNumber++) {
-            fieldInfos.add(segFieldInfos.fieldInfo(fieldNumber));
+          for (FieldInfo fi : segFieldInfos) {
+            fieldInfos.add(fi);
           }
         }
       } else {
-        // Already a 4.0 index; just seed the FieldInfos
-        // from the last segment
-        fieldInfos = getFieldInfos(segmentInfos.info(segmentInfos.size()-1));
-      }
-    } else {
       fieldInfos = new FieldInfos();
     }
     return fieldInfos;
@@ -854,10 +876,6 @@ public class IndexWriter implements Clos
 
   private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
     rollbackSegmentInfos = (SegmentInfos) infos.clone();
-    rollbackSegments = new HashMap<SegmentInfo,Integer>();
-    final int size = rollbackSegmentInfos.size();
-    for(int i=0;i<size;i++)
-      rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
   }
 
   /**
@@ -919,7 +937,7 @@ public class IndexWriter implements Clos
     this.infoStream = infoStream;
     docWriter.setInfoStream(infoStream);
     deleter.setInfoStream(infoStream);
-    bufferedDeletes.setInfoStream(infoStream);
+    bufferedDeletesStream.setInfoStream(infoStream);
     if (infoStream != null)
       messageState();
   }
@@ -1165,7 +1183,7 @@ public class IndexWriter implements Clos
 
   public synchronized boolean hasDeletions() throws IOException {
     ensureOpen();
-    if (bufferedDeletes.any()) {
+    if (bufferedDeletesStream.any()) {
       return true;
     }
     if (docWriter.anyDeletions()) {
@@ -1504,6 +1522,11 @@ public class IndexWriter implements Clos
    * you should immediately close the writer.  See <a
    * href="#OOME">above</a> for details.</p>
    *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
+   *
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    * @see MergePolicy#findMergesForOptimize
@@ -1653,6 +1676,11 @@ public class IndexWriter implements Clos
    * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
    * you should immediately close the writer.  See <a
    * href="#OOME">above</a> for details.</p>
+   *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
    */
   public void expungeDeletes(boolean doWait)
     throws CorruptIndexException, IOException {
@@ -1803,6 +1831,18 @@ public class IndexWriter implements Clos
     }
   }
 
+  /** Expert: to be used by a {@link MergePolicy} to avoid
+   *  selecting merges for segments already being merged.
+   *  The returned collection is not cloned, and thus is
+   *  only safe to access if you hold IndexWriter's lock
+   *  (which you do when IndexWriter invokes the
+   *  MergePolicy).
+   *
+   *  <p>Do not alter the returned collection! */
+  public synchronized Collection<SegmentInfo> getMergingSegments() {
+    return mergingSegments;
+  }
+
   /** Expert: the {@link MergeScheduler} calls this method
    *  to retrieve the next merge requested by the
    *  MergePolicy */
@@ -1860,9 +1900,9 @@ public class IndexWriter implements Clos
       mergePolicy.close();
       mergeScheduler.close();
 
-      synchronized(this) {
+      bufferedDeletesStream.clear();
 
-        bufferedDeletes.clear();
+      synchronized(this) {
 
         if (pendingCommit != null) {
           pendingCommit.rollbackCommit(directory);
@@ -1923,8 +1963,9 @@ public class IndexWriter implements Clos
    *
    * <p>NOTE: this method will forcefully abort all merges
    *    in progress.  If other threads are running {@link
-   *    #optimize()} or any of the addIndexes methods, they
-   *    will receive {@link MergePolicy.MergeAbortedException}s.
+   *    #optimize()}, {@link #addIndexes(IndexReader[])} or
+   *    {@link #expungeDeletes} methods, they may receive
+   *    {@link MergePolicy.MergeAbortedException}s.
    */
   public synchronized void deleteAll() throws IOException {
     try {
@@ -2039,7 +2080,7 @@ public class IndexWriter implements Clos
     deleter.checkpoint(segmentInfos, false);
   }
 
-  void addFlushedSegment(SegmentInfo newSegment) throws IOException {
+  void addFlushedSegment(SegmentInfo newSegment, BitVector deletedDocs) throws IOException {
     assert newSegment != null;
 
     setDiagnostics(newSegment, "flush");
@@ -2063,6 +2104,38 @@ public class IndexWriter implements Clos
 
         newSegment.setUseCompoundFile(true);
 
+        // Must write deleted docs after the CFS so we don't
+        // slurp the del file into CFS:
+        if (deletedDocs != null) {
+          final int delCount = deletedDocs.count();
+          assert delCount > 0;
+          newSegment.setDelCount(delCount);
+          newSegment.advanceDelGen();
+          final String delFileName = newSegment.getDelFileName();
+          if (infoStream != null) {
+            message("flush: write " + delCount + " deletes to " + delFileName);
+          }
+          boolean success2 = false;
+          try {
+            // TODO: in the NRT case it'd be better to hand
+            // this del vector over to the
+            // shortly-to-be-opened SegmentReader and let it
+            // carry the changes; there's no reason to use
+            // filesystem as intermediary here.
+            deletedDocs.write(directory, delFileName);
+            success2 = true;
+          } finally {
+            if (!success2) {
+              try {
+                directory.deleteFile(delFileName);
+              } catch (Throwable t) {
+                // suppress this so we keep throwing the
+                // original exception
+              }
+            }
+          }
+        }
+
         success = true;
       } finally {
         if (!success) {
@@ -2264,10 +2337,13 @@ public class IndexWriter implements Clos
    * close the writer. See <a href="#OOME">above</a> for details.
    * </p>
    *
-   * @throws CorruptIndexException
-   *           if the index is corrupt
-   * @throws IOException
-   *           if there is a low-level IO error
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
+   *
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
    */
   public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
     ensureOpen();
@@ -2276,7 +2352,7 @@ public class IndexWriter implements Clos
       String mergedName = newSegmentName();
       SegmentMerger merger = new SegmentMerger(directory, termIndexInterval,
                                                mergedName, null, codecs, payloadProcessorProvider,
-                                               ((FieldInfos) docWriter.getFieldInfos().clone()));
+                                               fieldInfos.newFieldInfosWithGlobalFieldNumberMap());
 
       for (IndexReader reader : readers)      // add new indexes
         merger.add(reader);
@@ -2284,8 +2360,8 @@ public class IndexWriter implements Clos
       int docCount = merger.merge();                // merge 'em
 
       SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
-                                         false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
-                                         merger.fieldInfos().hasVectors());
+                                         false, merger.getSegmentCodecs(),
+                                         merger.fieldInfos());
       setDiagnostics(info, "addIndexes(IndexReader...)");
 
       boolean useCompoundFile;
@@ -2493,13 +2569,13 @@ public class IndexWriter implements Clos
   }
 
   /**
-   * Flush all in-memory buffered udpates (adds and deletes)
+   * Flush all in-memory buffered updates (adds and deletes)
    * to the Directory.
    * @param triggerMerge if true, we may merge segments (if
    *  deletes or docs were flushed) if necessary
-   * @param flushDeletes whether pending deletes should also
+   * @param applyAllDeletes whether pending deletes should also be applied
    */
-  protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+  protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 
     // NOTE: this method cannot be sync'd because
     // maybeMerge() in turn calls mergeScheduler.merge which
@@ -2510,7 +2586,7 @@ public class IndexWriter implements Clos
 
     // We can be called during close, when closing==true, so we must pass false to ensureOpen:
     ensureOpen(false);
-    if (doFlush(flushDeletes) && triggerMerge) {
+    if (doFlush(applyAllDeletes) && triggerMerge) {
       maybeMerge();
     }
   }
@@ -2554,10 +2630,10 @@ public class IndexWriter implements Clos
           // tiny segments:
           if (flushControl.getFlushDeletes() ||
               (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
-               bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+               bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
             applyAllDeletes = true;
             if (infoStream != null) {
-              message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+              message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
             }
           }
         }
@@ -2567,12 +2643,34 @@ public class IndexWriter implements Clos
             message("apply all deletes during flush");
           }
           flushDeletesCount.incrementAndGet();
-          if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+          final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+          if (result.anyDeletes) {
             checkpoint();
           }
+          if (!keepFullyDeletedSegments && result.allDeleted != null) {
+            if (infoStream != null) {
+              message("drop 100% deleted segments: " + result.allDeleted);
+            }
+            for(SegmentInfo info : result.allDeleted) {
+              // If a merge has already registered for this
+              // segment, we leave it in the readerPool; the
+              // merge will skip merging it and will then drop
+              // it once it's done:
+              if (!mergingSegments.contains(info)) {
+                segmentInfos.remove(info);
+                if (readerPool != null) {
+                  readerPool.drop(info);
+                }
+              }
+            }
+            checkpoint();
+          }
+          bufferedDeletesStream.prune(segmentInfos);
+          assert !bufferedDeletesStream.any();
+
           flushControl.clearDeletes();
         } else if (infoStream != null) {
-          message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+          message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
         }
 
         doAfterFlush();
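The block above introduces the rule that fully deleted segments are dropped at flush time unless a registered merge still references them; the merge itself skips such a segment and drops it once it commits. A simplified sketch of that rule over plain collections, with T standing in for SegmentInfo:

    import java.util.Collection;
    import java.util.List;

    public class DropFullyDeleted {
      // Remove each 100%-deleted segment from the live list unless
      // an in-flight merge still refers to it; that merge will skip
      // it and drop it when the merge commits.
      static <T> void drop(List<T> liveSegments,
                           Collection<T> allDeleted,
                           Collection<T> mergingSegments) {
        for (T info : allDeleted) {
          if (!mergingSegments.contains(info)) {
            liveSegments.remove(info);
          }
        }
      }
    }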
@@ -2600,7 +2698,7 @@ public class IndexWriter implements Clos
   public final long ramSizeInBytes() {
     ensureOpen();
     // nocommit
-    //return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+    //return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
     return 0;
   }
 
@@ -2611,28 +2709,12 @@ public class IndexWriter implements Clos
     return docWriter.getNumDocs();
   }
 
-  private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
-    int first = segmentInfos.indexOf(merge.segments.info(0));
-    if (first == -1)
-      throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
-    final int numSegments = segmentInfos.size();
-
-    final int numSegmentsToMerge = merge.segments.size();
-    for(int i=0;i<numSegmentsToMerge;i++) {
-      final SegmentInfo info = merge.segments.info(i);
-
-      if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
-        if (segmentInfos.indexOf(info) == -1)
-          throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
-        else
-          throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
-                                               directory);
+  private void ensureValidMerge(MergePolicy.OneMerge merge) {
+    for(SegmentInfo info : merge.segments) {
+      if (segmentInfos.indexOf(info) == -1) {
+        throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
       }
     }
-
-    return first;
   }
 
   /** Carefully merges deletes for the segments we just
@@ -2657,13 +2739,19 @@ public class IndexWriter implements Clos
     // started merging:
     int docUpto = 0;
     int delCount = 0;
+    long minGen = Long.MAX_VALUE;
 
     for(int i=0; i < sourceSegments.size(); i++) {
       SegmentInfo info = sourceSegments.info(i);
+      minGen = Math.min(info.getBufferedDeletesGen(), minGen);
       int docCount = info.docCount;
-      SegmentReader previousReader = merge.readersClone[i];
+      final SegmentReader previousReader = merge.readerClones.get(i);
+      if (previousReader == null) {
+        // Reader was skipped because it was 100% deletions
+        continue;
+      }
       final Bits prevDelDocs = previousReader.getDeletedDocs();
-      SegmentReader currentReader = merge.readers[i];
+      final SegmentReader currentReader = merge.readers.get(i);
       final Bits currentDelDocs = currentReader.getDeletedDocs();
       if (previousReader.hasDeletions()) {
 
@@ -2710,9 +2798,17 @@ public class IndexWriter implements Clos
     assert mergedReader.numDeletedDocs() == delCount;
 
     mergedReader.hasChanges = delCount > 0;
+
+    // If new deletes were applied while we were merging
+    // (which happens if eg commit() or getReader() is
+    // called during our merge), then it better be the case
+    // that the delGen has increased for all our merged
+    // segments:
+    assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+    mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
   }
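The minGen bookkeeping above is what lets BufferedDeletesStream.prune work: the merged segment inherits the smallest buffered-deletes generation among its sources, so delete packets at or after that generation still apply to it, while strictly older packets can be freed. A tiny worked sketch of the reduction (sourceGens stands in for info.getBufferedDeletesGen() over the source segments):

    import java.util.Arrays;
    import java.util.List;

    public class MinGenDemo {
      static long minGen(List<Long> sourceGens) {
        long minGen = Long.MAX_VALUE;
        for (long gen : sourceGens) {
          minGen = Math.min(gen, minGen);
        }
        return minGen;
      }

      public static void main(String[] args) {
        // Segments at delete gens 7, 5 and 9 merge into one segment
        // stamped with gen 5: packets with gen < 5 become prunable.
        System.out.println(minGen(Arrays.asList(7L, 5L, 9L))); // prints 5
      }
    }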
 
-  /* FIXME if we want to support non-contiguous segment merges */
   synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
 
     assert testPoint("startCommitMerge");
@@ -2738,19 +2834,57 @@ public class IndexWriter implements Clos
       return false;
     }
 
-    final int start = ensureContiguousMerge(merge);
-
     commitMergedDeletes(merge, mergedReader);
 
    // If the doc store we are using has been closed and
    // is now in compound format (but wasn't when we
    // started), then we will switch to the compound
    // format as well:
-    setMergeDocStoreIsCompoundFile(merge);
 
-    segmentInfos.subList(start, start + merge.segments.size()).clear();
     assert !segmentInfos.contains(merge.info);
-    segmentInfos.add(start, merge.info);
+
+    final boolean allDeleted = mergedReader.numDocs() == 0;
+
+    if (infoStream != null && allDeleted) {
+      message("merged segment " + merge.info + " is 100% deleted" +  (keepFullyDeletedSegments ? "" : "; skipping insert"));
+    }
+
+    final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+    int segIdx = 0;
+    int newSegIdx = 0;
+    boolean inserted = false;
+    final int curSegCount = segmentInfos.size();
+    while(segIdx < curSegCount) {
+      final SegmentInfo info = segmentInfos.info(segIdx++);
+      if (mergedAway.contains(info)) {
+        if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
+          segmentInfos.set(segIdx-1, merge.info);
+          inserted = true;
+          newSegIdx++;
+        }
+      } else {
+        segmentInfos.set(newSegIdx++, info);
+      }
+    }
+
+    // Either we found a place to insert the segment, or we
+    // did not, but only because all the segments we merged
+    // became deleted while we were merging; in that case the
+    // new segment must also be all deleted:
+    if (!inserted) {
+      assert allDeleted;
+      if (keepFullyDeletedSegments) {
+        segmentInfos.add(0, merge.info);
+      } else {
+        readerPool.drop(merge.info);
+      }
+    }
+
+    segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+    if (infoStream != null) {
+      message("after commit: " + segString());
+    }
 
     closeMergeReaders(merge, false);
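The replacement loop above rewrites segmentInfos in place: merged-away segments are overwritten, merge.info is inserted at the slot of the first one encountered (at that point newSegIdx and segIdx-1 coincide, since everything earlier was copied straight through), and the tail is truncated. A standalone sketch of the same compaction over a plain List, omitting the fully-deleted guard (T stands in for SegmentInfo):

    import java.util.List;
    import java.util.Set;

    public class ReplaceMerged {
      // Returns true if 'merged' was inserted; false means none of
      // the merged-away segments were found in 'segments'.
      static <T> boolean replace(List<T> segments, Set<T> mergedAway, T merged) {
        int newIdx = 0;
        boolean inserted = false;
        for (int idx = 0; idx < segments.size(); idx++) {
          final T info = segments.get(idx);
          if (mergedAway.contains(info)) {
            if (!inserted) {
              segments.set(newIdx++, merged);
              inserted = true;
            }
          } else {
            segments.set(newIdx++, info);
          }
        }
        segments.subList(newIdx, segments.size()).clear();
        return inserted;
      }
    }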
 
@@ -2763,16 +2897,11 @@ public class IndexWriter implements Clos
     // disk, updating SegmentInfo, etc.:
     readerPool.clear(merge.segments);
 
-    // remove pending deletes of the segments
-    // that were merged, moving them onto the segment just
-    // before the merged segment
-    // Lock order: IW -> BD
-    bufferedDeletes.commitMerge(merge);
-
     if (merge.optimize) {
       // cascade the optimize:
       segmentsToOptimize.add(merge.info);
     }
+
     return true;
   }
 
@@ -2900,7 +3029,7 @@ public class IndexWriter implements Clos
       }
     }
 
-    ensureContiguousMerge(merge);
+    ensureValidMerge(merge);
 
     pendingMerges.add(merge);
 
@@ -2914,8 +3043,9 @@ public class IndexWriter implements Clos
     // is running (while synchronized) to avoid race
     // condition where two conflicting merges from different
     // threads start
-    for(int i=0;i<count;i++)
+    for(int i=0;i<count;i++) {
       mergingSegments.add(merge.segments.info(i));
+    }
 
     // Merge is now registered
     merge.registerDone = true;
@@ -2927,10 +3057,6 @@ public class IndexWriter implements Clos
   final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
     boolean success = false;
     try {
-      // Lock order: IW -> BD
-      if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
-        checkpoint();
-      }
       _mergeInit(merge);
       success = true;
     } finally {
@@ -2954,6 +3080,9 @@ public class IndexWriter implements Clos
       throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
     }
 
+    // TODO: is there any perf benefit to sorting
+    // merged segments?  eg biggest to smallest?
+
     if (merge.info != null)
       // mergeInit already done
       return;
@@ -2964,7 +3093,36 @@ public class IndexWriter implements Clos
     // Bind a new segment name here so even with
     // ConcurrentMergePolicy we keep deterministic segment
     // names.
-    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, fieldInfos.newFieldInfosWithGlobalFieldNumberMap());
+
+    // Lock order: IW -> BD
+    final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+
+    if (result.anyDeletes) {
+      checkpoint();
+    }
+
+    if (!keepFullyDeletedSegments && result.allDeleted != null) {
+      if (infoStream != null) {
+        message("drop 100% deleted segments: " + result.allDeleted);
+      }
+      for(SegmentInfo info : result.allDeleted) {
+        segmentInfos.remove(info);
+        if (merge.segments.contains(info)) {
+          mergingSegments.remove(info);
+          merge.segments.remove(info);
+        }
+      }
+      if (readerPool != null) {
+        readerPool.drop(result.allDeleted);
+      }
+      checkpoint();
+    }
+
+    merge.info.setBufferedDeletesGen(result.gen);
+
+    // Lock order: IW -> BD
+    bufferedDeletesStream.prune(segmentInfos);
 
     Map<String,String> details = new HashMap<String,String>();
     details.put("optimize", Boolean.toString(merge.optimize));
@@ -3014,8 +3172,9 @@ public class IndexWriter implements Clos
     if (merge.registerDone) {
       final SegmentInfos sourceSegments = merge.segments;
       final int end = sourceSegments.size();
-      for(int i=0;i<end;i++)
+      for(int i=0;i<end;i++) {
         mergingSegments.remove(sourceSegments.info(i));
+      }
       mergingSegments.remove(merge.info);
       merge.registerDone = false;
     }
@@ -3023,47 +3182,30 @@ public class IndexWriter implements Clos
     runningMerges.remove(merge);
   }
 
-  private synchronized void setMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge) {
-    final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
-    if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
-      final int size = segmentInfos.size();
-      for(int i=0;i<size;i++) {
-        final SegmentInfo info = segmentInfos.info(i);
-        final String docStoreSegment = info.getDocStoreSegment();
-        if (docStoreSegment != null &&
-            docStoreSegment.equals(mergeDocStoreSegment) &&
-            info.getDocStoreIsCompoundFile()) {
-          merge.info.setDocStoreIsCompoundFile(true);
-          break;
-        }
-      }
-    }
-  }
-
   private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
-    final int numSegments = merge.segments.size();
+    final int numSegments = merge.readers.size();
     if (suppressExceptions) {
       // Suppress any new exceptions so we throw the
       // original cause
       boolean anyChanges = false;
       for (int i=0;i<numSegments;i++) {
-        if (merge.readers[i] != null) {
+        if (merge.readers.get(i) != null) {
           try {
-            anyChanges |= readerPool.release(merge.readers[i], false);
+            anyChanges |= readerPool.release(merge.readers.get(i), false);
           } catch (Throwable t) {
           }
-          merge.readers[i] = null;
+          merge.readers.set(i, null);
         }
 
-        if (merge.readersClone[i] != null) {
+        if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
           try {
-            merge.readersClone[i].close();
+            merge.readerClones.get(i).close();
           } catch (Throwable t) {
           }
           // This was a private clone and we had the
           // only reference
-          assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
-          merge.readersClone[i] = null;
+          assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
+          merge.readerClones.set(i, null);
         }
       }
       if (anyChanges) {
@@ -3071,16 +3213,16 @@ public class IndexWriter implements Clos
       }
     } else {
       for (int i=0;i<numSegments;i++) {
-        if (merge.readers[i] != null) {
-          readerPool.release(merge.readers[i], true);
-          merge.readers[i] = null;
+        if (merge.readers.get(i) != null) {
+          readerPool.release(merge.readers.get(i), true);
+          merge.readers.set(i, null);
         }
 
-        if (merge.readersClone[i] != null) {
-          merge.readersClone[i].close();
+        if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
+          merge.readerClones.get(i).close();
           // This was a private clone and we had the only reference
-          assert merge.readersClone[i].getRefCount() == 0;
-          merge.readersClone[i] = null;
+          assert merge.readerClones.get(i).getRefCount() == 0;
+          merge.readerClones.set(i, null);
         }
       }
     }
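closeMergeReaders now drives both paths off merge.readers, and its suppressExceptions flag encodes a common cleanup idiom: when unwinding after a failure, secondary close errors must not mask the original exception, while on the normal path the first close failure should surface. A self-contained sketch of that idiom (CloseHelper and closeAll are hypothetical names):

    import java.io.Closeable;
    import java.io.IOException;
    import java.util.List;

    public class CloseHelper {
      static void closeAll(List<? extends Closeable> resources,
                           boolean suppressExceptions) throws IOException {
        for (Closeable c : resources) {
          if (c == null) {
            continue;
          }
          if (suppressExceptions) {
            try {
              c.close();
            } catch (Throwable t) {
              // suppressed: the caller is already throwing the
              // original exception
            }
          } else {
            c.close(); // let the first failure escape
          }
        }
      }
    }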
@@ -3099,46 +3241,53 @@ public class IndexWriter implements Clos
     int mergedDocCount = 0;
 
     SegmentInfos sourceSegments = merge.segments;
-    final int numSegments = sourceSegments.size();
 
     SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
                                              codecs, payloadProcessorProvider,
-                                             ((FieldInfos) docWriter.getFieldInfos().clone()));
+                                             merge.info.getFieldInfos());
 
     if (infoStream != null) {
       message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
     }
 
-    merge.info.setHasVectors(merger.fieldInfos().hasVectors());
-    merge.readers = new SegmentReader[numSegments];
-    merge.readersClone = new SegmentReader[numSegments];
+    merge.readers = new ArrayList<SegmentReader>();
+    merge.readerClones = new ArrayList<SegmentReader>();
+
+    merge.info.clearFilesCache();
+
 
     // This is try/finally to make sure merger's readers are
     // closed:
     boolean success = false;
     try {
       int totDocCount = 0;
+      int segUpto = 0;
+      while(segUpto < sourceSegments.size()) {
 
-      for (int i = 0; i < numSegments; i++) {
-        final SegmentInfo info = sourceSegments.info(i);
+        final SegmentInfo info = sourceSegments.info(segUpto);
 
         // Hold onto the "live" reader; we will use this to
         // commit merged deletes
-        SegmentReader reader = merge.readers[i] = readerPool.get(info, true,
-                                                                 MERGE_READ_BUFFER_SIZE,
-                                                                 -config.getReaderTermsIndexDivisor());
+        final SegmentReader reader = readerPool.get(info, true,
+                                                    MERGE_READ_BUFFER_SIZE,
+                                                    -config.getReaderTermsIndexDivisor());
+        merge.readers.add(reader);
 
         // We clone the segment readers because other
        // deletes may come in while we're merging, so we
         // need readers that will not change
-        SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.clone(true);
-        merger.add(clone);
+        final SegmentReader clone = (SegmentReader) reader.clone(true);
+        merge.readerClones.add(clone);
 
+        if (reader.numDocs() > 0) {
+          merger.add(clone);
+        }
         totDocCount += clone.numDocs();
+        segUpto++;
       }
 
       if (infoStream != null) {
-        message("merge: total "+totDocCount+" docs");
+        message("merge: total " + totDocCount + " docs");
       }
 
       merge.checkAborted(directory);
@@ -3151,17 +3300,17 @@ public class IndexWriter implements Clos
 
       if (infoStream != null) {
         message("merge segmentCodecs=" + merger.getSegmentCodecs());
-        message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
+        message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
       }
-      anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments;
+      anyNonBulkMerges |= merger.getMatchedSubReaderCount() != merge.readers.size();
 
-      assert mergedDocCount == totDocCount;
+      assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
 
       // Very important to do this before opening the reader
       // because codec must know if prox was written for
       // this segment:
       //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
-      merge.info.setHasProx(merger.fieldInfos().hasProx());
+      merge.info.clearFilesCache();
 
       boolean useCompoundFile;
       synchronized (this) { // Guard segmentInfos
@@ -3329,6 +3478,19 @@ public class IndexWriter implements Clos
     }
   }
 
+  private boolean keepFullyDeletedSegments;
+
+  /** Only for testing.
+   *
+   * @lucene.internal */
+  void keepFullyDeletedSegments() {
+    keepFullyDeletedSegments = true;
+  }
+
+  boolean getKeepFullyDeletedSegments() {
+    return keepFullyDeletedSegments;
+  }
+
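keepFullyDeletedSegments() is package-private and intended only for tests that need to observe 100%-deleted segments instead of having them dropped at flush or merge time. A hedged sketch of how a test might use it ('dir' and 'conf' are assumed to exist; the test must live under org.apache.lucene.index to reach the hook):

    IndexWriter w = new IndexWriter(dir, conf);
    w.keepFullyDeletedSegments();  // 100%-deleted segments survive flush/merge
    // ... index docs, delete them all, commit, then assert the
    // empty segment is still present in the index ...
    w.close();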
   // called only from assert
   private boolean filesExist(SegmentInfos toSync) throws IOException {
     Collection<String> files = toSync.files(directory, false);
@@ -3384,9 +3546,9 @@ public class IndexWriter implements Clos
         if (infoStream != null)
           message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
 
-        readerPool.commit();
-
+        readerPool.commit(segmentInfos);
         toSync = (SegmentInfos) segmentInfos.clone();
+
         assert filesExist(toSync);
 
         if (commitUserData != null)
@@ -3516,7 +3678,7 @@ public class IndexWriter implements Clos
   }
 
   synchronized boolean nrtIsCurrent(SegmentInfos infos) {
-    return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+    return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
   }
 
   synchronized boolean isClosed() {
@@ -3680,11 +3842,10 @@ public class IndexWriter implements Clos
     }
 
     public synchronized boolean flushByRAMUsage(String reason) {
-      // nocommit
 //      final double ramBufferSizeMB = config.getRAMBufferSizeMB();
 //      if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
 //        final long limit = (long) (ramBufferSizeMB*1024*1024);
-//        long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+//        long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
 //        if (used >= limit) {
 //
 //          // DocumentsWriter may be able to free up some
@@ -3692,7 +3853,7 @@ public class IndexWriter implements Clos
 //          // Lock order: FC -> DW
 //          docWriter.balanceRAM();
 //
-//          used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+//          used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
 //          if (used >= limit) {
 //            return setFlushPending("ram full: " + reason, false);
 //          }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Tue Feb 22 01:00:39 2011
@@ -556,10 +556,13 @@ public final class IndexWriterConfig imp
   /** Sets the termsIndexDivisor passed to any readers that
    *  IndexWriter opens, for example when applying deletes
    *  or creating a near-real-time reader in {@link
-   *  IndexWriter#getReader}. */
+   *  IndexWriter#getReader}. If you pass -1, the terms index 
+   *  won't be loaded by the readers. This is only useful in 
+   *  advanced situations when you will only .next() through 
+   *  all terms; attempts to seek will hit an exception. */
   public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    if (divisor <= 0) {
-      throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+    if (divisor <= 0 && divisor != -1) {
+      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
     }
     readerTermsIndexDivisor = divisor;
     return this;
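The -1 option documented above makes the trade-off explicit: readers that IndexWriter opens internally will not load the terms index at all, which is only safe when terms are consumed strictly sequentially; any seek throws. A hedged usage sketch (NoTermsIndexConfig is a made-up wrapper; 'analyzer' and the Version.LUCENE_40 constant are assumed per the trunk API of this branch):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    public class NoTermsIndexConfig {
      // -1: internal readers never load the terms index; only safe
      // when terms are scanned strictly sequentially (seeks throw).
      static IndexWriterConfig newConfig(Analyzer analyzer) {
        return new IndexWriterConfig(Version.LUCENE_40, analyzer)
            .setReaderTermsIndexDivisor(-1);
      }
    }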

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Tue Feb 22 01:00:39 2011
@@ -37,10 +37,4 @@ abstract class InvertedDocConsumer {
   /** Attempt to free RAM, returning true if any RAM was
    *  freed */
   abstract boolean freeRAM();
-
-  FieldInfos fieldInfos;
-
-  void setFieldInfos(FieldInfos fieldInfos) {
-    this.fieldInfos = fieldInfos;
   }
-}

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1073192&r1=1073191&r2=1073192&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Tue Feb 22 01:00:39 2011
@@ -23,7 +23,6 @@ import java.util.Map;
 abstract class InvertedDocEndConsumer {
   abstract void flush(Map<FieldInfo, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
   abstract void abort();
-  abstract void setFieldInfos(FieldInfos fieldInfos);
   abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
   abstract void startDocument() throws IOException;
   abstract void finishDocument() throws IOException;