Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC

svn commit: r1068809 [6/36] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/queryparser/ dev-tools/...

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java Wed Feb  9 09:35:27 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.FieldCache; // javadocs
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
@@ -83,6 +84,62 @@ import java.util.concurrent.atomic.Atomi
 public abstract class IndexReader implements Cloneable,Closeable {
 
   /**
+   * A custom listener that's invoked when the IndexReader
+   * is finished.
+   *
+   * <p>For a SegmentReader, this listener is called only
+   * once all SegmentReaders sharing the same core are
+   * closed.  At this point it is safe for apps to evict
+   * this reader from any caches keyed on {@link
+   * #getCoreCacheKey}.  This is the same interface that
+   * {@link FieldCache} uses, internally, to evict
+   * entries.</p>
+   *
+   * <p>For other readers, this listener is called when they
+   * are closed.</p>
+   *
+   * @lucene.experimental
+   */
+  public static interface ReaderFinishedListener {
+    public void finished(IndexReader reader);
+  }
+
+  // Impls must set this if they may call add/removeReaderFinishedListener:
+  protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
+
+  /** Expert: adds a {@link ReaderFinishedListener}.  The
+   * provided listener is also added to any sub-readers, if
+   * this is a composite reader.  Also, any reader reopened
+   * or cloned from this one will also copy the listeners at
+   * the time of reopen.
+   *
+   * @lucene.experimental */
+  public void addReaderFinishedListener(ReaderFinishedListener listener) {
+    readerFinishedListeners.add(listener);
+  }
+
+  /** Expert: remove a previously added {@link ReaderFinishedListener}.
+   *
+   * @lucene.experimental */
+  public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+    readerFinishedListeners.remove(listener);
+  }
+
+  protected void notifyReaderFinishedListeners() {
+    // Defensive (should never be null -- all impls must set
+    // this):
+    if (readerFinishedListeners != null) {
+      for(ReaderFinishedListener listener : readerFinishedListeners) {
+        listener.finished(this);
+      }
+    }
+  }
+
+  protected void readerFinished() {
+    notifyReaderFinishedListeners();
+  }
+
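
(As a rough usage sketch, not part of this patch: the listener hook above is intended for evicting per-core cache entries.  The cache map and surrounding variables are illustrative; only addReaderFinishedListener and getCoreCacheKey come from Lucene.)

    // Evict a custom per-core cache entry once the last SegmentReader sharing
    // the core is closed.  "reader" is an assumed in-scope IndexReader.
    final java.util.Map<Object,Object> cache =
        new java.util.concurrent.ConcurrentHashMap<Object,Object>();

    reader.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
      public void finished(IndexReader closed) {
        cache.remove(closed.getCoreCacheKey());
      }
    });
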
+  /**
    * Constants describing field properties, for example used for
    * {@link IndexReader#getFieldNames(FieldOption)}.
    */
@@ -199,6 +256,7 @@ public abstract class IndexReader implem
           refCount.incrementAndGet();
         }
       }
+      readerFinished();
     }
   }
   
@@ -242,24 +300,26 @@ public abstract class IndexReader implem
   /**
    * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
    *
-   *
    * @param writer The IndexWriter to open from
+   * @param applyAllDeletes If true, all buffered deletes will
+   * be applied (made visible) in the returned reader.  If
+   * false, the deletes are not applied but remain buffered
+   * (in IndexWriter) so that they will be applied in the
+   * future.  Applying deletes can be costly, so if your app
+   * can tolerate deleted documents being returned you might
+   * gain some performance by passing false.
    * @return The new IndexReader
    * @throws CorruptIndexException
    * @throws IOException if there is a low-level IO error
    *
-   * @see #reopen(IndexWriter)
+   * @see #reopen(IndexWriter,boolean)
    *
    * @lucene.experimental
    */
-  public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
-    return writer.getReader();
+  public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+    return writer.getReader(applyAllDeletes);
   }
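
(Rough usage sketch, not from the patch: opening a near-real-time reader without applying buffered deletes, trading visibility of deletions for lower turnaround time.  The writer variable is assumed.)

    // NRT reader that skips applying buffered deletes for lower latency;
    // searches against it may still return deleted documents.
    IndexReader nrtReader = IndexReader.open(writer, false);
    try {
      // ... run searches against nrtReader ...
    } finally {
      nrtReader.close();
    }
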
 
-  
-
-
-
   /** Expert: returns an IndexReader reading the index in the given
    *  {@link IndexCommit}.  You should pass readOnly=true, since it
    *  gives much better concurrent performance, unless you
@@ -305,7 +365,7 @@ public abstract class IndexReader implem
    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
    * @param termInfosIndexDivisor Subsamples which indexed
    *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriter#setTermIndexInterval} except that setting
+   *  IndexWriterConfig#setTermIndexInterval} except that setting
    *  must be done at indexing time while this setting can be
    *  set per reader.  When set to N, then one in every
    *  N*termIndexInterval terms in the index is loaded into
@@ -355,14 +415,17 @@ public abstract class IndexReader implem
    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
    * @param termInfosIndexDivisor Subsamples which indexed
    *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriter#setTermIndexInterval} except that setting
+   *  IndexWriterConfig#setTermIndexInterval} except that setting
    *  must be done at indexing time while this setting can be
    *  set per reader.  When set to N, then one in every
    *  N*termIndexInterval terms in the index is loaded into
    *  memory.  By setting this to a value > 1 you can reduce
    *  memory usage, at the expense of higher latency when
    *  loading a TermInfo.  The default value is 1.  Set this
-   *  to -1 to skip loading the terms index entirely.
+   *  to -1 to skip loading the terms index entirely. This is only useful in 
+   *  advanced situations when you will only .next() through all terms; 
+   *  attempts to seek will hit an exception.
+   *  
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
@@ -384,7 +447,7 @@ public abstract class IndexReader implem
    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
    * @param termInfosIndexDivisor Subsamples which indexed
    *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriter#setTermIndexInterval} except that setting
+   *  IndexWriterConfig#setTermIndexInterval} except that setting
    *  must be done at indexing time while this setting can be
    *  set per reader.  When set to N, then one in every
    *  N*termIndexInterval terms in the index is loaded into
@@ -417,7 +480,7 @@ public abstract class IndexReader implem
    * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
    * @param termInfosIndexDivisor Subsamples which indexed
    *  terms are loaded into RAM. This has the same effect as {@link
-   *  IndexWriter#setTermIndexInterval} except that setting
+   *  IndexWriterConfig#setTermIndexInterval} except that setting
    *  must be done at indexing time while this setting can be
    *  set per reader.  When set to N, then one in every
    *  N*termIndexInterval terms in the index is loaded into
@@ -546,7 +609,7 @@ public abstract class IndexReader implem
    * file descriptors, CPU time) will be consumed.</p>
    *
    * <p>For lower latency on reopening a reader, you should
-   * call {@link #setMergedSegmentWarmer} to
+   * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
    * pre-warm a newly merged segment before it's committed
    * to the index.  This is important for minimizing
    * index-to-search delay after a large merge.  </p>
@@ -561,18 +624,26 @@ public abstract class IndexReader implem
    * if you attempt to reopen any of those readers, you'll
    * hit an {@link AlreadyClosedException}.</p>
    *
-   * @lucene.experimental
-   *
    * @return IndexReader that covers entire index plus all
    * changes made so far by this IndexWriter instance
    *
+   * @param writer The IndexWriter to open from
+   * @param applyAllDeletes If true, all buffered deletes will
+   * be applied (made visible) in the returned reader.  If
+   * false, the deletes are not applied but remain buffered
+   * (in IndexWriter) so that they will be applied in the
+   * future.  Applying deletes can be costly, so if your app
+   * can tolerate deleted documents being returned you might
+   * gain some performance by passing false.
+   *
    * @throws IOException
+   *
+   * @lucene.experimental
    */
-  public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
-    return writer.getReader();
+  public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+    return writer.getReader(applyAllDeletes);
   }
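
(Sketch of a typical refresh loop built on the new overload; writer and reader are assumed to exist, and the old reader is closed only when a different instance comes back.)

    // Periodic NRT refresh that avoids the cost of applying deletes each cycle.
    IndexReader newReader = reader.reopen(writer, false);
    if (newReader != reader) {
      reader.close();
      reader = newReader;
    }
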
 
-
   /**
    * Efficiently clones the IndexReader (sharing most
    * internal state).
@@ -935,14 +1006,6 @@ public abstract class IndexReader implem
    */
   public abstract byte[] norms(String field) throws IOException;
 
-  /** Reads the byte-encoded normalization factor for the named field of every
-   *  document.  This is used by the search code to score documents.
-   *
-   * @see org.apache.lucene.document.Field#setBoost(float)
-   */
-  public abstract void norms(String field, byte[] bytes, int offset)
-    throws IOException;
-
   /** Expert: Resets the normalization factor for the named field of the named
    * document.  The norm represents the product of the field's {@link
    * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
@@ -974,26 +1037,6 @@ public abstract class IndexReader implem
   protected abstract void doSetNorm(int doc, String field, byte value)
           throws CorruptIndexException, IOException;
 
-  /** Expert: Resets the normalization factor for the named field of the named
-   * document.
-   *
-   * @see #norms(String)
-   * @see Similarity#decodeNormValue(byte)
-   * 
-   * @throws StaleReaderException if the index has changed
-   *  since this reader was opened
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws LockObtainFailedException if another writer
-   *  has this index open (<code>write.lock</code> could not
-   *  be obtained)
-   * @throws IOException if there is a low-level IO error
-   */
-  public void setNorm(int doc, String field, float value)
-          throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
-    ensureOpen();
-    setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
-  }
-
   /** Flex API: returns {@link Fields} for this reader.
    *  This method may return null if the reader has no
    *  postings.
@@ -1029,6 +1072,23 @@ public abstract class IndexReader implem
     return terms.docFreq(term);
   }
 
+  /** Returns the total number of occurrences of the term
+   * <code>term</code> in the given field.  This method returns 0 if the term or
+   * field does not exist.  This method does not take into
+   * account deleted documents that have not yet been merged
+   * away. */
+  public long totalTermFreq(String field, BytesRef term) throws IOException {
+    final Fields fields = fields();
+    if (fields == null) {
+      return 0;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms == null) {
+      return 0;
+    }
+    return terms.totalTermFreq(term);
+  }
+
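
(Illustrative sketch contrasting the new method with docFreq; the field and term values are made up.)

    // docFreq: how many documents contain the term.
    // totalTermFreq: how many times the term occurs across those documents.
    // Neither accounts for deleted documents that have not been merged away.
    BytesRef term = new BytesRef("lucene");
    int docCount = reader.docFreq("body", term);
    long occurrences = reader.totalTermFreq("body", term);
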
   /** This may return null if the field does not exist.*/
   public Terms terms(String field) throws IOException {
     final Fields fields = fields();
@@ -1074,6 +1134,47 @@ public abstract class IndexReader implem
       return null;
     }
   }
+  
+  /**
+   * Returns {@link DocsEnum} for the specified field and
+   * {@link TermState}. This may return null if either the field or the term
+   * does not exist or the {@link TermState} is invalid for the underlying
+   * implementation. */
+  public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+    assert state != null;
+    assert field != null;
+    final Fields fields = fields();
+    if (fields == null) {
+      return null;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms != null) {
+      return terms.docs(skipDocs, term, state, null);
+    } else {
+      return null;
+    }
+  }
+  
+  /**
+   * Returns {@link DocsAndPositionsEnum} for the specified field and
+   * {@link TermState}. This may return null if either the field or the term
+   * does not exist, the {@link TermState} is invalid for the underlying
+   * implementation, or positions were not stored for this term. */
+  public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+    assert state != null;
+    assert field != null;
+    final Fields fields = fields();
+    if (fields == null) {
+      return null;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms != null) {
+      return terms.docsAndPositions(skipDocs, term, state, null);
+    } else {
+      return null;
+    }
+  }
+
 
   /** Deletes the document numbered <code>docNum</code>.  Once a document is
    * deleted it will not appear in TermDocs or TermPositions enumerations.
@@ -1137,7 +1238,16 @@ public abstract class IndexReader implem
     return n;
   }
 
-  /** Undeletes all documents currently marked as deleted in this index.
+  /** Undeletes all documents currently marked as deleted in
+   * this index.
+   *
+   * <p>NOTE: this method can only recover documents marked
+   * for deletion but not yet removed from the index; when
+   * and how Lucene removes deleted documents is an
+   * implementation detail, subject to change from release
+   * to release.  However, you can use {@link
+   * #numDeletedDocs} on the current IndexReader instance to
+   * see how many documents will be un-deleted.
    *
    * @throws StaleReaderException if the index has changed
    *  since this reader was opened
@@ -1360,9 +1470,7 @@ public abstract class IndexReader implem
   }
 
   /** Expert: returns the sequential sub readers that this
-   *  reader is logically composed of.  For example,
-   *  IndexSearcher uses this API to drive searching by one
-   *  sub reader at a time.  If this reader is not composed
+   *  reader is logically composed of. If this reader is not composed
    *  of sequential child readers, it should return null.
    *  If this method returns an empty array, that means this
    *  reader is a null reader (for example a MultiReader
@@ -1377,12 +1485,33 @@ public abstract class IndexReader implem
   public IndexReader[] getSequentialSubReaders() {
     return null;
   }
-
-
-  /** Expert: returns the docID base for this subReader. */
-  public int getSubReaderDocBase(IndexReader subReader) {
-    throw new UnsupportedOperationException();
-  }
+  
+  /**
+   * Expert: Returns the root {@link ReaderContext} for this
+   * {@link IndexReader}'s sub-reader tree. If this reader is composed of
+   * sub-readers, i.e. it is a composite reader, this method returns a
+   * {@link CompositeReaderContext} holding the reader's direct children as well as a
+   * view of the reader tree's atomic leaf contexts. All sub-
+   * {@link ReaderContext} instances referenced from this reader's top-level
+   * context are private to this reader and are not shared with another context
+   * tree. For example, IndexSearcher uses this API to drive searching by one
+   * atomic leaf reader at a time. If this reader is not composed of child
+   * readers, this method returns an {@link AtomicReaderContext}.
+   * <p>
+   * Note: Any sub-{@link CompositeReaderContext} instances referenced from this
+   * top-level context hold a <code>null</code> {@link CompositeReaderContext#leaves}
+   * reference. Only the top-level context maintains the convenience leaf-view
+   * for performance reasons.
+   * <p>
+   * NOTE: You should not try using sub-readers returned by this method to make
+   * any changes (setNorm, deleteDocument, etc.). While this might succeed for
+   * one composite reader (like MultiReader), it will most likely lead to index
+   * corruption for other readers (like DirectoryReader obtained through
+   * {@link #open}). Use the top-level context's reader directly.
+   * 
+   * @lucene.experimental
+   */
+  public abstract ReaderContext getTopReaderContext();
 
   /** Expert */
   public Object getCoreCacheKey() {
@@ -1442,4 +1571,132 @@ public abstract class IndexReader implem
   Fields retrieveFields() {
     return fields;
   }
+
+  /**
+   * A struct-like class that represents a hierarchical relationship between
+   * {@link IndexReader} instances. 
+   * @lucene.experimental
+   */
+  public static abstract class ReaderContext {
+    /** The reader context for this reader's immediate parent, or null if none */
+    public final ReaderContext parent;
+    /** The actual reader */
+    public final IndexReader reader;
+    /** <code>true</code> iff the reader is an atomic reader */
+    public final boolean isAtomic;
+    /** <code>true</code> if this context struct represents the top-level reader within the hierarchical context */
+    public final boolean isTopLevel;
+    /** the doc base for this reader in the parent, <tt>0</tt> if parent is null */
+    public final int docBaseInParent;
+    /** the ord for this reader in the parent, <tt>0</tt> if parent is null */
+    public final int ordInParent;
+    
+    ReaderContext(ReaderContext parent, IndexReader reader,
+        boolean isAtomic, int ordInParent, int docBaseInParent) {
+      this.parent = parent;
+      this.reader = reader;
+      this.isAtomic = isAtomic;
+      this.docBaseInParent = docBaseInParent;
+      this.ordInParent = ordInParent;
+      this.isTopLevel = parent==null;
+    }
+    
+    /**
+     * Returns the context's leaves if this context is a top-level context,
+     * otherwise <code>null</code>.
+     * <p>
+     * Note: this is a convenience method since leaves can always be obtained by
+     * walking the context tree.
+     */
+    public AtomicReaderContext[] leaves() {
+      return null;
+    }
+    
+    /**
+     * Returns the context's children iff this context is a composite context,
+     * otherwise <code>null</code>.
+     * <p>
+     * Note: this method is a convenience method to prevent
+     * <code>instanceof</code> checks and type-casts to
+     * {@link CompositeReaderContext}.
+     */
+    public ReaderContext[] children() {
+      return null;
+    }
+  }
+  
+  /**
+   * {@link ReaderContext} for composite {@link IndexReader} instance.
+   * @lucene.experimental
+   */
+  public static final class CompositeReaderContext extends ReaderContext {
+    /** the composite reader's immediate children */
+    public final ReaderContext[] children;
+    /** the composite reader's leaf reader contexts if this is the top-level reader in this context */
+    public final AtomicReaderContext[] leaves;
+
+    /**
+     * Creates a {@link CompositeReaderContext} for intermediate readers that are
+     * not top-level readers in the current context
+     */
+    public CompositeReaderContext(ReaderContext parent, IndexReader reader,
+        int ordInParent, int docbaseInParent, ReaderContext[] children) {
+      this(parent, reader, ordInParent, docbaseInParent, children, null);
+    }
+    
+    /**
+     * Creates a {@link CompositeReaderContext} for top-level readers with parent set to <code>null</code>
+     */
+    public CompositeReaderContext(IndexReader reader, ReaderContext[] children, AtomicReaderContext[] leaves) {
+      this(null, reader, 0, 0, children, leaves);
+    }
+    
+    private CompositeReaderContext(ReaderContext parent, IndexReader reader,
+        int ordInParent, int docbaseInParent, ReaderContext[] children,
+        AtomicReaderContext[] leaves) {
+      super(parent, reader, false, ordInParent, docbaseInParent);
+      this.children = children;
+      this.leaves = leaves;
+    }
+
+    @Override
+    public AtomicReaderContext[] leaves() {
+      return leaves;
+    }
+    
+    
+    @Override
+    public ReaderContext[] children() {
+      return children;
+    }
+  }
+  
+  /**
+   * {@link ReaderContext} for atomic {@link IndexReader} instances
+   * @lucene.experimental
+   */
+  public static final class AtomicReaderContext extends ReaderContext {
+    /** The reader's ord in the top-level's leaves array */
+    public final int ord;
+    /** The reader's absolute doc base */
+    public final int docBase;
+    /**
+     * Creates a new {@link AtomicReaderContext} 
+     */    
+    public AtomicReaderContext(ReaderContext parent, IndexReader reader,
+        int ord, int docBase, int leafOrd, int leafDocBase) {
+      super(parent, reader, true, ord, docBase);
+      assert reader.getSequentialSubReaders() == null : "Atomic readers must not have subreaders";
+      this.ord = leafOrd;
+      this.docBase = leafDocBase;
+    }
+    
+    /**
+     * Creates a new {@link AtomicReaderContext} for an atomic reader without an immediate
+     * parent.
+     */
+    public AtomicReaderContext(IndexReader atomicReader) {
+      this(null, atomicReader, 0, 0, 0, 0);
+    }
+  }
 }
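
(To illustrate the ReaderContext API added above, a hedged traversal sketch, not part of the patch: it visits every atomic leaf of a reader, handling the case where the top-level reader is itself atomic.)

    IndexReader.ReaderContext top = reader.getTopReaderContext();
    IndexReader.AtomicReaderContext[] leaves = top.isAtomic
        ? new IndexReader.AtomicReaderContext[] { (IndexReader.AtomicReaderContext) top }
        : top.leaves();
    for (IndexReader.AtomicReaderContext leaf : leaves) {
      // leaf.reader is an atomic (segment-level) reader; leaf.docBase maps its
      // local doc ids into the top-level reader's doc id space.
      System.out.println(leaf.docBase + " -> " + leaf.reader);
    }
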

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed Feb  9 09:35:27 2011
@@ -31,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
@@ -47,6 +48,7 @@ import org.apache.lucene.store.LockObtai
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.MapBackedSet;
 
 /**
   An <code>IndexWriter</code> creates and maintains an index.
@@ -214,7 +216,6 @@ public class IndexWriter implements Clos
   private long lastCommitChangeCount; // last changeCount that was committed
 
   private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails
-  private HashMap<SegmentInfo,Integer> rollbackSegments;
 
   volatile SegmentInfos pendingCommit;            // set when a commit is pending (after prepareCommit() & before commit())
   volatile long pendingCommitChangeCount;
@@ -250,7 +251,7 @@ public class IndexWriter implements Clos
   private final AtomicInteger flushDeletesCount = new AtomicInteger();
 
   final ReaderPool readerPool = new ReaderPool();
-  final BufferedDeletes bufferedDeletes;
+  final BufferedDeletesStream bufferedDeletesStream;
   
   // This is a "write once" variable (like the organic dye
   // on a DVD-R that may or may not be heated by a laser and
@@ -270,6 +271,13 @@ public class IndexWriter implements Clos
   // The PayloadProcessorProvider to use when segments are merged
   private PayloadProcessorProvider payloadProcessorProvider;
 
+  // for testing
+  boolean anyNonBulkMerges;
+
+  IndexReader getReader() throws IOException {
+    return getReader(true);
+  }
+
   /**
    * Expert: returns a readonly reader, covering all
    * committed as well as un-committed changes to the index.
@@ -329,9 +337,10 @@ public class IndexWriter implements Clos
    *
    * @throws IOException
    */
-  IndexReader getReader() throws IOException {
-
+  IndexReader getReader(boolean applyAllDeletes) throws IOException {
     ensureOpen();
+    
+    final long tStart = System.currentTimeMillis();
 
     if (infoStream != null) {
       message("flush at getReader");
@@ -347,17 +356,27 @@ public class IndexWriter implements Clos
     // just like we do when loading segments_N
     IndexReader r;
     synchronized(this) {
-      flush(false, true);
-      r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+      flush(false, applyAllDeletes);
+      r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
       if (infoStream != null) {
         message("return reader version=" + r.getVersion() + " reader=" + r);
       }
     }
     maybeMerge();
 
+    if (infoStream != null) {
+      message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
+    }
     return r;
   }
 
+  // Used for all SegmentReaders we open
+  private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());
+
+  Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
+    return readerFinishedListeners;
+  }
+
   /** Holds shared SegmentReader instances. IndexWriter uses
    *  SegmentReaders for 1) applying deletes, 2) doing
    *  merges, 3) handing out a real-time reader.  This pool
@@ -567,6 +586,7 @@ public class IndexWriter implements Clos
         // synchronized
         // Returns a ref, which we xfer to readerMap:
         sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+        sr.readerFinishedListeners = readerFinishedListeners;
 
         if (info.dir == directory) {
           // Only pool if reader is not external
@@ -605,8 +625,6 @@ public class IndexWriter implements Clos
     }
   }
   
-  
-  
   /**
    * Obtain the number of deleted docs for a pooled reader.
    * If the reader isn't being pooled, the segmentInfo's 
@@ -662,16 +680,13 @@ public class IndexWriter implements Clos
    * IndexWriter. Additionally, calling {@link #getConfig()} and changing the
    * parameters does not affect that IndexWriter instance.
    * <p>
-   * <b>NOTE:</b> by default, {@link IndexWriterConfig#getMaxFieldLength()}
-   * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to
-   * whether this setting fits your application.
    * 
    * @param d
    *          the index directory. The index is either created or appended
    *          according <code>conf.getOpenMode()</code>.
    * @param conf
    *          the configuration settings according to which IndexWriter should
-   *          be initalized.
+   *          be initialized.
    * @throws CorruptIndexException
    *           if the index is corrupt
    * @throws LockObtainFailedException
@@ -689,7 +704,6 @@ public class IndexWriter implements Clos
     directory = d;
     analyzer = conf.getAnalyzer();
     infoStream = defaultInfoStream;
-    maxFieldLength = conf.getMaxFieldLength();
     termIndexInterval = conf.getTermIndexInterval();
     mergePolicy = conf.getMergePolicy();
     mergePolicy.setIndexWriter(this);
@@ -697,8 +711,8 @@ public class IndexWriter implements Clos
     mergedSegmentWarmer = conf.getMergedSegmentWarmer();
     codecs = conf.getCodecProvider();
     
-    bufferedDeletes = new BufferedDeletes(messageID);
-    bufferedDeletes.setInfoStream(infoStream);
+    bufferedDeletesStream = new BufferedDeletesStream(messageID);
+    bufferedDeletesStream.setInfoStream(infoStream);
     poolReaders = conf.getReaderPooling();
 
     OpenMode mode = conf.getOpenMode();
@@ -719,11 +733,8 @@ public class IndexWriter implements Clos
 
     boolean success = false;
 
-    // TODO: we should check whether this index is too old,
-    // and throw an IndexFormatTooOldExc up front, here,
-    // instead of later when merge, applyDeletes, getReader
-    // is attempted.  I think to do this we should store the
-    // oldest segment's version in segments_N.
+    // If index is too old, reading the segments will throw
+    // IndexFormatTooOldException.
     segmentInfos = new SegmentInfos(codecs);
     try {
       if (create) {
@@ -766,9 +777,8 @@ public class IndexWriter implements Clos
 
       setRollbackSegmentInfos(segmentInfos);
 
-      docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
+      docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream);
       docWriter.setInfoStream(infoStream);
-      docWriter.setMaxFieldLength(maxFieldLength);
 
       // Default deleter (for backwards compatibility) is
       // KeepOnlyLastCommitDeleter:
@@ -854,10 +864,6 @@ public class IndexWriter implements Clos
 
   private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
     rollbackSegmentInfos = (SegmentInfos) infos.clone();
-    rollbackSegments = new HashMap<SegmentInfo,Integer>();
-    final int size = rollbackSegmentInfos.size();
-    for(int i=0;i<size;i++)
-      rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
   }
 
   /**
@@ -919,7 +925,7 @@ public class IndexWriter implements Clos
     this.infoStream = infoStream;
     docWriter.setInfoStream(infoStream);
     deleter.setInfoStream(infoStream);
-    bufferedDeletes.setInfoStream(infoStream);
+    bufferedDeletesStream.setInfoStream(infoStream);
     if (infoStream != null)
       messageState();
   }
@@ -1051,8 +1057,9 @@ public class IndexWriter implements Clos
   private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
 
     try {
-      if (infoStream != null)
-        message("now flush at close");
+      if (infoStream != null) {
+        message("now flush at close waitForMerges=" + waitForMerges);
+      }
 
       docWriter.close();
 
@@ -1164,7 +1171,7 @@ public class IndexWriter implements Clos
 
   public synchronized boolean hasDeletions() throws IOException {
     ensureOpen();
-    if (bufferedDeletes.any()) {
+    if (bufferedDeletesStream.any()) {
       return true;
     }
     if (docWriter.anyDeletions()) {
@@ -1177,25 +1184,7 @@ public class IndexWriter implements Clos
   }
 
   /**
-   * The maximum number of terms that will be indexed for a single field in a
-   * document.  This limits the amount of memory required for indexing, so that
-   * collections with very large files will not crash the indexing process by
-   * running out of memory.<p/>
-   * Note that this effectively truncates large documents, excluding from the
-   * index terms that occur further in the document.  If you know your source
-   * documents are large, be sure to set this value high enough to accommodate
-   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
-   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
-   * By default, no more than 10,000 terms will be indexed for a field.
-   *
-   * @see MaxFieldLength
-   */
-  private int maxFieldLength;
-
-  /**
-   * Adds a document to this index.  If the document contains more than
-   * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, 
-   * the remainder are discarded.
+   * Adds a document to this index.
    *
    * <p> Note that if an Exception is hit (for example disk full)
    * then the index will be consistent, but this document
@@ -1242,9 +1231,7 @@ public class IndexWriter implements Clos
 
   /**
    * Adds a document to this index, using the provided analyzer instead of the
-   * value of {@link #getAnalyzer()}.  If the document contains more than
-   * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
-   * discarded.
+   * value of {@link #getAnalyzer()}.
    *
    * <p>See {@link #addDocument(Document)} for details on
    * index and IndexWriter state after an Exception, and
@@ -1533,6 +1520,11 @@ public class IndexWriter implements Clos
    * you should immediately close the writer.  See <a
    * href="#OOME">above</a> for details.</p>
    *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
+   *
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    * @see MergePolicy#findMergesForOptimize
@@ -1682,6 +1674,11 @@ public class IndexWriter implements Clos
    * <p><b>NOTE</b>: if this method hits an OutOfMemoryError
    * you should immediately close the writer.  See <a
    * href="#OOME">above</a> for details.</p>
+   *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
    */
   public void expungeDeletes(boolean doWait)
     throws CorruptIndexException, IOException {
@@ -1832,6 +1829,18 @@ public class IndexWriter implements Clos
     }
   }
 
+  /** Expert: to be used by a {@link MergePolicy} to avoid
+   *  selecting merges for segments already being merged.
+   *  The returned collection is not cloned, and thus is
+   *  only safe to access if you hold IndexWriter's lock
+   *  (which you do when IndexWriter invokes the
+   *  MergePolicy).
+   *
+   *  <p>Do not alter the returned collection! */
+  public synchronized Collection<SegmentInfo> getMergingSegments() {
+    return mergingSegments;
+  }
+
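
(Minimal sketch of the intended use from inside a custom MergePolicy; the surrounding variables -- a writer field set via setIndexWriter and the SegmentInfos passed to findMerges -- are assumptions, only getMergingSegments comes from this change.)

    // Called while IndexWriter holds its lock, e.g. from findMerges(infos):
    Collection<SegmentInfo> merging = writer.getMergingSegments();
    for (SegmentInfo info : infos) {
      if (merging.contains(info)) {
        continue;   // segment is already part of a running merge; skip it
      }
      // ... otherwise consider info as a merge candidate ...
    }
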
   /** Expert: the {@link MergeScheduler} calls this method
    *  to retrieve the next merge requested by the
    *  MergePolicy */
@@ -1889,7 +1898,7 @@ public class IndexWriter implements Clos
       mergePolicy.close();
       mergeScheduler.close();
 
-      bufferedDeletes.clear();
+      bufferedDeletesStream.clear();
 
       synchronized(this) {
 
@@ -1952,8 +1961,9 @@ public class IndexWriter implements Clos
    *
    * <p>NOTE: this method will forcefully abort all merges
    *    in progress.  If other threads are running {@link
-   *    #optimize()} or any of the addIndexes methods, they
-   *    will receive {@link MergePolicy.MergeAbortedException}s.
+   *    #optimize()}, {@link #addIndexes(IndexReader[])} or
+   *    {@link #expungeDeletes} methods, they may receive
+   *    {@link MergePolicy.MergeAbortedException}s.
    */
   public synchronized void deleteAll() throws IOException {
     try {
@@ -2042,12 +2052,19 @@ public class IndexWriter implements Clos
    *    will have completed once this method completes.</p>
    */
   public synchronized void waitForMerges() {
+    if (infoStream != null) {
+      message("waitForMerges");
+    }
     while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
       doWait();
     }
 
     // sanity check
     assert 0 == mergingSegments.size();
+
+    if (infoStream != null) {
+      message("waitForMerges done");
+    }
   }
 
   /**
@@ -2226,6 +2243,11 @@ public class IndexWriter implements Clos
    * you should immediately close the writer.  See <a
    * href="#OOME">above</a> for details.</p>
    *
+   * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+   * with <tt>false</tt>, which aborts all running merges,
+   * then any thread still running this method might hit a
+   * {@link MergePolicy.MergeAbortedException}.
+   *
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
@@ -2453,13 +2475,13 @@ public class IndexWriter implements Clos
   }
 
   /**
-   * Flush all in-memory buffered udpates (adds and deletes)
+   * Flush all in-memory buffered updates (adds and deletes)
    * to the Directory.
    * @param triggerMerge if true, we may merge segments (if
    *  deletes or docs were flushed) if necessary
-   * @param flushDeletes whether pending deletes should also
+   * @param applyAllDeletes whether pending deletes should also be applied
    */
-  protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+  protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
 
     // NOTE: this method cannot be sync'd because
     // maybeMerge() in turn calls mergeScheduler.merge which
@@ -2470,7 +2492,7 @@ public class IndexWriter implements Clos
 
     // We can be called during close, when closing==true, so we must pass false to ensureOpen:
     ensureOpen(false);
-    if (doFlush(flushDeletes) && triggerMerge) {
+    if (doFlush(applyAllDeletes) && triggerMerge) {
       maybeMerge();
     }
   }
@@ -2519,10 +2541,10 @@ public class IndexWriter implements Clos
         // tiny segments:
         if (flushControl.getFlushDeletes() ||
             (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
-             bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+             bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
           applyAllDeletes = true;
           if (infoStream != null) {
-            message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+            message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
           }
         }
       }
@@ -2532,12 +2554,15 @@ public class IndexWriter implements Clos
           message("apply all deletes during flush");
         }
         flushDeletesCount.incrementAndGet();
-        if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+        final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+        if (result.anyDeletes) {
           checkpoint();
         }
+        bufferedDeletesStream.prune(segmentInfos);
+        assert !bufferedDeletesStream.any();
         flushControl.clearDeletes();
       } else if (infoStream != null) {
-        message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+        message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
       }
 
       doAfterFlush();
@@ -2563,7 +2588,7 @@ public class IndexWriter implements Clos
    */
   public final long ramSizeInBytes() {
     ensureOpen();
-    return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+    return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
   }
 
   /** Expert:  Return the number of documents currently
@@ -2573,28 +2598,12 @@ public class IndexWriter implements Clos
     return docWriter.getNumDocs();
   }
 
-  private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
-    int first = segmentInfos.indexOf(merge.segments.info(0));
-    if (first == -1)
-      throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
-    final int numSegments = segmentInfos.size();
-    
-    final int numSegmentsToMerge = merge.segments.size();
-    for(int i=0;i<numSegmentsToMerge;i++) {
-      final SegmentInfo info = merge.segments.info(i);
-
-      if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
-        if (segmentInfos.indexOf(info) == -1)
-          throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
-        else
-          throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
-                                               directory);
+  private void ensureValidMerge(MergePolicy.OneMerge merge) {
+    for(SegmentInfo info : merge.segments) {
+      if (segmentInfos.indexOf(info) == -1) {
+        throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
       }
     }
-
-    return first;
   }
 
   /** Carefully merges deletes for the segments we just
@@ -2619,9 +2628,11 @@ public class IndexWriter implements Clos
     // started merging:
     int docUpto = 0;
     int delCount = 0;
+    long minGen = Long.MAX_VALUE;
 
     for(int i=0; i < sourceSegments.size(); i++) {
       SegmentInfo info = sourceSegments.info(i);
+      minGen = Math.min(info.getBufferedDeletesGen(), minGen);
       int docCount = info.docCount;
       SegmentReader previousReader = merge.readersClone[i];
       final Bits prevDelDocs = previousReader.getDeletedDocs();
@@ -2672,9 +2683,17 @@ public class IndexWriter implements Clos
     assert mergedReader.numDeletedDocs() == delCount;
 
     mergedReader.hasChanges = delCount > 0;
+
+    // If new deletes were applied while we were merging
+    // (which happens if eg commit() or getReader() is
+    // called during our merge), then it better be the case
+    // that the delGen has increased for all our merged
+    // segments:
+    assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+    mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
   }
 
-  /* FIXME if we want to support non-contiguous segment merges */
   synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
 
     assert testPoint("startCommitMerge");
@@ -2700,7 +2719,7 @@ public class IndexWriter implements Clos
       return false;
     }
 
-    final int start = ensureContiguousMerge(merge);
+    ensureValidMerge(merge);
 
     commitMergedDeletes(merge, mergedReader);
       
@@ -2710,10 +2729,32 @@ public class IndexWriter implements Clos
     // format as well:
     setMergeDocStoreIsCompoundFile(merge);
 
-    segmentInfos.subList(start, start + merge.segments.size()).clear();
     assert !segmentInfos.contains(merge.info);
-    segmentInfos.add(start, merge.info);
-    
+
+    final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+    int segIdx = 0;
+    int newSegIdx = 0;
+    boolean inserted = false;
+    final int curSegCount = segmentInfos.size();
+    while(segIdx < curSegCount) {
+      final SegmentInfo info = segmentInfos.info(segIdx++);
+      if (mergedAway.contains(info)) {
+        if (!inserted) {
+          segmentInfos.set(segIdx-1, merge.info);
+          inserted = true;
+          newSegIdx++;
+        }
+      } else {
+        segmentInfos.set(newSegIdx++, info);
+      }
+    }
+    assert newSegIdx == curSegCount - merge.segments.size() + 1;
+    segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+    if (infoStream != null) {
+      message("after commit: " + segString());
+    }
+
     closeMergeReaders(merge, false);
 
     // Must note the change to segmentInfos so any commits
@@ -2725,16 +2766,12 @@ public class IndexWriter implements Clos
     // disk, updating SegmentInfo, etc.:
     readerPool.clear(merge.segments);
     
-    // remove pending deletes of the segments 
-    // that were merged, moving them onto the segment just
-    // before the merged segment
-    // Lock order: IW -> BD
-    bufferedDeletes.commitMerge(merge);
-
     if (merge.optimize) {
       // cascade the optimize:
       segmentsToOptimize.add(merge.info);
     }
+
+    
     return true;
   }
   
@@ -2862,7 +2899,7 @@ public class IndexWriter implements Clos
       }
     }
 
-    ensureContiguousMerge(merge);
+    ensureValidMerge(merge);
 
     pendingMerges.add(merge);
 
@@ -2889,10 +2926,6 @@ public class IndexWriter implements Clos
   final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
     boolean success = false;
     try {
-      // Lock order: IW -> BD
-      if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
-        checkpoint();
-      }
       _mergeInit(merge);
       success = true;
     } finally {
@@ -2916,6 +2949,9 @@ public class IndexWriter implements Clos
       throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
     }
 
+    // TODO: is there any perf benefit to sorting
+    // merged segments?  eg biggest to smallest?
+
     if (merge.info != null)
       // mergeInit already done
       return;
@@ -2928,6 +2964,17 @@ public class IndexWriter implements Clos
     // names.
     merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
 
+    // Lock order: IW -> BD
+    final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+    if (result.anyDeletes) {
+      checkpoint();
+    }
+
+    merge.info.setBufferedDeletesGen(result.gen);
+
+    // Lock order: IW -> BD
+    bufferedDeletesStream.prune(segmentInfos);
+
     Map<String,String> details = new HashMap<String,String>();
     details.put("optimize", Boolean.toString(merge.optimize));
     details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -3115,6 +3162,7 @@ public class IndexWriter implements Clos
         message("merge segmentCodecs=" + merger.getSegmentCodecs());
         message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
       }
+      anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments;
       
       assert mergedDocCount == totDocCount;
 
@@ -3280,7 +3328,7 @@ public class IndexWriter implements Clos
     // NOTE: the callers of this method should in theory
     // be able to do simply wait(), but, as a defense
     // against thread timing hazards where notifyAll()
-    // falls to be called, we wait for at most 1 second
+    // fails to be called, we wait for at most 1 second
     // and then return so caller can check if wait
     // conditions are satisfied:
     try {
@@ -3290,6 +3338,15 @@ public class IndexWriter implements Clos
     }
   }
 
+  private boolean keepFullyDeletedSegments;
+
+  /** Only for testing.
+   *
+   * @lucene.internal */
+  void keepFullyDeletedSegments() {
+    keepFullyDeletedSegments = true;
+  }
+
   // called only from assert
   private boolean filesExist(SegmentInfos toSync) throws IOException {
     Collection<String> files = toSync.files(directory, false);
@@ -3348,6 +3405,10 @@ public class IndexWriter implements Clos
         readerPool.commit();
         
         toSync = (SegmentInfos) segmentInfos.clone();
+        if (!keepFullyDeletedSegments) {
+          toSync.pruneDeletedSegments();
+        }
+
         assert filesExist(toSync);
         
         if (commitUserData != null)
@@ -3477,7 +3538,7 @@ public class IndexWriter implements Clos
   }
 
   synchronized boolean nrtIsCurrent(SegmentInfos infos) {
-    return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+    return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
   }
 
   synchronized boolean isClosed() {
@@ -3644,7 +3705,7 @@ public class IndexWriter implements Clos
       final double ramBufferSizeMB = config.getRAMBufferSizeMB();
       if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
         final long limit = (long) (ramBufferSizeMB*1024*1024);
-        long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+        long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
         if (used >= limit) {
           
           // DocumentsWriter may be able to free up some
@@ -3652,7 +3713,7 @@ public class IndexWriter implements Clos
           // Lock order: FC -> DW
           docWriter.balanceRAM();
 
-          used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+          used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
           if (used >= limit) {
             return setFlushPending("ram full: " + reason, false);
           }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Feb  9 09:35:27 2011
@@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyz
 import org.apache.lucene.index.DocumentsWriter.IndexingChain;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.util.Version;
 
 /**
@@ -41,8 +42,6 @@ import org.apache.lucene.util.Version;
  */
 public final class IndexWriterConfig implements Cloneable {
 
-  public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
-
   /**
    * Specifies the open mode for {@link IndexWriter}:
    * <ul>
@@ -55,7 +54,7 @@ public final class IndexWriterConfig imp
   public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
   
   /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
-  public static final int DEFAULT_TERM_INDEX_INTERVAL = 32;                   // TODO: this should be private to the codec, not settable here
+  public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
 
   /** Denotes a flush trigger is disabled. */
   public final static int DISABLE_AUTO_FLUSH = -1;
@@ -113,8 +112,7 @@ public final class IndexWriterConfig imp
   private IndexDeletionPolicy delPolicy;
   private IndexCommit commit;
   private OpenMode openMode;
-  private int maxFieldLength;
-  private Similarity similarity;
+  private SimilarityProvider similarityProvider;
   private int termIndexInterval; // TODO: this should be private to the codec, not settable here
   private MergeScheduler mergeScheduler;
   private long writeLockTimeout;
@@ -145,8 +143,7 @@ public final class IndexWriterConfig imp
     delPolicy = new KeepOnlyLastCommitDeletionPolicy();
     commit = null;
     openMode = OpenMode.CREATE_OR_APPEND;
-    maxFieldLength = UNLIMITED_FIELD_LENGTH;
-    similarity = Similarity.getDefault();
+    similarityProvider = IndexSearcher.getDefaultSimilarityProvider();
     termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
     mergeScheduler = new ConcurrentMergeScheduler();
     writeLockTimeout = WRITE_LOCK_TIMEOUT;
@@ -220,37 +217,6 @@ public final class IndexWriterConfig imp
   }
 
   /**
-   * The maximum number of terms that will be indexed for a single field in a
-   * document. This limits the amount of memory required for indexing, so that
-   * collections with very large files will not crash the indexing process by
-   * running out of memory. This setting refers to the number of running terms,
-   * not to the number of different terms.
-   * <p>
-   * <b>NOTE:</b> this silently truncates large documents, excluding from the
-   * index all terms that occur further in the document. If you know your source
-   * documents are large, be sure to set this value high enough to accomodate
-   * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
-   * the only limit is your memory, but you should anticipate an
-   * OutOfMemoryError.
-   * <p>
-   * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
-   */
-  public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
-    this.maxFieldLength = maxFieldLength;
-    return this;
-  }
-
-  /**
-   * Returns the maximum number of terms that will be indexed for a single field
-   * in a document.
-   * 
-   * @see #setMaxFieldLength(int)
-   */
-  public int getMaxFieldLength() {
-    return maxFieldLength;
-  }
-
-  /**
    * Expert: allows to open a certain commit point. The default is null which
    * opens the latest commit point.
    */
@@ -269,25 +235,22 @@ public final class IndexWriterConfig imp
   }
 
   /**
-   * Expert: set the {@link Similarity} implementation used by this IndexWriter.
+   * Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter.
    * <p>
-   * <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
-   * the similarity will be set to the default.
-   * 
-   * @see Similarity#setDefault(Similarity)
+   * <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
+   * the similarity provider will be set to the default implementation (unspecified).
    */
-  public IndexWriterConfig setSimilarity(Similarity similarity) {
-    this.similarity = similarity == null ? Similarity.getDefault() : similarity;
+  public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
+    this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
     return this;
   }
 
   /**
-   * Expert: returns the {@link Similarity} implementation used by this
-   * IndexWriter. This defaults to the current value of
-   * {@link Similarity#getDefault()}.
+   * Expert: returns the {@link SimilarityProvider} implementation used by this
+   * IndexWriter.
    */
-  public Similarity getSimilarity() {
-    return similarity;
+  public SimilarityProvider getSimilarityProvider() {
+    return similarityProvider;
   }
   
   /**
@@ -589,10 +552,13 @@ public final class IndexWriterConfig imp
   /** Sets the termsIndexDivisor passed to any readers that
    *  IndexWriter opens, for example when applying deletes
    *  or creating a near-real-time reader in {@link
-   *  IndexWriter#getReader}. */
+   *  IndexWriter#getReader}. If you pass -1, the terms index 
+   *  won't be loaded by the readers. This is only useful in 
+   *  advanced situations when you will only .next() through 
+   *  all terms; attempts to seek will hit an exception. */
   public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
-    if (divisor <= 0) {
-      throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+    if (divisor <= 0 && divisor != -1) {
+      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
     }
     readerTermsIndexDivisor = divisor;
     return this;
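
(Short sketch of the newly allowed -1 value; the Version constant, analyzer and conf variable are illustrative.)

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    // Readers opened by IndexWriter (e.g. NRT readers) will not load the terms
    // index at all: only sequential iteration over terms works, seeks will throw.
    conf.setReaderTermsIndexDivisor(-1);
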
@@ -611,8 +577,7 @@ public final class IndexWriterConfig imp
     sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
     sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
     sb.append("openMode=").append(openMode).append("\n");
-    sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
-    sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
+    sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n");
     sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
     sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
     sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Wed Feb  9 09:35:27 2011
@@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy exte
    *  or larger will never be merged.  @see setMaxMergeMB */
   public static final double DEFAULT_MAX_MERGE_MB = 2048;
 
+  /** Default maximum segment size.  A segment of this size
+   *  or larger will never be merged during optimize.  @see setMaxMergeMBForOptimize */
+  public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
+
   public LogByteSizeMergePolicy() {
     minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
     maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
+    maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
   }
   
   @Override
@@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy exte
     return ((double) maxMergeSize)/1024/1024;
   }
 
+  /** <p>Determines the largest segment (measured by total
+   *  byte size of the segment's files, in MB) that may be
+   *  merged with other segments during optimize. Setting
+   *  it low will leave the index with more than 1 segment,
+   *  even if {@link IndexWriter#optimize()} is called. */
+  public void setMaxMergeMBForOptimize(double mb) {
+    maxMergeSizeForOptimize = (long) (mb*1024*1024);
+  }
+
+  /** Returns the largest segment (measured by total byte
+   *  size of the segment's files, in MB) that may be merged
+   *  with other segments during optimize.
+   *  @see #setMaxMergeMBForOptimize */
+  public double getMaxMergeMBForOptimize() {
+    return ((double) maxMergeSizeForOptimize)/1024/1024;
+  }
+
   /** Sets the minimum size for the lowest level segments.
    * Any segments below this size are considered to be on
    * the same level (even if they vary drastically in size)

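A small usage sketch (not from the commit) of the new optimize-time cap; the values are arbitrary, and setMaxMergeMB is the pre-existing cap for regular merges:

    import org.apache.lucene.index.LogByteSizeMergePolicy;

    public class OptimizeCapSketch {
      public static void main(String[] args) {
        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMaxMergeMB(2048);                // cap for regular merges
        mp.setMaxMergeMBForOptimize(5 * 1024); // separate cap during optimize

        // Segments at or above 5 GB are now skipped by optimize, so the
        // index may legitimately end up with more than one segment.
        System.out.println("optimize cap (MB): " + mp.getMaxMergeMBForOptimize());
      }
    }
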
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java Wed Feb  9 09:35:27 2011
@@ -31,9 +31,10 @@ public class LogDocMergePolicy extends L
   public LogDocMergePolicy() {
     minMergeSize = DEFAULT_MIN_MERGE_DOCS;
     
-    // maxMergeSize is never used by LogDocMergePolicy; set
+    // maxMergeSize (and maxMergeSizeForOptimize) is never used by LogDocMergePolicy; set
     // it to Long.MAX_VALUE to disable it
     maxMergeSize = Long.MAX_VALUE;
+    maxMergeSizeForOptimize = Long.MAX_VALUE;
   }
 
   @Override

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Feb  9 09:35:27 2011
@@ -18,6 +18,11 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
 import java.util.Set;
 
 /** <p>This class implements a {@link MergePolicy} that tries
@@ -63,7 +68,11 @@ public abstract class LogMergePolicy ext
 
   protected long minMergeSize;
   protected long maxMergeSize;
+  // Although the core MPs set it explicitly, we default it here in case someone
+  // out there wrote their own LMP ...
+  protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+  protected boolean requireContiguousMerge = false;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
 
@@ -102,6 +111,21 @@ public abstract class LogMergePolicy ext
       writer.get().message("LMP: " + message);
   }
 
+  /** If true, merges must be an in-order (contiguous) slice of the
+   *  segments.  If false, then the merge policy is free to
+   *  pick any segments.  The default is false, which is
+   *  in general more efficient than true since it gives the
+   *  merge policy more freedom to pick closely sized
+   *  segments. */
+  public void setRequireContiguousMerge(boolean v) {
+    requireContiguousMerge = v;
+  }
+
+  /** See {@link #setRequireContiguousMerge}. */
+  public boolean getRequireContiguousMerge() {
+    return requireContiguousMerge;
+  }
+
   /** <p>Returns the number of segments that are merged at
    * once and also controls the total number of segments
    * allowed to accumulate in the index.</p> */
@@ -240,9 +264,9 @@ public abstract class LogMergePolicy ext
     int start = last - 1;
     while (start >= 0) {
       SegmentInfo info = infos.info(start);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         if (verbose()) {
-          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
+          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
         }
         // need to skip that segment + add a merge for the 'right' segments,
         // unless there is only 1 which is optimized.
@@ -326,9 +350,12 @@ public abstract class LogMergePolicy ext
   }
   
   /** Returns the merges necessary to optimize the index.
-   *  This merge policy defines "optimized" to mean only one
-   *  segment in the index, where that segment has no
-   *  deletions pending nor separate norms, and it is in
+   *  This merge policy defines "optimized" to mean only the
+   *  requested number of segments is left in the index, and
+   *  respects the {@link #maxMergeSizeForOptimize} setting.
+   *  By default, and assuming {@code maxNumSegments=1}, only
+   *  one segment will be left in the index, where that segment
+   *  has no deletions pending nor separate norms, and it is in
    *  compound file format if the current useCompoundFile
    *  setting is true.  This method returns multiple merges
    *  (mergeFactor at a time) so the {@link MergeScheduler}
@@ -350,6 +377,8 @@ public abstract class LogMergePolicy ext
       }
       return null;
     }
+
+    // TODO: handle non-contiguous merge case differently?
     
     // Find the newest (rightmost) segment that needs to
     // be optimized (other segments may have been flushed
@@ -382,7 +411,7 @@ public abstract class LogMergePolicy ext
     boolean anyTooLarge = false;
     for (int i = 0; i < last; i++) {
       SegmentInfo info = infos.info(i);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         anyTooLarge = true;
         break;
       }
@@ -448,6 +477,36 @@ public abstract class LogMergePolicy ext
     return spec;
   }
 
+  private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
+    SegmentInfo info;
+    float level;
+    int index;
+    
+    public SegmentInfoAndLevel(SegmentInfo info, float level, int index) {
+      this.info = info;
+      this.level = level;
+      this.index = index;
+    }
+
+    // Sorts largest to smallest
+    public int compareTo(SegmentInfoAndLevel other) {
+      if (level < other.level)
+        return 1;
+      else if (level > other.level)
+        return -1;
+      else
+        return 0;
+    }
+  }
+
+  private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
+    public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
+      return o1.index - o2.index;
+    }
+  }
+
+  private static final SortByIndex sortByIndex = new SortByIndex();
+
   /** Checks if any merges are now necessary and returns a
    *  {@link MergePolicy.MergeSpecification} if so.  A merge
    *  is necessary when there are more than {@link
@@ -464,17 +523,37 @@ public abstract class LogMergePolicy ext
 
     // Compute levels, which is just log (base mergeFactor)
     // of the size of each segment
-    float[] levels = new float[numSegments];
+    final List<SegmentInfoAndLevel> levels = new ArrayList<SegmentInfoAndLevel>();
     final float norm = (float) Math.log(mergeFactor);
 
+    final Collection<SegmentInfo> mergingSegments = writer.get().getMergingSegments();
+
     for(int i=0;i<numSegments;i++) {
       final SegmentInfo info = infos.info(i);
       long size = size(info);
 
+      // When we require contiguous merge, we still add the
+      // segment to levels to avoid merging "across" a set
+      // of segments being merged:
+      if (!requireContiguousMerge && mergingSegments.contains(info)) {
+        if (verbose()) {
+          message("seg " + info.name + " already being merged; skip");
+        }
+        continue;
+      }
+
       // Floor tiny segments
-      if (size < 1)
+      if (size < 1) {
         size = 1;
-      levels[i] = (float) Math.log(size)/norm;
+      }
+      levels.add(new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i));
+      if (verbose()) {
+        message("seg " + info.name + " level=" + levels.get(i).level + " size=" + size);
+      }
+    }
+
+    if (!requireContiguousMerge) {
+      Collections.sort(levels);
     }
 
     final float levelFloor;
@@ -492,14 +571,16 @@ public abstract class LogMergePolicy ext
 
     MergeSpecification spec = null;
 
+    final int numMergeableSegments = levels.size();
+
     int start = 0;
-    while(start < numSegments) {
+    while(start < numMergeableSegments) {
 
       // Find max level of all segments not already
       // quantized.
-      float maxLevel = levels[start];
-      for(int i=1+start;i<numSegments;i++) {
-        final float level = levels[i];
+      float maxLevel = levels.get(start).level;
+      for(int i=1+start;i<numMergeableSegments;i++) {
+        final float level = levels.get(i).level;
         if (level > maxLevel)
           maxLevel = level;
       }
@@ -518,9 +599,9 @@ public abstract class LogMergePolicy ext
           levelBottom = levelFloor;
       }
 
-      int upto = numSegments-1;
+      int upto = numMergeableSegments-1;
       while(upto >= start) {
-        if (levels[upto] >= levelBottom) {
+        if (levels.get(upto).level >= levelBottom) {
           break;
         }
         upto--;
@@ -533,18 +614,26 @@ public abstract class LogMergePolicy ext
       while(end <= 1+upto) {
         boolean anyTooLarge = false;
         for(int i=start;i<end;i++) {
-          final SegmentInfo info = infos.info(i);
+          final SegmentInfo info = levels.get(i).info;
           anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
         }
 
         if (!anyTooLarge) {
           if (spec == null)
             spec = new MergeSpecification();
-          if (verbose())
+          if (verbose()) {
             message("    " + start + " to " + end + ": add this merge");
-          spec.add(new OneMerge(infos.range(start, end)));
-        } else if (verbose())
+          }
+          Collections.sort(levels.subList(start, end), sortByIndex);
+          final SegmentInfos mergeInfos = new SegmentInfos();
+          for(int i=start;i<end;i++) {
+            mergeInfos.add(levels.get(i).info);
+            assert infos.contains(levels.get(i).info);
+          }
+          spec.add(new OneMerge(mergeInfos));
+        } else if (verbose()) {
           message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+        }
 
         start = end;
         end = start + mergeFactor;
@@ -588,9 +677,11 @@ public abstract class LogMergePolicy ext
     sb.append("minMergeSize=").append(minMergeSize).append(", ");
     sb.append("mergeFactor=").append(mergeFactor).append(", ");
     sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
+    sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
-    sb.append("useCompoundFile=").append(useCompoundFile);
+    sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+    sb.append("requireContiguousMerge=").append(requireContiguousMerge);
     sb.append("]");
     return sb.toString();
   }

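As a standalone illustration (not Lucene code) of the level computation findMerges now performs: level = log(size) in base mergeFactor, tiny segments floored to size 1, and, when contiguous merges are not required, levels sorted largest-to-smallest before mergeFactor-sized windows are chosen:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class LevelSketch {
      public static void main(String[] args) {
        int mergeFactor = 10;
        long[] sizesInBytes = { 512, 4000, 40000, 40500, 1000000 };
        double norm = Math.log(mergeFactor);

        List<Double> levels = new ArrayList<Double>();
        for (long size : sizesInBytes) {
          if (size < 1) {
            size = 1;                        // floor tiny segments
          }
          levels.add(Math.log(size) / norm); // log base mergeFactor
        }

        // Non-contiguous mode: sort by level, largest first, so closely
        // sized segments end up adjacent and can be merged together.
        Collections.sort(levels, Collections.<Double>reverseOrder());
        System.out.println(levels);
      }
    }
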
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed Feb  9 09:35:27 2011
@@ -110,7 +110,7 @@ public abstract class MergePolicy implem
       return aborted;
     }
 
-    synchronized void checkAborted(Directory dir) throws MergeAbortedException {
+    public synchronized void checkAborted(Directory dir) throws MergeAbortedException {
       if (aborted) {
         throw new MergeAbortedException("merge is aborted: " + segString(dir));
       }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java Wed Feb  9 09:35:27 2011
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.util.ReaderUtil;
 
 /**
@@ -61,26 +60,24 @@ public class MultiNorms {
       ReaderUtil.gatherSubReaders(leaves, r);
       int end = 0;
       for (IndexReader leaf : leaves) {
+        Fields fields = leaf.fields();
+        boolean hasField = (fields != null && fields.terms(field) != null);
+        
         int start = end;
-        leaf.norms(field, norms, start);
+        byte leafNorms[] = leaf.norms(field);
+        if (leafNorms == null) {
+          if (hasField) { // omitted norms
+            return null;
+          }
+          // leaf doesn't have this field; fill with default norms (0)
+          leafNorms = new byte[leaf.maxDoc()];
+          Arrays.fill(leafNorms, (byte) 0);
+        }
+        
+        System.arraycopy(leafNorms, 0, norms, start, leafNorms.length);
         end += leaf.maxDoc();
       }
       return norms;
     }
   }
-  
-  /**
-   * Warning: this is heavy! Do not use in a loop, or implement norms()
-   * in your own reader with this (you should likely cache the result).
-   */
-  public static void norms(IndexReader r, String field, byte[] bytes, int offset)
-      throws IOException {
-    // TODO: optimize more maybe
-    byte[] norms = norms(r, field);
-    if (norms == null) {
-      Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
-    } else {
-      System.arraycopy(norms, 0, bytes, offset, r.maxDoc());
-    }
-  }
 }

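A hedged usage sketch of the simplified MultiNorms API (the index path and field name are placeholders, not from the commit); as the removed variant's javadoc warned, this call is heavy and the result should be cached:

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiNorms;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class NormsSketch {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
        IndexReader reader = IndexReader.open(dir);
        try {
          // Heavy call (gathers norms across all sub-readers); cache the result.
          byte[] norms = MultiNorms.norms(reader, "body");  // "body" is a made-up field
          if (norms == null) {
            System.out.println("field indexed with omitted norms");
          } else {
            System.out.println("norms for " + norms.length + " docs");
          }
        } finally {
          reader.close();
          dir.close();
        }
      }
    }
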
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java Wed Feb  9 09:35:27 2011
@@ -19,22 +19,22 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.MapBackedSet;
 
 /** An IndexReader which reads multiple indexes, appending
  *  their content. */
 public class MultiReader extends IndexReader implements Cloneable {
   protected IndexReader[] subReaders;
+  private final ReaderContext topLevelContext;
   private int[] starts;                           // 1st docno for each segment
-  private final Map<IndexReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<IndexReader,ReaderUtil.Slice>();
   private boolean[] decrefOnClose;                // remember which subreaders to decRef on close
   private int maxDoc = 0;
   private int numDocs = -1;
@@ -48,7 +48,7 @@ public class MultiReader extends IndexRe
   * @param subReaders set of (sub)readers
   */
   public MultiReader(IndexReader... subReaders) throws IOException {
-    initialize(subReaders, true);
+    topLevelContext = initialize(subReaders, true);
   }
 
   /**
@@ -60,14 +60,13 @@ public class MultiReader extends IndexRe
    * @param subReaders set of (sub)readers
    */
   public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
-    initialize(subReaders, closeSubReaders);
+    topLevelContext = initialize(subReaders, closeSubReaders);
   }
   
-  private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
+  private ReaderContext initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
     this.subReaders =  subReaders.clone();
     starts = new int[subReaders.length + 1];    // build starts array
     decrefOnClose = new boolean[subReaders.length];
-
     for (int i = 0; i < subReaders.length; i++) {
       starts[i] = maxDoc;
       maxDoc += subReaders[i].maxDoc();      // compute maxDocs
@@ -82,14 +81,10 @@ public class MultiReader extends IndexRe
       if (subReaders[i].hasDeletions()) {
         hasDeletions = true;
       }
-
-      final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i],
-                                                          subReaders[i].maxDoc(),
-                                                          i);
-      subReaderToSlice.put(subReaders[i], slice);
     }
-
     starts[subReaders.length] = maxDoc;
+    readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
+    return ReaderUtil.buildReaderContext(this);
   }
 
   @Override
@@ -98,11 +93,6 @@ public class MultiReader extends IndexRe
   }
 
   @Override
-  public int getSubReaderDocBase(IndexReader subReader) {
-    return subReaderToSlice.get(subReader).start;
-  }
-
-  @Override
   public Fields fields() throws IOException {
     throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
   }
@@ -317,12 +307,6 @@ public class MultiReader extends IndexRe
   }
 
   @Override
-  public synchronized void norms(String field, byte[] result, int offset)
-    throws IOException {
-    throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
-  }
-
-  @Override
   protected void doSetNorm(int n, String field, byte value)
     throws CorruptIndexException, IOException {
     int i = readerIndex(n);                           // find segment num
@@ -363,11 +347,6 @@ public class MultiReader extends IndexRe
         subReaders[i].close();
       }
     }
-
-    // NOTE: only needed in case someone had asked for
-    // FieldCache for top-level reader (which is generally
-    // not a good idea):
-    FieldCache.DEFAULT.purge(this);
   }
   
   @Override
@@ -403,4 +382,25 @@ public class MultiReader extends IndexRe
   public IndexReader[] getSequentialSubReaders() {
     return subReaders;
   }
+  
+  @Override
+  public ReaderContext getTopReaderContext() {
+    return topLevelContext;
+  }
+
+  @Override
+  public void addReaderFinishedListener(ReaderFinishedListener listener) {
+    super.addReaderFinishedListener(listener);
+    for(IndexReader sub : subReaders) {
+      sub.addReaderFinishedListener(listener);
+    }
+  }
+
+  @Override
+  public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+    super.removeReaderFinishedListener(listener);
+    for(IndexReader sub : subReaders) {
+      sub.removeReaderFinishedListener(listener);
+    }
+  }
 }

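A sketch (not from the commit) of the listener propagation added above: registering a ReaderFinishedListener on the MultiReader also registers it on each sub-reader. The directory paths are placeholders and are assumed to hold existing indexes:

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ListenerSketch {
      public static void main(String[] args) throws IOException {
        // Placeholder paths; both are assumed to hold existing indexes.
        Directory d1 = FSDirectory.open(new File("/path/to/index1"));
        Directory d2 = FSDirectory.open(new File("/path/to/index2"));
        IndexReader r1 = IndexReader.open(d1);
        IndexReader r2 = IndexReader.open(d2);

        MultiReader multi = new MultiReader(r1, r2);

        // Per the override above, the listener is pushed down to r1 and r2 too.
        multi.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
          public void finished(IndexReader reader) {
            System.out.println("reader finished: " + reader);
          }
        });

        multi.close();
      }
    }
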
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java Wed Feb  9 09:35:27 2011
@@ -77,6 +77,19 @@ public final class MultiTerms extends Te
   }
 
   @Override
+  public long getSumTotalTermFreq() throws IOException {
+    long sum = 0;
+    for(Terms terms : subs) {
+      final long v = terms.getSumTotalTermFreq();
+      if (v == -1) {
+        return -1;
+      }
+      sum += v;
+    }
+    return sum;
+  }
+
+  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Wed Feb  9 09:35:27 2011
@@ -91,13 +91,6 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public void cacheCurrentTerm() throws IOException {
-    for(int i=0;i<numTop;i++) {
-      top[i].terms.cacheCurrentTerm();
-    }
-  }
-
-  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }
@@ -264,7 +257,7 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public int docFreq() {
+  public int docFreq() throws IOException {
     int sum = 0;
     for(int i=0;i<numTop;i++) {
       sum += top[i].terms.docFreq();
@@ -273,6 +266,19 @@ public final class MultiTermsEnum extend
   }
 
   @Override
+  public long totalTermFreq() throws IOException {
+    long sum = 0;
+    for(int i=0;i<numTop;i++) {
+      final long v = top[i].terms.totalTermFreq();
+      if (v == -1) {
+        return v;
+      }
+      sum += v;
+    }
+    return sum;
+  }
+
+  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
     final MultiDocsEnum docsEnum;
     if (reuse != null) {

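As a standalone illustration (not Lucene code) of the aggregation pattern shared by MultiTerms.getSumTotalTermFreq and MultiTermsEnum.totalTermFreq above: per-segment values are summed, but -1 from any sub (statistic not recorded) makes the total -1:

    public class SumStatsSketch {
      static long aggregate(long[] perSegment) {
        long sum = 0;
        for (long v : perSegment) {
          if (v == -1) {
            return -1;   // one segment without the stat makes the total unknown
          }
          sum += v;
        }
        return sum;
      }

      public static void main(String[] args) {
        System.out.println(aggregate(new long[] { 10, 20, 30 }));  // 60
        System.out.println(aggregate(new long[] { 10, -1, 30 }));  // -1
      }
    }
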
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java Wed Feb  9 09:35:27 2011
@@ -23,7 +23,7 @@ import java.io.IOException;
  * A {@link MergeScheduler} which never executes any merges. It is also a
  * singleton and can be accessed through {@link NoMergeScheduler#INSTANCE}. Use
  * it if you want to prevent an {@link IndexWriter} from ever executing merges,
- * irregardles of the {@link MergePolicy} used. Note that you can achieve the
+ * regardless of the {@link MergePolicy} used. Note that you can achieve the
  * same thing by using {@link NoMergePolicy}, however with
  * {@link NoMergeScheduler} you also ensure that no unnecessary code of any
  * {@link MergeScheduler} implementation is ever executed. Hence it is

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java Wed Feb  9 09:35:27 2011
@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.ArrayList;
 
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.search.Similarity;
 
 // TODO FI: norms could actually be stored as doc store
 
@@ -37,7 +36,6 @@ import org.apache.lucene.search.Similari
 
 final class NormsWriter extends InvertedDocEndConsumer {
 
-  private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
   private FieldInfos fieldInfos;
   @Override
   public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
@@ -62,6 +60,10 @@ final class NormsWriter extends Inverted
 
     final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
 
+    if (!fieldInfos.hasNorms()) {
+      return;
+    }
+
     // Typically, each thread will have encountered the same
     // field.  So first we collate by field, ie, all
     // per-thread field instances that correspond to the
@@ -137,7 +139,7 @@ final class NormsWriter extends Inverted
 
             // Fill hole
             for(;upto<minDocID;upto++)
-              normsOut.writeByte(defaultNorm);
+              normsOut.writeByte((byte) 0);
 
             normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
             (uptos[minLoc])++;
@@ -155,12 +157,12 @@ final class NormsWriter extends Inverted
           
           // Fill final hole with defaultNorm
           for(;upto<state.numDocs;upto++)
-            normsOut.writeByte(defaultNorm);
+            normsOut.writeByte((byte) 0);
         } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
           normCount++;
           // Fill entire field with default norm:
           for(;upto<state.numDocs;upto++)
-            normsOut.writeByte(defaultNorm);
+            normsOut.writeByte((byte) 0);
         }
 
         assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();

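A standalone sketch (not Lucene code) of the hole-filling performed in the flush loop above, which now writes the raw byte 0 as the default norm instead of an encoded 1.0f:

    import java.util.Arrays;

    public class FillHoleSketch {
      public static void main(String[] args) {
        int numDocs = 8;
        int[] docIDs = { 1, 4, 5 };          // docs that have a norm for this field
        byte[] norms = { 0x5c, 0x5e, 0x60 }; // their encoded norms (arbitrary values)

        byte[] out = new byte[numDocs];
        int upto = 0;
        for (int i = 0; i < docIDs.length; i++) {
          while (upto < docIDs[i]) {
            out[upto++] = (byte) 0;          // fill hole with default norm
          }
          out[upto++] = norms[i];
        }
        while (upto < numDocs) {
          out[upto++] = (byte) 0;            // fill final hole
        }
        System.out.println(Arrays.toString(out));
      }
    }
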
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Wed Feb  9 09:35:27 2011
@@ -17,8 +17,8 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.ArrayUtil;
 
 /** Taps into DocInverter, as an InvertedDocEndConsumer,
  *  which is called at the end of inverting each field.  We
@@ -30,7 +30,8 @@ final class NormsWriterPerField extends 
   final NormsWriterPerThread perThread;
   final FieldInfo fieldInfo;
   final DocumentsWriter.DocState docState;
-
+  final Similarity similarity;
+  
   // Holds all docID/norm pairs we've seen
   int[] docIDs = new int[1];
   byte[] norms = new byte[1];
@@ -50,6 +51,7 @@ final class NormsWriterPerField extends 
     this.fieldInfo = fieldInfo;
     docState = perThread.docState;
     fieldState = docInverterPerField.fieldState;
+    similarity = docState.similarityProvider.get(fieldInfo.name);
   }
 
   @Override
@@ -72,8 +74,8 @@ final class NormsWriterPerField extends 
         assert norms.length == upto;
         norms = ArrayUtil.grow(norms, 1+upto);
       }
-      final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
-      norms[upto] = Similarity.getDefault().encodeNormValue(norm);
+      final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
+      norms[upto] = similarity.encodeNormValue(norm);
       docIDs[upto] = docState.docID;
       upto++;
     }