Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC
svn commit: r1068809 [6/36] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/queryparser/ dev-tools/...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java Wed Feb 9 09:35:27 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -83,6 +84,62 @@ import java.util.concurrent.atomic.Atomi
public abstract class IndexReader implements Cloneable,Closeable {
/**
+ * A custom listener that's invoked when the IndexReader
+ * is finished.
+ *
+ * <p>For a SegmentReader, this listener is called only
+ * once all SegmentReaders sharing the same core are
+ * closed. At this point it is safe for apps to evict
+ * this reader from any caches keyed on {@link
+ * #getCoreCacheKey}. This is the same interface that
+ * {@link FieldCache} uses, internally, to evict
+ * entries.</p>
+ *
+ * <p>For other readers, this listener is called when they
+ * are closed.</p>
+ *
+ * @lucene.experimental
+ */
+ public static interface ReaderFinishedListener {
+ public void finished(IndexReader reader);
+ }
+
+ // Impls must set this if they may call add/removeReaderFinishedListener:
+ protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
+
+ /** Expert: adds a {@link ReaderFinishedListener}. The
+ * provided listener is also added to any sub-readers, if
+ * this is a composite reader. Any reader reopened
+ * or cloned from this one will also copy the listeners at
+ * the time of reopen.
+ *
+ * @lucene.experimental */
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.add(listener);
+ }
+
+ /** Expert: remove a previously added {@link ReaderFinishedListener}.
+ *
+ * @lucene.experimental */
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.remove(listener);
+ }
+
+ protected void notifyReaderFinishedListeners() {
+ // Defensive (should never be null -- all impls must set
+ // this):
+ if (readerFinishedListeners != null) {
+ for(ReaderFinishedListener listener : readerFinishedListeners) {
+ listener.finished(this);
+ }
+ }
+ }
+
+ protected void readerFinished() {
+ notifyReaderFinishedListeners();
+ }
+
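
As a rough illustration of how an application might use this hook (not part of the patch; the cache and variable names below are hypothetical), a listener can evict entries keyed on the core cache key once the shared core is finished:

    // sketch: "coreCache" is an application-level cache keyed on getCoreCacheKey()
    final java.util.concurrent.ConcurrentHashMap<Object,Object> coreCache =
        new java.util.concurrent.ConcurrentHashMap<Object,Object>();
    reader.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
      public void finished(IndexReader r) {
        // for a SegmentReader this fires only once no other reader shares its core,
        // so anything keyed on the core cache key can safely be dropped
        coreCache.remove(r.getCoreCacheKey());
      }
    });
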
+ /**
* Constants describing field properties, for example used for
* {@link IndexReader#getFieldNames(FieldOption)}.
*/
@@ -199,6 +256,7 @@ public abstract class IndexReader implem
refCount.incrementAndGet();
}
}
+ readerFinished();
}
}
@@ -242,24 +300,26 @@ public abstract class IndexReader implem
/**
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
*
- *
* @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
* @return The new IndexReader
* @throws CorruptIndexException
* @throws IOException if there is a low-level IO error
*
- * @see #reopen(IndexWriter)
+ * @see #reopen(IndexWriter,boolean)
*
* @lucene.experimental
*/
- public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
-
-
-
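
For example (a sketch; "writer" is assumed to be an already-open IndexWriter), the new flag lets an application trade deletion visibility against reopen cost:

    // deletes made visible: more work up front, no deleted docs returned
    IndexReader visibleDeletes = IndexReader.open(writer, true);
    // buffered deletes left in the writer: cheaper, searches may see deleted docs
    IndexReader fastNrt = IndexReader.open(writer, false);
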
/** Expert: returns an IndexReader reading the index in the given
* {@link IndexCommit}. You should pass readOnly=true, since it
* gives much better concurrent performance, unless you
@@ -305,7 +365,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -355,14 +415,17 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
* memory. By setting this to a value > 1 you can reduce
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
- * to -1 to skip loading the terms index entirely.
+ * to -1 to skip loading the terms index entirely. This is only useful in
+ * advanced situations when you will only .next() through all terms;
+ * attempts to seek will hit an exception.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
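
A sketch of the divisor == -1 case (the open() overload, directory and field name used here are illustrative; MultiFields is one way to obtain a Terms instance for a composite reader on this branch):

    // terms index skipped entirely; only sequential enumeration is possible
    IndexReader r = IndexReader.open(dir, null, true, -1);
    Terms terms = MultiFields.getTerms(r, "body");
    if (terms != null) {
      TermsEnum te = terms.iterator();
      BytesRef t;
      while ((t = te.next()) != null) {
        // consume t; any attempt to seek() would throw with divisor == -1
      }
    }
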
@@ -384,7 +447,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -417,7 +480,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -546,7 +609,7 @@ public abstract class IndexReader implem
* file descriptors, CPU time) will be consumed.</p>
*
* <p>For lower latency on reopening a reader, you should
- * call {@link #setMergedSegmentWarmer} to
+ * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
* pre-warm a newly merged segment before it's committed
* to the index. This is important for minimizing
* index-to-search delay after a large merge. </p>
@@ -561,18 +624,26 @@ public abstract class IndexReader implem
* if you attempt to reopen any of those readers, you'll
* hit an {@link AlreadyClosedException}.</p>
*
- * @lucene.experimental
- *
* @return IndexReader that covers entire index plus all
* changes made so far by this IndexWriter instance
*
+ * @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
+ *
* @throws IOException
+ *
+ * @lucene.experimental
*/
- public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
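
A sketch of a refresh loop using the new flag (variable names are illustrative); passing false keeps reopen latency down when temporarily seeing deleted documents is acceptable:

    IndexReader current = IndexReader.open(writer, true);
    // ... later, on each refresh tick:
    IndexReader newReader = current.reopen(writer, false);
    if (newReader != current) {
      current.close();   // assumes no other thread is still searching "current"
      current = newReader;
    }
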
-
/**
* Efficiently clones the IndexReader (sharing most
* internal state).
@@ -935,14 +1006,6 @@ public abstract class IndexReader implem
*/
public abstract byte[] norms(String field) throws IOException;
- /** Reads the byte-encoded normalization factor for the named field of every
- * document. This is used by the search code to score documents.
- *
- * @see org.apache.lucene.document.Field#setBoost(float)
- */
- public abstract void norms(String field, byte[] bytes, int offset)
- throws IOException;
-
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
@@ -974,26 +1037,6 @@ public abstract class IndexReader implem
protected abstract void doSetNorm(int doc, String field, byte value)
throws CorruptIndexException, IOException;
- /** Expert: Resets the normalization factor for the named field of the named
- * document.
- *
- * @see #norms(String)
- * @see Similarity#decodeNormValue(byte)
- *
- * @throws StaleReaderException if the index has changed
- * since this reader was opened
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if there is a low-level IO error
- */
- public void setNorm(int doc, String field, float value)
- throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
- ensureOpen();
- setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
- }
-
/** Flex API: returns {@link Fields} for this reader.
* This method may return null if the reader has no
* postings.
@@ -1029,6 +1072,23 @@ public abstract class IndexReader implem
return terms.docFreq(term);
}
+ /** Returns the total number of occurrences of the term
+ * <code>term</code> across all documents in <code>field</code>
+ * (the sum of its within-document frequencies). This method
+ * returns 0 if the term or field does not exist. This method
+ * does not take into account deleted documents that have not
+ * yet been merged away. */
+ public long totalTermFreq(String field, BytesRef term) throws IOException {
+ final Fields fields = fields();
+ if (fields == null) {
+ return 0;
+ }
+ final Terms terms = fields.terms(field);
+ if (terms == null) {
+ return 0;
+ }
+ return terms.totalTermFreq(term);
+ }
+
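
For example (field and term values are illustrative), this complements docFreq, which counts documents rather than occurrences:

    BytesRef term = new BytesRef("lucene");
    int docs = reader.docFreq("body", term);               // documents containing the term
    long occurrences = reader.totalTermFreq("body", term); // occurrences across all documents
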
/** This may return null if the field does not exist.*/
public Terms terms(String field) throws IOException {
final Fields fields = fields();
@@ -1074,6 +1134,47 @@ public abstract class IndexReader implem
return null;
}
}
+
+ /**
+ * Returns {@link DocsEnum} for the specified field and
+ * {@link TermState}. This may return null if either the field or the term
+ * does not exist, or the {@link TermState} is invalid for the underlying
+ * implementation. */
+ public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ assert field != null;
+ final Fields fields = fields();
+ if (fields == null) {
+ return null;
+ }
+ final Terms terms = fields.terms(field);
+ if (terms != null) {
+ return terms.docs(skipDocs, term, state, null);
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Returns {@link DocsAndPositionsEnum} for the specified field and
+ * {@link TermState}. This may return null if either the field or the term
+ * does not exist, the {@link TermState} is invalid for the underlying
+ * implementation, or positions were not stored for this term. */
+ public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException {
+ assert state != null;
+ assert field != null;
+ final Fields fields = fields();
+ if (fields == null) {
+ return null;
+ }
+ final Terms terms = fields.terms(field);
+ if (terms != null) {
+ return terms.docsAndPositions(skipDocs, term, state, null);
+ } else {
+ return null;
+ }
+ }
+
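
A rough sketch of the intended use (names are illustrative): capture the TermState from an already-positioned TermsEnum and hand it back later to skip a second term lookup:

    // "te" is a TermsEnum positioned on the wanted term, e.g. after a successful seek()
    BytesRef term = te.term();
    TermState cached = te.termState();
    Bits delDocs = MultiFields.getDeletedDocs(reader);   // null if the index has no deletions
    DocsEnum docsEnum = reader.termDocsEnum(delDocs, "body", term, cached);
    if (docsEnum != null) {
      int doc;
      while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        // visit doc
      }
    }
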
/** Deletes the document numbered <code>docNum</code>. Once a document is
* deleted it will not appear in TermDocs or TermPositions enumerations.
@@ -1137,7 +1238,16 @@ public abstract class IndexReader implem
return n;
}
- /** Undeletes all documents currently marked as deleted in this index.
+ /** Undeletes all documents currently marked as deleted in
+ * this index.
+ *
+ * <p>NOTE: this method can only recover documents marked
+ * for deletion but not yet removed from the index; when
+ * and how Lucene removes deleted documents is an
+ * implementation detail, subject to change from release
+ * to release. However, you can use {@link
+ * #numDeletedDocs} on the current IndexReader instance to
+ * see how many documents will be un-deleted.
*
* @throws StaleReaderException if the index has changed
* since this reader was opened
@@ -1360,9 +1470,7 @@ public abstract class IndexReader implem
}
/** Expert: returns the sequential sub readers that this
- * reader is logically composed of. For example,
- * IndexSearcher uses this API to drive searching by one
- * sub reader at a time. If this reader is not composed
+ * reader is logically composed of. If this reader is not composed
* of sequential child readers, it should return null.
* If this method returns an empty array, that means this
* reader is a null reader (for example a MultiReader
@@ -1377,12 +1485,33 @@ public abstract class IndexReader implem
public IndexReader[] getSequentialSubReaders() {
return null;
}
-
-
- /** Expert: returns the docID base for this subReader. */
- public int getSubReaderDocBase(IndexReader subReader) {
- throw new UnsupportedOperationException();
- }
+
+ /**
+ * Expert: Returns the root {@link ReaderContext} for this
+ * {@link IndexReader}'s sub-reader tree. If this reader is composed of sub-
+ * readers, i.e. this reader is a composite reader, this method returns a
+ * {@link CompositeReaderContext} holding the reader's direct children as well as a
+ * view of the reader tree's atomic leaf contexts. All sub-
+ * {@link ReaderContext} instances referenced from this reader's top-level
+ * context are private to this reader and are not shared with another context
+ * tree. For example, IndexSearcher uses this API to drive searching by one
+ * atomic leaf reader at a time. If this reader is not composed of child
+ * readers, this method returns an {@link AtomicReaderContext}.
+ * <p>
+ * Note: Any of the sub-{@link CompositeReaderContext} instances referenced from this
+ * top-level context holds a <code>null</code> {@link CompositeReaderContext#leaves}
+ * reference. Only the top-level context maintains the convenience leaf-view
+ * for performance reasons.
+ * <p>
+ * NOTE: You should not try using sub-readers returned by this method to make
+ * any changes (setNorm, deleteDocument, etc.). While this might succeed for
+ * one composite reader (like MultiReader), it will most likely lead to index
+ * corruption for other readers (like DirectoryReader obtained through
+ * {@link #open}). Use the top-level context's reader directly.
+ *
+ * @lucene.experimental
+ */
+ public abstract ReaderContext getTopReaderContext();
/** Expert */
public Object getCoreCacheKey() {
@@ -1442,4 +1571,132 @@ public abstract class IndexReader implem
Fields retrieveFields() {
return fields;
}
+
+ /**
+ * A struct-like class that represents a hierarchical relationship between
+ * {@link IndexReader} instances.
+ * @lucene.experimental
+ */
+ public static abstract class ReaderContext {
+ /** The reader context for this reader's immediate parent, or null if none */
+ public final ReaderContext parent;
+ /** The actual reader */
+ public final IndexReader reader;
+ /** <code>true</code> iff the reader is an atomic reader */
+ public final boolean isAtomic;
+ /** <code>true</code> if this context struct represents the top level reader within the hierarchical context */
+ public final boolean isTopLevel;
+ /** the doc base for this reader in the parent, <tt>0</tt> if parent is null */
+ public final int docBaseInParent;
+ /** the ord for this reader in the parent, <tt>0</tt> if parent is null */
+ public final int ordInParent;
+
+ ReaderContext(ReaderContext parent, IndexReader reader,
+ boolean isAtomic, int ordInParent, int docBaseInParent) {
+ this.parent = parent;
+ this.reader = reader;
+ this.isAtomic = isAtomic;
+ this.docBaseInParent = docBaseInParent;
+ this.ordInParent = ordInParent;
+ this.isTopLevel = parent==null;
+ }
+
+ /**
+ * Returns the context's leaves if this context is a top-level context,
+ * otherwise <code>null</code>.
+ * <p>
+ * Note: this is a convenience method since leaves can always be obtained by
+ * walking the context tree.
+ */
+ public AtomicReaderContext[] leaves() {
+ return null;
+ }
+
+ /**
+ * Returns the context's children iff this context is a composite context,
+ * otherwise <code>null</code>.
+ * <p>
+ * Note: this method is a convenience method to prevent
+ * <code>instanceof</code> checks and type-casts to
+ * {@link CompositeReaderContext}.
+ */
+ public ReaderContext[] children() {
+ return null;
+ }
+ }
+
+ /**
+ * {@link ReaderContext} for composite {@link IndexReader} instance.
+ * @lucene.experimental
+ */
+ public static final class CompositeReaderContext extends ReaderContext {
+ /** the composite reader's immediate children */
+ public final ReaderContext[] children;
+ /** the composite reader's leaf reader contexts if this is the top-level reader in this context */
+ public final AtomicReaderContext[] leaves;
+
+ /**
+ * Creates a {@link CompositeReaderContext} for intermediate readers that
+ * are not top-level readers in the current context
+ */
+ public CompositeReaderContext(ReaderContext parent, IndexReader reader,
+ int ordInParent, int docbaseInParent, ReaderContext[] children) {
+ this(parent, reader, ordInParent, docbaseInParent, children, null);
+ }
+
+ /**
+ * Creates a {@link CompositeReaderContext} for top-level readers with parent set to <code>null</code>
+ */
+ public CompositeReaderContext(IndexReader reader, ReaderContext[] children, AtomicReaderContext[] leaves) {
+ this(null, reader, 0, 0, children, leaves);
+ }
+
+ private CompositeReaderContext(ReaderContext parent, IndexReader reader,
+ int ordInParent, int docbaseInParent, ReaderContext[] children,
+ AtomicReaderContext[] leaves) {
+ super(parent, reader, false, ordInParent, docbaseInParent);
+ this.children = children;
+ this.leaves = leaves;
+ }
+
+ @Override
+ public AtomicReaderContext[] leaves() {
+ return leaves;
+ }
+
+
+ @Override
+ public ReaderContext[] children() {
+ return children;
+ }
+ }
+
+ /**
+ * {@link ReaderContext} for atomic {@link IndexReader} instances
+ * @lucene.experimental
+ */
+ public static final class AtomicReaderContext extends ReaderContext {
+ /** The reader's ord in the top-level's leaves array */
+ public final int ord;
+ /** The reader's absolute doc base */
+ public final int docBase;
+ /**
+ * Creates a new {@link AtomicReaderContext}
+ */
+ public AtomicReaderContext(ReaderContext parent, IndexReader reader,
+ int ord, int docBase, int leafOrd, int leafDocBase) {
+ super(parent, reader, true, ord, docBase);
+ assert reader.getSequentialSubReaders() == null : "Atomic readers must not have subreaders";
+ this.ord = leafOrd;
+ this.docBase = leafDocBase;
+ }
+
+ /**
+ * Creates a new {@link AtomicReaderContext} for an atomic reader without an immediate
+ * parent.
+ */
+ public AtomicReaderContext(IndexReader atomicReader) {
+ this(null, atomicReader, 0, 0, 0, 0);
+ }
+ }
}
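
As an illustration of the new context API (a sketch, not part of this commit), per-segment work can be driven from the top-level context's leaf view:

    IndexReader.ReaderContext top = reader.getTopReaderContext();
    IndexReader.AtomicReaderContext[] leaves = top.leaves();
    if (leaves == null) {
      // "reader" is atomic: the top-level context is itself the only leaf
      IndexReader leafReader = top.reader;
      // ... per-segment work on leafReader ...
    } else {
      for (IndexReader.AtomicReaderContext leaf : leaves) {
        IndexReader leafReader = leaf.reader;   // never a composite reader
        int docBase = leaf.docBase;             // absolute doc base of this leaf
        // ... per-segment work, e.g. collect docs and add docBase to their ids ...
      }
    }
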
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Wed Feb 9 09:35:27 2011
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
@@ -47,6 +48,7 @@ import org.apache.lucene.store.LockObtai
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
+import org.apache.lucene.util.MapBackedSet;
/**
An <code>IndexWriter</code> creates and maintains an index.
@@ -214,7 +216,6 @@ public class IndexWriter implements Clos
private long lastCommitChangeCount; // last changeCount that was committed
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
- private HashMap<SegmentInfo,Integer> rollbackSegments;
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount;
@@ -250,7 +251,7 @@ public class IndexWriter implements Clos
private final AtomicInteger flushDeletesCount = new AtomicInteger();
final ReaderPool readerPool = new ReaderPool();
- final BufferedDeletes bufferedDeletes;
+ final BufferedDeletesStream bufferedDeletesStream;
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
@@ -270,6 +271,13 @@ public class IndexWriter implements Clos
// The PayloadProcessorProvider to use when segments are merged
private PayloadProcessorProvider payloadProcessorProvider;
+ // for testing
+ boolean anyNonBulkMerges;
+
+ IndexReader getReader() throws IOException {
+ return getReader(true);
+ }
+
/**
* Expert: returns a readonly reader, covering all
* committed as well as un-committed changes to the index.
@@ -329,9 +337,10 @@ public class IndexWriter implements Clos
*
* @throws IOException
*/
- IndexReader getReader() throws IOException {
-
+ IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
+
+ final long tStart = System.currentTimeMillis();
if (infoStream != null) {
message("flush at getReader");
@@ -347,17 +356,27 @@ public class IndexWriter implements Clos
// just like we do when loading segments_N
IndexReader r;
synchronized(this) {
- flush(false, true);
- r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+ flush(false, applyAllDeletes);
+ r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
if (infoStream != null) {
message("return reader version=" + r.getVersion() + " reader=" + r);
}
}
maybeMerge();
+ if (infoStream != null) {
+ message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
+ }
return r;
}
+ // Used for all SegmentReaders we open
+ private final Collection<IndexReader.ReaderFinishedListener> readerFinishedListeners = new MapBackedSet<IndexReader.ReaderFinishedListener>(new ConcurrentHashMap<IndexReader.ReaderFinishedListener,Boolean>());
+
+ Collection<IndexReader.ReaderFinishedListener> getReaderFinishedListeners() throws IOException {
+ return readerFinishedListeners;
+ }
+
/** Holds shared SegmentReader instances. IndexWriter uses
* SegmentReaders for 1) applying deletes, 2) doing
* merges, 3) handing out a real-time reader. This pool
@@ -567,6 +586,7 @@ public class IndexWriter implements Clos
// synchronized
// Returns a ref, which we xfer to readerMap:
sr = SegmentReader.get(false, info.dir, info, readBufferSize, doOpenStores, termsIndexDivisor);
+ sr.readerFinishedListeners = readerFinishedListeners;
if (info.dir == directory) {
// Only pool if reader is not external
@@ -605,8 +625,6 @@ public class IndexWriter implements Clos
}
}
-
-
/**
* Obtain the number of deleted docs for a pooled reader.
* If the reader isn't being pooled, the segmentInfo's
@@ -662,16 +680,13 @@ public class IndexWriter implements Clos
* IndexWriter. Additionally, calling {@link #getConfig()} and changing the
* parameters does not affect that IndexWriter instance.
* <p>
- * <b>NOTE:</b> by default, {@link IndexWriterConfig#getMaxFieldLength()}
- * returns {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH}. Pay attention to
- * whether this setting fits your application.
*
* @param d
* the index directory. The index is either created or appended
* according <code>conf.getOpenMode()</code>.
* @param conf
* the configuration settings according to which IndexWriter should
- * be initalized.
+ * be initialized.
* @throws CorruptIndexException
* if the index is corrupt
* @throws LockObtainFailedException
@@ -689,7 +704,6 @@ public class IndexWriter implements Clos
directory = d;
analyzer = conf.getAnalyzer();
infoStream = defaultInfoStream;
- maxFieldLength = conf.getMaxFieldLength();
termIndexInterval = conf.getTermIndexInterval();
mergePolicy = conf.getMergePolicy();
mergePolicy.setIndexWriter(this);
@@ -697,8 +711,8 @@ public class IndexWriter implements Clos
mergedSegmentWarmer = conf.getMergedSegmentWarmer();
codecs = conf.getCodecProvider();
- bufferedDeletes = new BufferedDeletes(messageID);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream = new BufferedDeletesStream(messageID);
+ bufferedDeletesStream.setInfoStream(infoStream);
poolReaders = conf.getReaderPooling();
OpenMode mode = conf.getOpenMode();
@@ -719,11 +733,8 @@ public class IndexWriter implements Clos
boolean success = false;
- // TODO: we should check whether this index is too old,
- // and throw an IndexFormatTooOldExc up front, here,
- // instead of later when merge, applyDeletes, getReader
- // is attempted. I think to do this we should store the
- // oldest segment's version in segments_N.
+ // If index is too old, reading the segments will throw
+ // IndexFormatTooOldException.
segmentInfos = new SegmentInfos(codecs);
try {
if (create) {
@@ -766,9 +777,8 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
- docWriter.setMaxFieldLength(maxFieldLength);
// Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter:
@@ -854,10 +864,6 @@ public class IndexWriter implements Clos
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
- rollbackSegments = new HashMap<SegmentInfo,Integer>();
- final int size = rollbackSegmentInfos.size();
- for(int i=0;i<size;i++)
- rollbackSegments.put(rollbackSegmentInfos.info(i), Integer.valueOf(i));
}
/**
@@ -919,7 +925,7 @@ public class IndexWriter implements Clos
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
deleter.setInfoStream(infoStream);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream.setInfoStream(infoStream);
if (infoStream != null)
messageState();
}
@@ -1051,8 +1057,9 @@ public class IndexWriter implements Clos
private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
try {
- if (infoStream != null)
- message("now flush at close");
+ if (infoStream != null) {
+ message("now flush at close waitForMerges=" + waitForMerges);
+ }
docWriter.close();
@@ -1164,7 +1171,7 @@ public class IndexWriter implements Clos
public synchronized boolean hasDeletions() throws IOException {
ensureOpen();
- if (bufferedDeletes.any()) {
+ if (bufferedDeletesStream.any()) {
return true;
}
if (docWriter.anyDeletions()) {
@@ -1177,25 +1184,7 @@ public class IndexWriter implements Clos
}
/**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory.<p/>
- * Note that this effectively truncates large documents, excluding from the
- * index terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accommodate
- * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
- * is your memory, but you should anticipate an OutOfMemoryError.<p/>
- * By default, no more than 10,000 terms will be indexed for a field.
- *
- * @see MaxFieldLength
- */
- private int maxFieldLength;
-
- /**
- * Adds a document to this index. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
- * the remainder are discarded.
+ * Adds a document to this index.
*
* <p> Note that if an Exception is hit (for example disk full)
* then the index will be consistent, but this document
@@ -1242,9 +1231,7 @@ public class IndexWriter implements Clos
/**
* Adds a document to this index, using the provided analyzer instead of the
- * value of {@link #getAnalyzer()}. If the document contains more than
- * {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
- * discarded.
+ * value of {@link #getAnalyzer()}.
*
* <p>See {@link #addDocument(Document)} for details on
* index and IndexWriter state after an Exception, and
@@ -1533,6 +1520,11 @@ public class IndexWriter implements Clos
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @see MergePolicy#findMergesForOptimize
@@ -1682,6 +1674,11 @@ public class IndexWriter implements Clos
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
+ *
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
*/
public void expungeDeletes(boolean doWait)
throws CorruptIndexException, IOException {
@@ -1832,6 +1829,18 @@ public class IndexWriter implements Clos
}
}
+ /** Expert: to be used by a {@link MergePolicy} to avoid
+ * selecting merges for segments already being merged.
+ * The returned collection is not cloned, and thus is
+ * only safe to access if you hold IndexWriter's lock
+ * (which you do when IndexWriter invokes the
+ * MergePolicy).
+ *
+ * <p>Do not alter the returned collection! */
+ public synchronized Collection<SegmentInfo> getMergingSegments() {
+ return mergingSegments;
+ }
+
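
A sketch of how a custom merge policy might use this (the findMerges skeleton and names are hypothetical):

    // inside a MergePolicy.findMerges(SegmentInfos infos), under IndexWriter's lock
    Collection<SegmentInfo> inFlight = writer.get().getMergingSegments();
    for (int i = 0; i < infos.size(); i++) {
      SegmentInfo info = infos.info(i);
      if (inFlight.contains(info)) {
        continue;   // already being merged; don't select it twice
      }
      // ... consider "info" as a candidate for a new merge ...
    }
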
/** Expert: the {@link MergeScheduler} calls this method
* to retrieve the next merge requested by the
* MergePolicy */
@@ -1889,7 +1898,7 @@ public class IndexWriter implements Clos
mergePolicy.close();
mergeScheduler.close();
- bufferedDeletes.clear();
+ bufferedDeletesStream.clear();
synchronized(this) {
@@ -1952,8 +1961,9 @@ public class IndexWriter implements Clos
*
* <p>NOTE: this method will forcefully abort all merges
* in progress. If other threads are running {@link
- * #optimize()} or any of the addIndexes methods, they
- * will receive {@link MergePolicy.MergeAbortedException}s.
+ * #optimize()}, {@link #addIndexes(IndexReader[])} or
+ * {@link #expungeDeletes} methods, they may receive
+ * {@link MergePolicy.MergeAbortedException}s.
*/
public synchronized void deleteAll() throws IOException {
try {
@@ -2042,12 +2052,19 @@ public class IndexWriter implements Clos
* will have completed once this method completes.</p>
*/
public synchronized void waitForMerges() {
+ if (infoStream != null) {
+ message("waitForMerges");
+ }
while(pendingMerges.size() > 0 || runningMerges.size() > 0) {
doWait();
}
// sanity check
assert 0 == mergingSegments.size();
+
+ if (infoStream != null) {
+ message("waitForMerges done");
+ }
}
/**
@@ -2226,6 +2243,11 @@ public class IndexWriter implements Clos
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
*
+ * <p><b>NOTE</b>: if you call {@link #close(boolean)}
+ * with <tt>false</tt>, which aborts all running merges,
+ * then any thread still running this method might hit a
+ * {@link MergePolicy.MergeAbortedException}.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
@@ -2453,13 +2475,13 @@ public class IndexWriter implements Clos
}
/**
- * Flush all in-memory buffered udpates (adds and deletes)
+ * Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
- * @param flushDeletes whether pending deletes should also
+ * @param applyAllDeletes whether pending deletes should also be applied
*/
- protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+ protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
@@ -2470,7 +2492,7 @@ public class IndexWriter implements Clos
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
- if (doFlush(flushDeletes) && triggerMerge) {
+ if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}
@@ -2519,10 +2541,10 @@ public class IndexWriter implements Clos
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+ bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
- message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+ message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
}
}
}
@@ -2532,12 +2554,15 @@ public class IndexWriter implements Clos
message("apply all deletes during flush");
}
flushDeletesCount.incrementAndGet();
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+ if (result.anyDeletes) {
checkpoint();
}
+ bufferedDeletesStream.prune(segmentInfos);
+ assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
} else if (infoStream != null) {
- message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+ message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
doAfterFlush();
@@ -2563,7 +2588,7 @@ public class IndexWriter implements Clos
*/
public final long ramSizeInBytes() {
ensureOpen();
- return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+ return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
}
/** Expert: Return the number of documents currently
@@ -2573,28 +2598,12 @@ public class IndexWriter implements Clos
return docWriter.getNumDocs();
}
- private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
- int first = segmentInfos.indexOf(merge.segments.info(0));
- if (first == -1)
- throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
- final int numSegments = segmentInfos.size();
-
- final int numSegmentsToMerge = merge.segments.size();
- for(int i=0;i<numSegmentsToMerge;i++) {
- final SegmentInfo info = merge.segments.info(i);
-
- if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
- if (segmentInfos.indexOf(info) == -1)
- throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
- else
- throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
- directory);
+ private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ for(SegmentInfo info : merge.segments) {
+ if (segmentInfos.indexOf(info) == -1) {
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
-
- return first;
}
/** Carefully merges deletes for the segments we just
@@ -2619,9 +2628,11 @@ public class IndexWriter implements Clos
// started merging:
int docUpto = 0;
int delCount = 0;
+ long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.info(i);
+ minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
SegmentReader previousReader = merge.readersClone[i];
final Bits prevDelDocs = previousReader.getDeletedDocs();
@@ -2672,9 +2683,17 @@ public class IndexWriter implements Clos
assert mergedReader.numDeletedDocs() == delCount;
mergedReader.hasChanges = delCount > 0;
+
+ // If new deletes were applied while we were merging
+ // (which happens if eg commit() or getReader() is
+ // called during our merge), then it better be the case
+ // that the delGen has increased for all our merged
+ // segments:
+ assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+ mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
}
- /* FIXME if we want to support non-contiguous segment merges */
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
assert testPoint("startCommitMerge");
@@ -2700,7 +2719,7 @@ public class IndexWriter implements Clos
return false;
}
- final int start = ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
commitMergedDeletes(merge, mergedReader);
@@ -2710,10 +2729,32 @@ public class IndexWriter implements Clos
// format as well:
setMergeDocStoreIsCompoundFile(merge);
- segmentInfos.subList(start, start + merge.segments.size()).clear();
assert !segmentInfos.contains(merge.info);
- segmentInfos.add(start, merge.info);
-
+
+ final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ int segIdx = 0;
+ int newSegIdx = 0;
+ boolean inserted = false;
+ final int curSegCount = segmentInfos.size();
+ while(segIdx < curSegCount) {
+ final SegmentInfo info = segmentInfos.info(segIdx++);
+ if (mergedAway.contains(info)) {
+ if (!inserted) {
+ segmentInfos.set(segIdx-1, merge.info);
+ inserted = true;
+ newSegIdx++;
+ }
+ } else {
+ segmentInfos.set(newSegIdx++, info);
+ }
+ }
+ assert newSegIdx == curSegCount - merge.segments.size() + 1;
+ segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+ if (infoStream != null) {
+ message("after commit: " + segString());
+ }
+
closeMergeReaders(merge, false);
// Must note the change to segmentInfos so any commits
@@ -2725,16 +2766,12 @@ public class IndexWriter implements Clos
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);
- // remove pending deletes of the segments
- // that were merged, moving them onto the segment just
- // before the merged segment
- // Lock order: IW -> BD
- bufferedDeletes.commitMerge(merge);
-
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
+
+
return true;
}
@@ -2862,7 +2899,7 @@ public class IndexWriter implements Clos
}
}
- ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
pendingMerges.add(merge);
@@ -2889,10 +2926,6 @@ public class IndexWriter implements Clos
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;
try {
- // Lock order: IW -> BD
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
- checkpoint();
- }
_mergeInit(merge);
success = true;
} finally {
@@ -2916,6 +2949,9 @@ public class IndexWriter implements Clos
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
+ // TODO: is there any perf benefit to sorting
+ // merged segments? eg biggest to smallest?
+
if (merge.info != null)
// mergeInit already done
return;
@@ -2928,6 +2964,17 @@ public class IndexWriter implements Clos
// names.
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ // Lock order: IW -> BD
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+ if (result.anyDeletes) {
+ checkpoint();
+ }
+
+ merge.info.setBufferedDeletesGen(result.gen);
+
+ // Lock order: IW -> BD
+ bufferedDeletesStream.prune(segmentInfos);
+
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -3115,6 +3162,7 @@ public class IndexWriter implements Clos
message("merge segmentCodecs=" + merger.getSegmentCodecs());
message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
}
+ anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments;
assert mergedDocCount == totDocCount;
@@ -3280,7 +3328,7 @@ public class IndexWriter implements Clos
// NOTE: the callers of this method should in theory
// be able to do simply wait(), but, as a defense
// against thread timing hazards where notifyAll()
- // falls to be called, we wait for at most 1 second
+ // fails to be called, we wait for at most 1 second
// and then return so caller can check if wait
// conditions are satisfied:
try {
@@ -3290,6 +3338,15 @@ public class IndexWriter implements Clos
}
}
+ private boolean keepFullyDeletedSegments;
+
+ /** Only for testing.
+ *
+ * @lucene.internal */
+ void keepFullyDeletedSegments() {
+ keepFullyDeletedSegments = true;
+ }
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
@@ -3348,6 +3405,10 @@ public class IndexWriter implements Clos
readerPool.commit();
toSync = (SegmentInfos) segmentInfos.clone();
+ if (!keepFullyDeletedSegments) {
+ toSync.pruneDeletedSegments();
+ }
+
assert filesExist(toSync);
if (commitUserData != null)
@@ -3477,7 +3538,7 @@ public class IndexWriter implements Clos
}
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}
synchronized boolean isClosed() {
@@ -3644,7 +3705,7 @@ public class IndexWriter implements Clos
final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final long limit = (long) (ramBufferSizeMB*1024*1024);
- long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
// DocumentsWriter may be able to free up some
@@ -3652,7 +3713,7 @@ public class IndexWriter implements Clos
// Lock order: FC -> DW
docWriter.balanceRAM();
- used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
return setFlushPending("ram full: " + reason, false);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Feb 9 09:35:27 2011
@@ -21,7 +21,8 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.util.Version;
/**
@@ -41,8 +42,6 @@ import org.apache.lucene.util.Version;
*/
public final class IndexWriterConfig implements Cloneable {
- public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
-
/**
* Specifies the open mode for {@link IndexWriter}:
* <ul>
@@ -55,7 +54,7 @@ public final class IndexWriterConfig imp
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
- public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
+ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
/** Denotes a flush trigger is disabled. */
public final static int DISABLE_AUTO_FLUSH = -1;
@@ -113,8 +112,7 @@ public final class IndexWriterConfig imp
private IndexDeletionPolicy delPolicy;
private IndexCommit commit;
private OpenMode openMode;
- private int maxFieldLength;
- private Similarity similarity;
+ private SimilarityProvider similarityProvider;
private int termIndexInterval; // TODO: this should be private to the codec, not settable here
private MergeScheduler mergeScheduler;
private long writeLockTimeout;
@@ -145,8 +143,7 @@ public final class IndexWriterConfig imp
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
openMode = OpenMode.CREATE_OR_APPEND;
- maxFieldLength = UNLIMITED_FIELD_LENGTH;
- similarity = Similarity.getDefault();
+ similarityProvider = IndexSearcher.getDefaultSimilarityProvider();
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
writeLockTimeout = WRITE_LOCK_TIMEOUT;
@@ -220,37 +217,6 @@ public final class IndexWriterConfig imp
}
/**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory. This setting refers to the number of running terms,
- * not to the number of different terms.
- * <p>
- * <b>NOTE:</b> this silently truncates large documents, excluding from the
- * index all terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accomodate
- * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
- * the only limit is your memory, but you should anticipate an
- * OutOfMemoryError.
- * <p>
- * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
- */
- public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
- return this;
- }
-
- /**
- * Returns the maximum number of terms that will be indexed for a single field
- * in a document.
- *
- * @see #setMaxFieldLength(int)
- */
- public int getMaxFieldLength() {
- return maxFieldLength;
- }
-
- /**
* Expert: allows to open a certain commit point. The default is null which
* opens the latest commit point.
*/
@@ -269,25 +235,22 @@ public final class IndexWriterConfig imp
}
/**
- * Expert: set the {@link Similarity} implementation used by this IndexWriter.
+ * Expert: set the {@link SimilarityProvider} implementation used by this IndexWriter.
* <p>
- * <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
- * the similarity will be set to the default.
- *
- * @see Similarity#setDefault(Similarity)
+ * <b>NOTE:</b> the similarity provider cannot be null. If <code>null</code> is passed,
+ * the similarity provider will be set to the default implementation (unspecified).
*/
- public IndexWriterConfig setSimilarity(Similarity similarity) {
- this.similarity = similarity == null ? Similarity.getDefault() : similarity;
+ public IndexWriterConfig setSimilarityProvider(SimilarityProvider similarityProvider) {
+ this.similarityProvider = similarityProvider == null ? IndexSearcher.getDefaultSimilarityProvider() : similarityProvider;
return this;
}
/**
- * Expert: returns the {@link Similarity} implementation used by this
- * IndexWriter. This defaults to the current value of
- * {@link Similarity#getDefault()}.
+ * Expert: returns the {@link SimilarityProvider} implementation used by this
+ * IndexWriter.
*/
- public Similarity getSimilarity() {
- return similarity;
+ public SimilarityProvider getSimilarityProvider() {
+ return similarityProvider;
}
/**
@@ -589,10 +552,13 @@ public final class IndexWriterConfig imp
/** Sets the termsIndexDivisor passed to any readers that
* IndexWriter opens, for example when applying deletes
* or creating a near-real-time reader in {@link
- * IndexWriter#getReader}. */
+ * IndexWriter#getReader}. If you pass -1, the terms index
+ * won't be loaded by the readers. This is only useful in
+ * advanced situations when you will only .next() through
+ * all terms; attempts to seek will hit an exception. */
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
- if (divisor <= 0) {
- throw new IllegalArgumentException("divisor must be >= 1 (got " + divisor + ")");
+ if (divisor <= 0 && divisor != -1) {
+ throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
}
readerTermsIndexDivisor = divisor;
return this;
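
For example (a sketch; matchVersion, analyzer and dir are assumed to exist), an application that only ever steps through terms sequentially via NRT readers could configure:

    IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
    conf.setReaderTermsIndexDivisor(-1);   // NRT readers from this writer skip the terms index
    IndexWriter writer = new IndexWriter(dir, conf);
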
@@ -611,8 +577,7 @@ public final class IndexWriterConfig imp
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
sb.append("openMode=").append(openMode).append("\n");
- sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
- sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
+ sb.append("similarityProvider=").append(similarityProvider.getClass().getName()).append("\n");
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Wed Feb 9 09:35:27 2011
@@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy exte
* or larger will never be merged. @see setMaxMergeMB */
public static final double DEFAULT_MAX_MERGE_MB = 2048;
+ /** Default maximum segment size. A segment of this size
+ * or larger will never be merged during optimize. @see setMaxMergeMBForOptimize */
+ public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
+
public LogByteSizeMergePolicy() {
minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
+ maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
}
@Override
@@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy exte
return ((double) maxMergeSize)/1024/1024;
}
+ /** <p>Determines the largest segment (measured by total
+ * byte size of the segment's files, in MB) that may be
+ * merged with other segments during optimize. Setting
+ * it low will leave the index with more than 1 segment,
+ * even if {@link IndexWriter#optimize()} is called.*/
+ public void setMaxMergeMBForOptimize(double mb) {
+ maxMergeSizeForOptimize = (long) (mb*1024*1024);
+ }
+
+ /** Returns the largest segment (measured by total byte
+ * size of the segment's files, in MB) that may be merged
+ * with other segments during optimize.
+ * @see #setMaxMergeMBForOptimize */
+ public double getMaxMergeMBForOptimize() {
+ return ((double) maxMergeSizeForOptimize)/1024/1024;
+ }
+
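
For example (values are illustrative), an index that should stay only partly merged during optimize can cap the segment size considered for optimize merges:

    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMaxMergeMB(512.0);               // regular merges still capped at 512 MB
    mp.setMaxMergeMBForOptimize(2048.0);   // optimize() leaves segments over ~2 GB alone
    conf.setMergePolicy(mp);               // "conf" is an IndexWriterConfig (assumed)
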
/** Sets the minimum size for the lowest level segments.
* Any segments below this size are considered to be on
* the same level (even if they vary drastically in size)
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java Wed Feb 9 09:35:27 2011
@@ -31,9 +31,10 @@ public class LogDocMergePolicy extends L
public LogDocMergePolicy() {
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
- // maxMergeSize is never used by LogDocMergePolicy; set
+ // maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set
// it to Long.MAX_VALUE to disable it
maxMergeSize = Long.MAX_VALUE;
+ maxMergeSizeForOptimize = Long.MAX_VALUE;
}
@Override
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Feb 9 09:35:27 2011
@@ -18,6 +18,11 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
import java.util.Set;
/** <p>This class implements a {@link MergePolicy} that tries
@@ -63,7 +68,11 @@ public abstract class LogMergePolicy ext
protected long minMergeSize;
protected long maxMergeSize;
+ // Although the core MPs set it explicitly, we must default in case someone
+ // out there wrote his own LMP ...
+ protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+ protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -102,6 +111,21 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
+ /** If true, merges must be an in-order slice of the
+ * segments. If false, then the merge policy is free to
+ * pick any segments. The default is false, which is
+ * in general more efficient than true since it gives the
+ * merge policy more freedom to pick closely sized
+ * segments. */
+ public void setRequireContiguousMerge(boolean v) {
+ requireContiguousMerge = v;
+ }
+
+ /** See {@link #setRequireContiguousMerge}. */
+ public boolean getRequireContiguousMerge() {
+ return requireContiguousMerge;
+ }
+
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
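
As a rough usage sketch (not part of this patch), an application that depends on segments staying in insertion order could opt back into contiguous merges; the mergeFactor value here is arbitrary:

    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.index.MergePolicy;

    public class ContiguousMergeSketch {
      public static MergePolicy newContiguousPolicy() {
        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMergeFactor(10);               // merge 10 adjacent segments at a time
        mp.setRequireContiguousMerge(true);  // only in-order slices of segments may be merged
        return mp;
      }
    }
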
@@ -240,9 +264,9 @@ public abstract class LogMergePolicy ext
int start = last - 1;
while (start >= 0) {
SegmentInfo info = infos.info(start);
- if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+ if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
if (verbose()) {
- message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
+          message("optimize: skip segment=" + info + ": size is > maxMergeSizeForOptimize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
}
// need to skip that segment + add a merge for the 'right' segments,
// unless there is only 1 which is optimized.
@@ -326,9 +350,12 @@ public abstract class LogMergePolicy ext
}
/** Returns the merges necessary to optimize the index.
- * This merge policy defines "optimized" to mean only one
- * segment in the index, where that segment has no
- * deletions pending nor separate norms, and it is in
+   * This merge policy defines "optimized" to mean at most the
+   * requested number of segments remain in the index, while
+   * respecting the {@link #maxMergeSizeForOptimize} setting.
+   * By default, and assuming {@code maxNumSegments=1}, only
+   * one segment will be left in the index, where that segment
+   * has no pending deletions or separate norms, and it is in
* compound file format if the current useCompoundFile
* setting is true. This method returns multiple merges
* (mergeFactor at a time) so the {@link MergeScheduler}
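
Under this revised definition, optimize can also target a segment count greater than one. A minimal sketch (not part of this patch); the caller chooses maxSegments:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;

    public class PartialOptimizeSketch {
      // Merges down to at most maxSegments segments; with LogMergePolicy, segments
      // larger than maxMergeSizeForOptimize are simply left in place.
      public static void partialOptimize(IndexWriter writer, int maxSegments) throws IOException {
        writer.optimize(maxSegments);
      }
    }
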
@@ -350,6 +377,8 @@ public abstract class LogMergePolicy ext
}
return null;
}
+
+ // TODO: handle non-contiguous merge case differently?
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
@@ -382,7 +411,7 @@ public abstract class LogMergePolicy ext
boolean anyTooLarge = false;
for (int i = 0; i < last; i++) {
SegmentInfo info = infos.info(i);
- if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+ if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
anyTooLarge = true;
break;
}
@@ -448,6 +477,36 @@ public abstract class LogMergePolicy ext
return spec;
}
+ private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
+ SegmentInfo info;
+ float level;
+ int index;
+
+ public SegmentInfoAndLevel(SegmentInfo info, float level, int index) {
+ this.info = info;
+ this.level = level;
+ this.index = index;
+ }
+
+ // Sorts largest to smallest
+ public int compareTo(SegmentInfoAndLevel other) {
+ if (level < other.level)
+ return 1;
+ else if (level > other.level)
+ return -1;
+ else
+ return 0;
+ }
+ }
+
+ private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
+ public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
+ return o1.index - o2.index;
+ }
+ }
+
+ private static final SortByIndex sortByIndex = new SortByIndex();
+
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
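
The two comparators above implement a simple two-phase ordering: sort candidates by descending level so closely sized segments end up adjacent, then restore the chosen window to its original SegmentInfos order before building the merge. A standalone sketch of the same idea, using illustrative names only (not the committed classes):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;

    public class LevelSortSketch {
      static class Candidate {
        final String name;
        final float level;  // log(size) / log(mergeFactor)
        final int index;    // position in the original SegmentInfos
        Candidate(String name, float level, int index) {
          this.name = name; this.level = level; this.index = index;
        }
      }

      public static void main(String[] args) {
        List<Candidate> levels = new ArrayList<Candidate>();
        levels.add(new Candidate("_0", 3.2f, 0));
        levels.add(new Candidate("_1", 1.1f, 1));
        levels.add(new Candidate("_2", 2.7f, 2));

        // Phase 1: largest level first, so similarly sized segments become adjacent.
        Collections.sort(levels, new Comparator<Candidate>() {
          public int compare(Candidate a, Candidate b) {
            return Float.compare(b.level, a.level);
          }
        });

        // Phase 2: a chosen window is put back into index order before merging.
        Collections.sort(levels.subList(0, 2), new Comparator<Candidate>() {
          public int compare(Candidate a, Candidate b) {
            return a.index - b.index;
          }
        });

        for (Candidate c : levels) {
          System.out.println(c.name + " level=" + c.level + " index=" + c.index);
        }
      }
    }
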
@@ -464,17 +523,37 @@ public abstract class LogMergePolicy ext
// Compute levels, which is just log (base mergeFactor)
// of the size of each segment
- float[] levels = new float[numSegments];
+ final List<SegmentInfoAndLevel> levels = new ArrayList<SegmentInfoAndLevel>();
final float norm = (float) Math.log(mergeFactor);
+ final Collection<SegmentInfo> mergingSegments = writer.get().getMergingSegments();
+
for(int i=0;i<numSegments;i++) {
final SegmentInfo info = infos.info(i);
long size = size(info);
+      // When a contiguous merge is required, we still add the
+      // segment to levels, to avoid merging "across" a set of
+      // segments that is already being merged:
+ if (!requireContiguousMerge && mergingSegments.contains(info)) {
+ if (verbose()) {
+ message("seg " + info.name + " already being merged; skip");
+ }
+ continue;
+ }
+
// Floor tiny segments
- if (size < 1)
+ if (size < 1) {
size = 1;
- levels[i] = (float) Math.log(size)/norm;
+ }
+      final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
+      levels.add(infoLevel);
+      if (verbose()) {
+        message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+ }
+ }
+
+ if (!requireContiguousMerge) {
+ Collections.sort(levels);
}
final float levelFloor;
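
For intuition, a segment's level is just log base mergeFactor of its byte size, with tiny segments floored to 1; a quick standalone check (the sizes below are arbitrary):

    public class LevelMathSketch {
      static float level(long sizeInBytes, int mergeFactor) {
        long size = Math.max(sizeInBytes, 1);  // floor tiny segments
        return (float) (Math.log(size) / Math.log(mergeFactor));
      }

      public static void main(String[] args) {
        // With mergeFactor=10, a 10x size difference is exactly one level apart.
        System.out.println(level(1L << 20, 10));   // ~6.02 for a 1 MB segment
        System.out.println(level(10L << 20, 10));  // ~7.02 for a 10 MB segment
      }
    }
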
@@ -492,14 +571,16 @@ public abstract class LogMergePolicy ext
MergeSpecification spec = null;
+ final int numMergeableSegments = levels.size();
+
int start = 0;
- while(start < numSegments) {
+ while(start < numMergeableSegments) {
// Find max level of all segments not already
// quantized.
- float maxLevel = levels[start];
- for(int i=1+start;i<numSegments;i++) {
- final float level = levels[i];
+ float maxLevel = levels.get(start).level;
+ for(int i=1+start;i<numMergeableSegments;i++) {
+ final float level = levels.get(i).level;
if (level > maxLevel)
maxLevel = level;
}
@@ -518,9 +599,9 @@ public abstract class LogMergePolicy ext
levelBottom = levelFloor;
}
- int upto = numSegments-1;
+ int upto = numMergeableSegments-1;
while(upto >= start) {
- if (levels[upto] >= levelBottom) {
+ if (levels.get(upto).level >= levelBottom) {
break;
}
upto--;
@@ -533,18 +614,26 @@ public abstract class LogMergePolicy ext
while(end <= 1+upto) {
boolean anyTooLarge = false;
for(int i=start;i<end;i++) {
- final SegmentInfo info = infos.info(i);
+ final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
}
if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose())
+ if (verbose()) {
message(" " + start + " to " + end + ": add this merge");
- spec.add(new OneMerge(infos.range(start, end)));
- } else if (verbose())
+ }
+ Collections.sort(levels.subList(start, end), sortByIndex);
+ final SegmentInfos mergeInfos = new SegmentInfos();
+ for(int i=start;i<end;i++) {
+ mergeInfos.add(levels.get(i).info);
+ assert infos.contains(levels.get(i).info);
+ }
+ spec.add(new OneMerge(mergeInfos));
+ } else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+ }
start = end;
end = start + mergeFactor;
@@ -588,9 +677,11 @@ public abstract class LogMergePolicy ext
sb.append("minMergeSize=").append(minMergeSize).append(", ");
sb.append("mergeFactor=").append(mergeFactor).append(", ");
sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
+ sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
- sb.append("useCompoundFile=").append(useCompoundFile);
+ sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("requireContiguousMerge=").append(requireContiguousMerge);
sb.append("]");
return sb.toString();
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed Feb 9 09:35:27 2011
@@ -110,7 +110,7 @@ public abstract class MergePolicy implem
return aborted;
}
- synchronized void checkAborted(Directory dir) throws MergeAbortedException {
+ public synchronized void checkAborted(Directory dir) throws MergeAbortedException {
if (aborted) {
throw new MergeAbortedException("merge is aborted: " + segString(dir));
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiNorms.java Wed Feb 9 09:35:27 2011
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.ReaderUtil;
/**
@@ -61,26 +60,24 @@ public class MultiNorms {
ReaderUtil.gatherSubReaders(leaves, r);
int end = 0;
for (IndexReader leaf : leaves) {
+ Fields fields = leaf.fields();
+ boolean hasField = (fields != null && fields.terms(field) != null);
+
int start = end;
- leaf.norms(field, norms, start);
+ byte leafNorms[] = leaf.norms(field);
+ if (leafNorms == null) {
+ if (hasField) { // omitted norms
+ return null;
+ }
+ // doesn't have field, fill bytes
+ leafNorms = new byte[leaf.maxDoc()];
+ Arrays.fill(leafNorms, (byte) 0);
+ }
+
+ System.arraycopy(leafNorms, 0, norms, start, leafNorms.length);
end += leaf.maxDoc();
}
return norms;
}
}
-
- /**
- * Warning: this is heavy! Do not use in a loop, or implement norms()
- * in your own reader with this (you should likely cache the result).
- */
- public static void norms(IndexReader r, String field, byte[] bytes, int offset)
- throws IOException {
- // TODO: optimize more maybe
- byte[] norms = norms(r, field);
- if (norms == null) {
- Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
- } else {
- System.arraycopy(norms, 0, bytes, offset, r.maxDoc());
- }
- }
}
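
For callers, the visible change is that the top-level helper now returns null when norms are omitted for the field on any segment, and zero-fills the range of segments that simply lack the field. A small usage sketch (not part of this patch), assuming the single-argument MultiNorms.norms(reader, field) entry point shown above:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiNorms;

    public class TopLevelNormsSketch {
      // Materializes top-level norms for one field.  This is heavy (maxDoc() bytes),
      // so cache the result rather than recomputing it per query.
      public static byte[] normsFor(IndexReader reader, String field) throws IOException {
        byte[] norms = MultiNorms.norms(reader, field);
        if (norms == null) {
          // Norms are omitted for this field on at least one segment.
          return null;
        }
        return norms;
      }
    }
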
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java Wed Feb 9 09:35:27 2011
@@ -19,22 +19,22 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.HashMap;
import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.MapBackedSet;
/** An IndexReader which reads multiple indexes, appending
* their content. */
public class MultiReader extends IndexReader implements Cloneable {
protected IndexReader[] subReaders;
+ private final ReaderContext topLevelContext;
private int[] starts; // 1st docno for each segment
- private final Map<IndexReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<IndexReader,ReaderUtil.Slice>();
private boolean[] decrefOnClose; // remember which subreaders to decRef on close
private int maxDoc = 0;
private int numDocs = -1;
@@ -48,7 +48,7 @@ public class MultiReader extends IndexRe
* @param subReaders set of (sub)readers
*/
public MultiReader(IndexReader... subReaders) throws IOException {
- initialize(subReaders, true);
+ topLevelContext = initialize(subReaders, true);
}
/**
@@ -60,14 +60,13 @@ public class MultiReader extends IndexRe
* @param subReaders set of (sub)readers
*/
public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
- initialize(subReaders, closeSubReaders);
+ topLevelContext = initialize(subReaders, closeSubReaders);
}
- private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
+ private ReaderContext initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
this.subReaders = subReaders.clone();
starts = new int[subReaders.length + 1]; // build starts array
decrefOnClose = new boolean[subReaders.length];
-
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
maxDoc += subReaders[i].maxDoc(); // compute maxDocs
@@ -82,14 +81,10 @@ public class MultiReader extends IndexRe
if (subReaders[i].hasDeletions()) {
hasDeletions = true;
}
-
- final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i],
- subReaders[i].maxDoc(),
- i);
- subReaderToSlice.put(subReaders[i], slice);
}
-
starts[subReaders.length] = maxDoc;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
+ return ReaderUtil.buildReaderContext(this);
}
@Override
@@ -98,11 +93,6 @@ public class MultiReader extends IndexRe
}
@Override
- public int getSubReaderDocBase(IndexReader subReader) {
- return subReaderToSlice.get(subReader).start;
- }
-
- @Override
public Fields fields() throws IOException {
throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
}
@@ -317,12 +307,6 @@ public class MultiReader extends IndexRe
}
@Override
- public synchronized void norms(String field, byte[] result, int offset)
- throws IOException {
- throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
- }
-
- @Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
int i = readerIndex(n); // find segment num
@@ -363,11 +347,6 @@ public class MultiReader extends IndexRe
subReaders[i].close();
}
}
-
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
}
@Override
@@ -403,4 +382,25 @@ public class MultiReader extends IndexRe
public IndexReader[] getSequentialSubReaders() {
return subReaders;
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return topLevelContext;
+ }
+
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ for(IndexReader sub : subReaders) {
+ sub.addReaderFinishedListener(listener);
+ }
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ for(IndexReader sub : subReaders) {
+ sub.removeReaderFinishedListener(listener);
+ }
+ }
}
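
A small sketch (not part of this patch) of wiring a ReaderFinishedListener through a composite reader; the map here is a hypothetical application cache keyed on each reader's core cache key:

    import java.io.IOException;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiReader;

    public class ListenerWiringSketch {
      // Hypothetical application cache keyed on getCoreCacheKey().
      static final Map<Object, Object> CACHE = new ConcurrentHashMap<Object, Object>();

      public static MultiReader open(IndexReader... subs) throws IOException {
        MultiReader reader = new MultiReader(subs);
        // The listener is also pushed down to every sub-reader, and is copied
        // into any reader later produced by reopen or clone.
        reader.addReaderFinishedListener(new IndexReader.ReaderFinishedListener() {
          public void finished(IndexReader r) {
            CACHE.remove(r.getCoreCacheKey());
          }
        });
        return reader;
      }
    }
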
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTerms.java Wed Feb 9 09:35:27 2011
@@ -77,6 +77,19 @@ public final class MultiTerms extends Te
}
@Override
+ public long getSumTotalTermFreq() throws IOException {
+ long sum = 0;
+ for(Terms terms : subs) {
+ final long v = terms.getSumTotalTermFreq();
+ if (v == -1) {
+ return -1;
+ }
+ sum += v;
+ }
+ return sum;
+ }
+
+ @Override
public Comparator<BytesRef> getComparator() {
return termComp;
}
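
A usage sketch (not part of this patch) for the new aggregate statistic; the -1 sentinel simply propagates from any sub-reader that cannot supply it:

    import java.io.IOException;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.Terms;

    public class SumTotalTermFreqSketch {
      // Total number of token occurrences for the field across all segments,
      // or -1 if any segment cannot provide the statistic.
      public static long sumTotalTermFreq(IndexReader reader, String field) throws IOException {
        Fields fields = MultiFields.getFields(reader);
        if (fields == null) {
          return 0;
        }
        Terms terms = fields.terms(field);
        return terms == null ? 0 : terms.getSumTotalTermFreq();
      }
    }
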
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Wed Feb 9 09:35:27 2011
@@ -91,13 +91,6 @@ public final class MultiTermsEnum extend
}
@Override
- public void cacheCurrentTerm() throws IOException {
- for(int i=0;i<numTop;i++) {
- top[i].terms.cacheCurrentTerm();
- }
- }
-
- @Override
public Comparator<BytesRef> getComparator() {
return termComp;
}
@@ -264,7 +257,7 @@ public final class MultiTermsEnum extend
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
int sum = 0;
for(int i=0;i<numTop;i++) {
sum += top[i].terms.docFreq();
@@ -273,6 +266,19 @@ public final class MultiTermsEnum extend
}
@Override
+ public long totalTermFreq() throws IOException {
+ long sum = 0;
+ for(int i=0;i<numTop;i++) {
+ final long v = top[i].terms.totalTermFreq();
+ if (v == -1) {
+ return v;
+ }
+ sum += v;
+ }
+ return sum;
+ }
+
+ @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
final MultiDocsEnum docsEnum;
if (reuse != null) {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NoMergeScheduler.java Wed Feb 9 09:35:27 2011
@@ -23,7 +23,7 @@ import java.io.IOException;
* A {@link MergeScheduler} which never executes any merges. It is also a
* singleton and can be accessed through {@link NoMergeScheduler#INSTANCE}. Use
* it if you want to prevent an {@link IndexWriter} from ever executing merges,
- * irregardles of the {@link MergePolicy} used. Note that you can achieve the
+ * regardless of the {@link MergePolicy} used. Note that you can achieve the
* same thing by using {@link NoMergePolicy}, however with
* {@link NoMergeScheduler} you also ensure that no unnecessary code of any
* {@link MergeScheduler} implementation is ever executed. Hence it is
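
A configuration sketch (not part of this patch) that disables merging entirely; pairing the scheduler with NoMergePolicy is optional but also avoids the merge-selection work:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.NoMergePolicy;
    import org.apache.lucene.index.NoMergeScheduler;
    import org.apache.lucene.util.Version;

    public class NoMergesSketch {
      // Builds a config for an index that never merges, e.g. during a bulk load
      // whose segments will be merged elsewhere later.
      public static IndexWriterConfig noMerges(Analyzer analyzer) {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
        conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
        conf.setMergeScheduler(NoMergeScheduler.INSTANCE);
        return conf;
      }
    }
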
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java Wed Feb 9 09:35:27 2011
@@ -26,7 +26,6 @@ import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.search.Similarity;
// TODO FI: norms could actually be stored as doc store
@@ -37,7 +36,6 @@ import org.apache.lucene.search.Similari
final class NormsWriter extends InvertedDocEndConsumer {
- private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
@@ -62,6 +60,10 @@ final class NormsWriter extends Inverted
final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
+ if (!fieldInfos.hasNorms()) {
+ return;
+ }
+
// Typically, each thread will have encountered the same
// field. So first we collate by field, ie, all
// per-thread field instances that correspond to the
@@ -137,7 +139,7 @@ final class NormsWriter extends Inverted
// Fill hole
for(;upto<minDocID;upto++)
- normsOut.writeByte(defaultNorm);
+ normsOut.writeByte((byte) 0);
normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
(uptos[minLoc])++;
@@ -155,12 +157,12 @@ final class NormsWriter extends Inverted
// Fill final hole with defaultNorm
for(;upto<state.numDocs;upto++)
- normsOut.writeByte(defaultNorm);
+ normsOut.writeByte((byte) 0);
} else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
normCount++;
// Fill entire field with default norm:
for(;upto<state.numDocs;upto++)
- normsOut.writeByte(defaultNorm);
+ normsOut.writeByte((byte) 0);
}
assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Wed Feb 9 09:35:27 2011
@@ -17,8 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.ArrayUtil;
/** Taps into DocInverter, as an InvertedDocEndConsumer,
* which is called at the end of inverting each field. We
@@ -30,7 +30,8 @@ final class NormsWriterPerField extends
final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
final DocumentsWriter.DocState docState;
-
+ final Similarity similarity;
+
// Holds all docID/norm pairs we've seen
int[] docIDs = new int[1];
byte[] norms = new byte[1];
@@ -50,6 +51,7 @@ final class NormsWriterPerField extends
this.fieldInfo = fieldInfo;
docState = perThread.docState;
fieldState = docInverterPerField.fieldState;
+ similarity = docState.similarityProvider.get(fieldInfo.name);
}
@Override
@@ -72,8 +74,8 @@ final class NormsWriterPerField extends
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1+upto);
}
- final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
- norms[upto] = Similarity.getDefault().encodeNormValue(norm);
+ final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
+ norms[upto] = similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
}
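
Norms remain a single byte per document; the difference is that both the computation and the encoding now go through the per-field Similarity obtained from the provider, instead of the old static default. A tiny round-trip sketch (not part of this patch), assuming DefaultSimilarity's encodeNormValue/decodeNormValue pair:

    import org.apache.lucene.search.DefaultSimilarity;

    public class NormEncodingSketch {
      public static void main(String[] args) {
        DefaultSimilarity sim = new DefaultSimilarity();
        // Encode the norm a 16-term field would typically get (1/sqrt(16)), then decode it.
        byte encoded = sim.encodeNormValue(1.0f / (float) Math.sqrt(16));
        float decoded = sim.decodeNormValue(encoded);
        System.out.println("encoded=" + encoded + " decoded=" + decoded);  // encoding is lossy
      }
    }
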