Posted to commits@lucene.apache.org by si...@apache.org on 2010/12/06 01:47:28 UTC
svn commit: r1042501 [3/13] - in /lucene/dev/branches/docvalues: ./ lucene/
lucene/contrib/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucene/c...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Dec 6 00:47:16 2010
@@ -315,13 +315,15 @@ final class DocumentsWriter {
}
private boolean closed;
+ private final FieldInfos fieldInfos;
- DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates) throws IOException {
+ DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarity = writer.getConfig().getSimilarity();
this.maxThreadStates = maxThreadStates;
flushedDocCount = writer.maxDoc();
+ this.fieldInfos = fieldInfos;
consumer = indexingChain.getChain(this);
if (consumer instanceof DocFieldProcessor) {
@@ -329,10 +331,14 @@ final class DocumentsWriter {
}
}
+ public FieldInfos getFieldInfos() {
+ return fieldInfos;
+ }
+
/** Returns true if any of the fields in the current
* buffered docs have omitTermFreqAndPositions==false */
boolean hasProx() {
- return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx()
+ return (docFieldProcessor != null) ? fieldInfos.hasProx()
: true;
}
@@ -602,8 +608,8 @@ final class DocumentsWriter {
synchronized private void initFlushState(boolean onlyDocStore) {
initSegmentName(onlyDocStore);
- final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs);
- flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
+ final SegmentCodecs info = SegmentCodecs.build(fieldInfos, writer.codecs);
+ flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info, bytesUsed);
}
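
[Editorial note] The DocumentsWriter hunk above moves ownership of FieldInfos up to IndexWriter: the writer now hands a single FieldInfos instance to DocumentsWriter, which consults it directly in hasProx() and initFlushState() instead of reaching into docFieldProcessor.fieldInfos. The following is a minimal, self-contained sketch of that sharing pattern; FieldRegistry, DocsWriterSketch and WriterSketch are illustrative stand-ins, not the real Lucene classes.

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Shared per-writer registry of field metadata, analogous in role to FieldInfos.
    final class FieldRegistry {
        private final Map<String, Boolean> hasProxByField = new LinkedHashMap<String, Boolean>();

        synchronized void register(String field, boolean hasProx) {
            Boolean prev = hasProxByField.get(field);
            hasProxByField.put(field, prev == null ? hasProx : (prev || hasProx));
        }

        synchronized boolean anyFieldHasProx() {
            return hasProxByField.containsValue(Boolean.TRUE);
        }
    }

    // Analogue of DocumentsWriter: receives the shared registry from its owner.
    final class DocsWriterSketch {
        private final FieldRegistry fieldRegistry;

        DocsWriterSketch(FieldRegistry fieldRegistry) {
            this.fieldRegistry = fieldRegistry;
        }

        boolean hasProx() {
            // Mirrors the patched hasProx(): ask the shared registry directly.
            return fieldRegistry.anyFieldHasProx();
        }
    }

    // Analogue of IndexWriter: owns the registry and passes it down at construction.
    final class WriterSketch {
        final FieldRegistry fieldRegistry = new FieldRegistry();
        final DocsWriterSketch docsWriter = new DocsWriterSketch(fieldRegistry);
    }

The point of the pattern is that both components observe the same field metadata, so decisions such as hasProx() or codec selection at flush time cannot drift apart.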
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Mon Dec 6 00:47:16 2010
@@ -291,7 +291,7 @@ public class FilterIndexReader extends I
@Override
public Bits getDeletedDocs() {
- return MultiFields.getDeletedDocs(in);
+ return in.getDeletedDocs();
}
@Override
@@ -427,12 +427,12 @@ public class FilterIndexReader extends I
@Override
public IndexReader[] getSequentialSubReaders() {
- return null;
+ return in.getSequentialSubReaders();
}
@Override
public Fields fields() throws IOException {
- return MultiFields.getFields(in);
+ return in.fields();
}
/** If the subclass of FilteredIndexReader modifies the
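
[Editorial note] In the FilterIndexReader hunk, getDeletedDocs(), getSequentialSubReaders() and fields() now forward to the wrapped reader `in` rather than returning null or recomputing the answer through MultiFields. A hedged sketch of that default-delegation idea, using hypothetical types rather than the real reader classes:

    // Hypothetical minimal reader interface, standing in for IndexReader.
    interface ReaderLike {
        ReaderLike[] sequentialSubReaders();
        String fields();
    }

    // Filter wrapper: by default every method forwards to the wrapped instance,
    // as the patched FilterIndexReader now does.
    class FilterReaderLike implements ReaderLike {
        protected final ReaderLike in;

        FilterReaderLike(ReaderLike in) {
            this.in = in;
        }

        public ReaderLike[] sequentialSubReaders() {
            return in.sequentialSubReaders();  // was: return null
        }

        public String fields() {
            return in.fields();                // was: recomputed via a MultiFields-style helper
        }
    }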
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Mon Dec 6 00:47:16 2010
@@ -21,7 +21,6 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
@@ -31,7 +30,6 @@ import org.apache.lucene.store.BufferedI
import org.apache.lucene.util.Constants;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.Version;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@@ -180,69 +178,12 @@ import java.util.Date;
* keeps track of the last non commit checkpoint.
*/
public class IndexWriter implements Closeable {
-
- /**
- * Default value for the write lock timeout (1,000).
- * @see #setDefaultWriteLockTimeout
- * @deprecated use {@link IndexWriterConfig#WRITE_LOCK_TIMEOUT} instead
- */
- @Deprecated
- public static long WRITE_LOCK_TIMEOUT = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
-
- private long writeLockTimeout;
-
/**
* Name of the write lock in the index.
*/
public static final String WRITE_LOCK_NAME = "write.lock";
/**
- * Value to denote a flush trigger is disabled
- * @deprecated use {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} instead
- */
- @Deprecated
- public final static int DISABLE_AUTO_FLUSH = IndexWriterConfig.DISABLE_AUTO_FLUSH;
-
- /**
- * Disabled by default (because IndexWriter flushes by RAM usage
- * by default). Change using {@link #setMaxBufferedDocs(int)}.
- * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DOCS} instead.
- */
- @Deprecated
- public final static int DEFAULT_MAX_BUFFERED_DOCS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
-
- /**
- * Default value is 16 MB (which means flush when buffered
- * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
- * @deprecated use {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} instead.
- */
- @Deprecated
- public final static double DEFAULT_RAM_BUFFER_SIZE_MB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
-
- /**
- * Disabled by default (because IndexWriter flushes by RAM usage
- * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
- * @deprecated use {@link IndexWriterConfig#DEFAULT_MAX_BUFFERED_DELETE_TERMS} instead
- */
- @Deprecated
- public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
-
- /**
- * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
- *
- * @deprecated see {@link IndexWriterConfig}
- */
- @Deprecated
- public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
-
- /**
- * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
- * @deprecated use {@link IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL} instead.
- */
- @Deprecated
- public final static int DEFAULT_TERM_INDEX_INTERVAL = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
-
- /**
* Absolute hard maximum length for a term, in bytes once
* encoded as UTF8. If a term arrives from the analyzer
* longer than this length, it is skipped and a message is
@@ -268,9 +209,6 @@ public class IndexWriter implements Clos
private final Directory directory; // where this index resides
private final Analyzer analyzer; // how to analyze text
- // TODO 4.0: this should be made final once the setter is out
- private /*final*/Similarity similarity = Similarity.getDefault(); // how to normalize
-
private volatile long changeCount; // increments every time a change is completed
private long lastCommitChangeCount; // last changeCount that was committed
@@ -290,8 +228,7 @@ public class IndexWriter implements Clos
private Lock writeLock;
- // TODO 4.0: this should be made final once the setter is out
- private /*final*/int termIndexInterval;
+ private final int termIndexInterval;
private boolean closed;
private boolean closing;
@@ -301,8 +238,7 @@ public class IndexWriter implements Clos
private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
private MergePolicy mergePolicy;
- // TODO 4.0: this should be made final once the setter is removed
- private /*final*/MergeScheduler mergeScheduler;
+ private final MergeScheduler mergeScheduler;
private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
private Set<MergePolicy.OneMerge> runningMerges = new HashSet<MergePolicy.OneMerge>();
private List<MergePolicy.OneMerge> mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
@@ -719,287 +655,6 @@ public class IndexWriter implements Clos
this.infoStream = infoStream;
}
- /**
- * Casts current mergePolicy to LogMergePolicy, and throws
- * an exception if the mergePolicy is not a LogMergePolicy.
- */
- private LogMergePolicy getLogMergePolicy() {
- if (mergePolicy instanceof LogMergePolicy)
- return (LogMergePolicy) mergePolicy;
- else
- throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
- }
-
- /** <p>Get the current setting of whether newly flushed
- * segments will use the compound file format. Note that
- * this just returns the value previously set with
- * setUseCompoundFile(boolean), or the default value
- * (true). You cannot use this to query the status of
- * previously flushed segments.</p>
- *
- * <p>Note that this method is a convenience method: it
- * just calls mergePolicy.getUseCompoundFile as long as
- * mergePolicy is an instance of {@link LogMergePolicy}.
- * Otherwise an IllegalArgumentException is thrown.</p>
- *
- * @see #setUseCompoundFile(boolean)
- * @deprecated use {@link LogMergePolicy#getUseCompoundDocStore()} and
- * {@link LogMergePolicy#getUseCompoundFile()} directly.
- */
- @Deprecated
- public boolean getUseCompoundFile() {
- return getLogMergePolicy().getUseCompoundFile();
- }
-
- /**
- * <p>
- * Setting to turn on usage of a compound file. When on, multiple files for
- * each segment are merged into a single file when a new segment is flushed.
- * </p>
- *
- * <p>
- * Note that this method is a convenience method: it just calls
- * mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
- * {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.
- * </p>
- *
- * @deprecated use {@link LogMergePolicy#setUseCompoundDocStore(boolean)} and
- * {@link LogMergePolicy#setUseCompoundFile(boolean)} directly.
- * Note that this method set the given value on both, therefore
- * you should consider doing the same.
- */
- @Deprecated
- public void setUseCompoundFile(boolean value) {
- getLogMergePolicy().setUseCompoundFile(value);
- getLogMergePolicy().setUseCompoundDocStore(value);
- }
-
- /** Expert: Set the Similarity implementation used by this IndexWriter.
- *
- * @see Similarity#setDefault(Similarity)
- * @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
- */
- @Deprecated
- public void setSimilarity(Similarity similarity) {
- ensureOpen();
- this.similarity = similarity;
- docWriter.setSimilarity(similarity);
- // Required so config.getSimilarity returns the right value. But this will
- // go away together with the method in 4.0.
- config.setSimilarity(similarity);
- }
-
- /** Expert: Return the Similarity implementation used by this IndexWriter.
- *
- * <p>This defaults to the current value of {@link Similarity#getDefault()}.
- * @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
- */
- @Deprecated
- public Similarity getSimilarity() {
- ensureOpen();
- return similarity;
- }
-
- /** Expert: Set the interval between indexed terms. Large values cause less
- * memory to be used by IndexReader, but slow random-access to terms. Small
- * values cause more memory to be used by an IndexReader, and speed
- * random-access to terms.
- *
- * This parameter determines the amount of computation required per query
- * term, regardless of the number of documents that contain that term. In
- * particular, it is the maximum number of other terms that must be
- * scanned before a term is located and its frequency and position information
- * may be processed. In a large index with user-entered query terms, query
- * processing time is likely to be dominated not by term lookup but rather
- * by the processing of frequency and positional data. In a small index
- * or when many uncommon query terms are generated (e.g., by wildcard
- * queries) term lookup may become a dominant cost.
- *
- * In particular, <code>numUniqueTerms/interval</code> terms are read into
- * memory by an IndexReader, and, on average, <code>interval/2</code> terms
- * must be scanned for each random term access.
- *
- * @see #DEFAULT_TERM_INDEX_INTERVAL
- * @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
- */
- @Deprecated
- public void setTermIndexInterval(int interval) {
- ensureOpen();
- this.termIndexInterval = interval;
- // Required so config.getTermIndexInterval returns the right value. But this
- // will go away together with the method in 4.0.
- config.setTermIndexInterval(interval);
- }
-
- /** Expert: Return the interval between indexed terms.
- *
- * @see #setTermIndexInterval(int)
- * @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
- */
- @Deprecated
- public int getTermIndexInterval() {
- // We pass false because this method is called by SegmentMerger while we are in the process of closing
- ensureOpen(false);
- return termIndexInterval;
- }
-
- /**
- * Constructs an IndexWriter for the index in <code>d</code>.
- * Text will be analyzed with <code>a</code>. If <code>create</code>
- * is true, then a new, empty index will be created in
- * <code>d</code>, replacing the index already there, if any.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param create <code>true</code> to create the index or overwrite
- * the existing one; <code>false</code> to append to the existing
- * index
- * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
- * via the MaxFieldLength constructor.
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if the directory cannot be read/written to, or
- * if it does not exist and <code>create</code> is
- * <code>false</code> or if there is any other low-level
- * IO error
- * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
- */
- @Deprecated
- public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
- create ? OpenMode.CREATE : OpenMode.APPEND).setMaxFieldLength(
- mfl.getLimit()));
- }
-
- /**
- * Constructs an IndexWriter for the index in
- * <code>d</code>, first creating it if it does not
- * already exist. Text will be analyzed with
- * <code>a</code>.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param mfl Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
- * via the MaxFieldLength constructor.
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if the directory cannot be
- * read/written to or if there is any other low-level
- * IO error
- * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
- */
- @Deprecated
- public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- this(d, new IndexWriterConfig(Version.LUCENE_31, a)
- .setMaxFieldLength(mfl.getLimit()));
- }
-
- /**
- * Expert: constructs an IndexWriter with a custom {@link
- * IndexDeletionPolicy}, for the index in <code>d</code>,
- * first creating it if it does not already exist. Text
- * will be analyzed with <code>a</code>.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param deletionPolicy see <a href="#deletionPolicy">above</a>
- * @param mfl whether or not to limit field lengths
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if the directory cannot be
- * read/written to or if there is any other low-level
- * IO error
- * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
- */
- @Deprecated
- public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- this(d, new IndexWriterConfig(Version.LUCENE_31, a).setMaxFieldLength(
- mfl.getLimit()).setIndexDeletionPolicy(deletionPolicy));
- }
-
- /**
- * Expert: constructs an IndexWriter with a custom {@link
- * IndexDeletionPolicy}, for the index in <code>d</code>.
- * Text will be analyzed with <code>a</code>. If
- * <code>create</code> is true, then a new, empty index
- * will be created in <code>d</code>, replacing the index
- * already there, if any.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param create <code>true</code> to create the index or overwrite
- * the existing one; <code>false</code> to append to the existing
- * index
- * @param deletionPolicy see <a href="#deletionPolicy">above</a>
- * @param mfl {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if the directory cannot be read/written to, or
- * if it does not exist and <code>create</code> is
- * <code>false</code> or if there is any other low-level
- * IO error
- * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
- */
- @Deprecated
- public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- this(d, new IndexWriterConfig(Version.LUCENE_31, a).setOpenMode(
- create ? OpenMode.CREATE : OpenMode.APPEND).setMaxFieldLength(
- mfl.getLimit()).setIndexDeletionPolicy(deletionPolicy));
- }
-
- /**
- * Expert: constructs an IndexWriter on specific commit
- * point, with a custom {@link IndexDeletionPolicy}, for
- * the index in <code>d</code>. Text will be analyzed
- * with <code>a</code>.
- *
- * <p> This is only meaningful if you've used a {@link
- * IndexDeletionPolicy} in that past that keeps more than
- * just the last commit.
- *
- * <p>This operation is similar to {@link #rollback()},
- * except that method can only rollback what's been done
- * with the current instance of IndexWriter since its last
- * commit, whereas this method can rollback to an
- * arbitrary commit point from the past, assuming the
- * {@link IndexDeletionPolicy} has preserved past
- * commits.
- *
- * @param d the index directory
- * @param a the analyzer to use
- * @param deletionPolicy see <a href="#deletionPolicy">above</a>
- * @param mfl whether or not to limit field lengths, value is in number of terms/tokens. See {@link org.apache.lucene.index.IndexWriter.MaxFieldLength}.
- * @param commit which commit to open
- * @throws CorruptIndexException if the index is corrupt
- * @throws LockObtainFailedException if another writer
- * has this index open (<code>write.lock</code> could not
- * be obtained)
- * @throws IOException if the directory cannot be read/written to, or
- * if it does not exist and <code>create</code> is
- * <code>false</code> or if there is any other low-level
- * IO error
- * @deprecated use {@link #IndexWriter(Directory, IndexWriterConfig)} instead
- */
- @Deprecated
- public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit)
- throws CorruptIndexException, LockObtainFailedException, IOException {
- this(d, new IndexWriterConfig(Version.LUCENE_31, a)
- .setOpenMode(OpenMode.APPEND).setMaxFieldLength(mfl.getLimit())
- .setIndexDeletionPolicy(deletionPolicy).setIndexCommit(commit));
- }
-
CodecProvider codecs;
/**
@@ -1038,8 +693,6 @@ public class IndexWriter implements Clos
setMessageID(defaultInfoStream);
maxFieldLength = conf.getMaxFieldLength();
termIndexInterval = conf.getTermIndexInterval();
- writeLockTimeout = conf.getWriteLockTimeout();
- similarity = conf.getSimilarity();
mergePolicy = conf.getMergePolicy();
mergePolicy.setIndexWriter(this);
mergeScheduler = conf.getMergeScheduler();
@@ -1061,7 +714,7 @@ public class IndexWriter implements Clos
writeLock = directory.makeLock(WRITE_LOCK_NAME);
- if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
+ if (!writeLock.obtain(conf.getWriteLockTimeout())) // obtain write lock
throw new LockObtainFailedException("Index locked for write: " + writeLock);
boolean success = false;
@@ -1111,7 +764,7 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates());
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos());
docWriter.setInfoStream(infoStream);
docWriter.setMaxFieldLength(maxFieldLength);
@@ -1154,7 +807,27 @@ public class IndexWriter implements Clos
}
}
}
-
+
+ private FieldInfos getCurrentFieldInfos() throws IOException {
+ final FieldInfos fieldInfos;
+ if (segmentInfos.size() > 0) {
+ SegmentInfo info = segmentInfos.info(segmentInfos.size()-1);
+ Directory cfsDir;
+ if (info.getUseCompoundFile()) {
+ cfsDir = new CompoundFileReader(directory, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+ } else {
+ cfsDir = directory;
+ }
+ fieldInfos = new FieldInfos(cfsDir, IndexFileNames.segmentFileName(info.name, "", IndexFileNames.FIELD_INFOS_EXTENSION));
+ if (info.getUseCompoundFile()) {
+ cfsDir.close();
+ }
+ } else {
+ fieldInfos = new FieldInfos();
+ }
+ return fieldInfos;
+ }
+
private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
rollbackSegmentInfos = (SegmentInfos) infos.clone();
rollbackSegments = new HashMap<SegmentInfo,Integer>();
@@ -1177,202 +850,12 @@ public class IndexWriter implements Clos
}
/**
- * Expert: set the merge policy used by this writer.
- *
- * @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
- */
- @Deprecated
- public void setMergePolicy(MergePolicy mp) {
- ensureOpen();
- if (mp == null)
- throw new NullPointerException("MergePolicy must be non-null");
-
- if (mergePolicy != mp)
- mergePolicy.close();
- mergePolicy = mp;
- mergePolicy.setIndexWriter(this);
- pushMaxBufferedDocs();
- if (infoStream != null)
- message("setMergePolicy " + mp);
- // Required so config.getMergePolicy returns the right value. But this will
- // go away together with the method in 4.0.
- config.setMergePolicy(mp);
- }
-
- /**
- * Expert: returns the current MergePolicy in use by this writer.
- * @see #setMergePolicy
- *
- * @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
- */
- @Deprecated
- public MergePolicy getMergePolicy() {
- ensureOpen();
- return mergePolicy;
- }
-
- /**
- * Expert: set the merge scheduler used by this writer.
- * @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
- */
- @Deprecated
- synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
- ensureOpen();
- if (mergeScheduler == null)
- throw new NullPointerException("MergeScheduler must be non-null");
-
- if (this.mergeScheduler != mergeScheduler) {
- finishMerges(true);
- this.mergeScheduler.close();
- }
- this.mergeScheduler = mergeScheduler;
- if (infoStream != null)
- message("setMergeScheduler " + mergeScheduler);
- // Required so config.getMergeScheduler returns the right value. But this will
- // go away together with the method in 4.0.
- config.setMergeScheduler(mergeScheduler);
- }
-
- /**
- * Expert: returns the current MergeScheduler in use by this
- * writer.
- * @see #setMergeScheduler(MergeScheduler)
- * @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
- */
- @Deprecated
- public MergeScheduler getMergeScheduler() {
- ensureOpen();
- return mergeScheduler;
- }
-
- /** <p>Determines the largest segment (measured by
- * document count) that may be merged with other segments.
- * Small values (e.g., less than 10,000) are best for
- * interactive indexing, as this limits the length of
- * pauses while indexing to a few seconds. Larger values
- * are best for batched indexing and speedier
- * searches.</p>
- *
- * <p>The default value is {@link Integer#MAX_VALUE}.</p>
- *
- * <p>Note that this method is a convenience method: it
- * just calls mergePolicy.setMaxMergeDocs as long as
- * mergePolicy is an instance of {@link LogMergePolicy}.
- * Otherwise an IllegalArgumentException is thrown.</p>
- *
- * <p>The default merge policy ({@link
- * LogByteSizeMergePolicy}) also allows you to set this
- * limit by net size (in MB) of the segment, using {@link
- * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
- * @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
- */
- @Deprecated
- public void setMaxMergeDocs(int maxMergeDocs) {
- getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
- }
-
- /**
- * <p>Returns the largest segment (measured by document
- * count) that may be merged with other segments.</p>
- *
- * <p>Note that this method is a convenience method: it
- * just calls mergePolicy.getMaxMergeDocs as long as
- * mergePolicy is an instance of {@link LogMergePolicy}.
- * Otherwise an IllegalArgumentException is thrown.</p>
- *
- * @see #setMaxMergeDocs
- * @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
- */
- @Deprecated
- public int getMaxMergeDocs() {
- return getLogMergePolicy().getMaxMergeDocs();
- }
-
- /**
- * The maximum number of terms that will be indexed for a single field in a
- * document. This limits the amount of memory required for indexing, so that
- * collections with very large files will not crash the indexing process by
- * running out of memory. This setting refers to the number of running terms,
- * not to the number of different terms.<p/>
- * <strong>Note:</strong> this silently truncates large documents, excluding from the
- * index all terms that occur further in the document. If you know your source
- * documents are large, be sure to set this value high enough to accomodate
- * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
- * is your memory, but you should anticipate an OutOfMemoryError.<p/>
- * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
- * will be indexed for a field.
- * @deprecated use {@link IndexWriterConfig#setMaxFieldLength(int)} instead
- */
- @Deprecated
- public void setMaxFieldLength(int maxFieldLength) {
- ensureOpen();
- this.maxFieldLength = maxFieldLength;
- docWriter.setMaxFieldLength(maxFieldLength);
- if (infoStream != null)
- message("setMaxFieldLength " + maxFieldLength);
- // Required so config.getMaxFieldLength returns the right value. But this
- // will go away together with the method in 4.0.
- config.setMaxFieldLength(maxFieldLength);
- }
-
- /**
- * Returns the maximum number of terms that will be
- * indexed for a single field in a document.
- * @see #setMaxFieldLength
- * @deprecated use {@link IndexWriterConfig#getMaxFieldLength()} instead
- */
- @Deprecated
- public int getMaxFieldLength() {
- ensureOpen();
- return maxFieldLength;
- }
-
- /** Determines the minimal number of documents required
- * before the buffered in-memory documents are flushed as
- * a new Segment. Large values generally gives faster
- * indexing.
- *
- * <p>When this is set, the writer will flush every
- * maxBufferedDocs added documents. Pass in {@link
- * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
- * to number of buffered documents. Note that if flushing
- * by RAM usage is also enabled, then the flush will be
- * triggered by whichever comes first.</p>
- *
- * <p>Disabled by default (writer flushes by RAM usage).</p>
- *
- * @throws IllegalArgumentException if maxBufferedDocs is
- * enabled but smaller than 2, or it disables maxBufferedDocs
- * when ramBufferSize is already disabled
- * @see #setRAMBufferSizeMB
- * @deprecated use {@link IndexWriterConfig#setMaxBufferedDocs(int)} instead.
- */
- @Deprecated
- public void setMaxBufferedDocs(int maxBufferedDocs) {
- ensureOpen();
- if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
- throw new IllegalArgumentException(
- "maxBufferedDocs must at least be 2 when enabled");
- if (maxBufferedDocs == DISABLE_AUTO_FLUSH
- && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
- throw new IllegalArgumentException(
- "at least one of ramBufferSize and maxBufferedDocs must be enabled");
- docWriter.setMaxBufferedDocs(maxBufferedDocs);
- pushMaxBufferedDocs();
- if (infoStream != null)
- message("setMaxBufferedDocs " + maxBufferedDocs);
- // Required so config.getMaxBufferedDocs returns the right value. But this
- // will go away together with the method in 4.0.
- config.setMaxBufferedDocs(maxBufferedDocs);
- }
-
- /**
* If we are flushing by doc count (not by RAM usage), and
* using LogDocMergePolicy then push maxBufferedDocs down
* as its minMergeDocs, to keep backwards compatibility.
*/
private void pushMaxBufferedDocs() {
- if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
+ if (docWriter.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final MergePolicy mp = mergePolicy;
if (mp instanceof LogDocMergePolicy) {
LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
@@ -1386,164 +869,6 @@ public class IndexWriter implements Clos
}
}
- /**
- * Returns the number of buffered added documents that will
- * trigger a flush if enabled.
- * @see #setMaxBufferedDocs
- * @deprecated use {@link IndexWriterConfig#getMaxBufferedDocs()} instead.
- */
- @Deprecated
- public int getMaxBufferedDocs() {
- ensureOpen();
- return docWriter.getMaxBufferedDocs();
- }
-
- /** Determines the amount of RAM that may be used for
- * buffering added documents and deletions before they are
- * flushed to the Directory. Generally for faster
- * indexing performance it's best to flush by RAM usage
- * instead of document count and use as large a RAM buffer
- * as you can.
- *
- * <p>When this is set, the writer will flush whenever
- * buffered documents and deletions use this much RAM.
- * Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
- * triggering a flush due to RAM usage. Note that if
- * flushing by document count is also enabled, then the
- * flush will be triggered by whichever comes first.</p>
- *
- * <p> <b>NOTE</b>: the account of RAM usage for pending
- * deletions is only approximate. Specifically, if you
- * delete by Query, Lucene currently has no way to measure
- * the RAM usage if individual Queries so the accounting
- * will under-estimate and you should compensate by either
- * calling commit() periodically yourself, or by using
- * {@link #setMaxBufferedDeleteTerms} to flush by count
- * instead of RAM usage (each buffered delete Query counts
- * as one).
- *
- * <p> <b>NOTE</b>: because IndexWriter uses
- * <code>int</code>s when managing its internal storage,
- * the absolute maximum value for this setting is somewhat
- * less than 2048 MB. The precise limit depends on
- * various factors, such as how large your documents are,
- * how many fields have norms, etc., so it's best to set
- * this value comfortably under 2048.</p>
- *
- * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
- *
- * @throws IllegalArgumentException if ramBufferSize is
- * enabled but non-positive, or it disables ramBufferSize
- * when maxBufferedDocs is already disabled
- * @deprecated use {@link IndexWriterConfig#setRAMBufferSizeMB(double)} instead.
- */
- @Deprecated
- public void setRAMBufferSizeMB(double mb) {
- if (mb > 2048.0) {
- throw new IllegalArgumentException("ramBufferSize " + mb + " is too large; should be comfortably less than 2048");
- }
- if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
- throw new IllegalArgumentException(
- "ramBufferSize should be > 0.0 MB when enabled");
- if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
- throw new IllegalArgumentException(
- "at least one of ramBufferSize and maxBufferedDocs must be enabled");
- docWriter.setRAMBufferSizeMB(mb);
- if (infoStream != null)
- message("setRAMBufferSizeMB " + mb);
- // Required so config.getRAMBufferSizeMB returns the right value. But this
- // will go away together with the method in 4.0.
- config.setRAMBufferSizeMB(mb);
- }
-
- /**
- * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
- * @deprecated use {@link IndexWriterConfig#getRAMBufferSizeMB()} instead.
- */
- @Deprecated
- public double getRAMBufferSizeMB() {
- return docWriter.getRAMBufferSizeMB();
- }
-
- /**
- * <p>Determines the minimal number of delete terms required before the buffered
- * in-memory delete terms are applied and flushed. If there are documents
- * buffered in memory at the time, they are merged and a new segment is
- * created.</p>
-
- * <p>Disabled by default (writer flushes by RAM usage).</p>
- *
- * @throws IllegalArgumentException if maxBufferedDeleteTerms
- * is enabled but smaller than 1
- * @see #setRAMBufferSizeMB
- * @deprecated use {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)} instead.
- */
- @Deprecated
- public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
- ensureOpen();
- if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
- && maxBufferedDeleteTerms < 1)
- throw new IllegalArgumentException(
- "maxBufferedDeleteTerms must at least be 1 when enabled");
- docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
- if (infoStream != null)
- message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
- // Required so config.getMaxBufferedDeleteTerms returns the right value. But
- // this will go away together with the method in 4.0.
- config.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
- }
-
- /**
- * Returns the number of buffered deleted terms that will
- * trigger a flush if enabled.
- * @see #setMaxBufferedDeleteTerms
- * @deprecated use {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} instead
- */
- @Deprecated
- public int getMaxBufferedDeleteTerms() {
- ensureOpen();
- return docWriter.getMaxBufferedDeleteTerms();
- }
-
- /** Determines how often segment indices are merged by addDocument(). With
- * smaller values, less RAM is used while indexing, and searches on
- * unoptimized indices are faster, but indexing speed is slower. With larger
- * values, more RAM is used during indexing, and while searches on unoptimized
- * indices are slower, indexing is faster. Thus larger values (> 10) are best
- * for batch index creation, and smaller values (< 10) for indices that are
- * interactively maintained.
- *
- * <p>Note that this method is a convenience method: it
- * just calls mergePolicy.setMergeFactor as long as
- * mergePolicy is an instance of {@link LogMergePolicy}.
- * Otherwise an IllegalArgumentException is thrown.</p>
- *
- * <p>This must never be less than 2. The default value is 10.
- * @deprecated use {@link LogMergePolicy#setMergeFactor(int)} directly.
- */
- @Deprecated
- public void setMergeFactor(int mergeFactor) {
- getLogMergePolicy().setMergeFactor(mergeFactor);
- }
-
- /**
- * <p>Returns the number of segments that are merged at
- * once and also controls the total number of segments
- * allowed to accumulate in the index.</p>
- *
- * <p>Note that this method is a convenience method: it
- * just calls mergePolicy.getMergeFactor as long as
- * mergePolicy is an instance of {@link LogMergePolicy}.
- * Otherwise an IllegalArgumentException is thrown.</p>
- *
- * @see #setMergeFactor
- * @deprecated use {@link LogMergePolicy#getMergeFactor()} directly.
- */
- @Deprecated
- public int getMergeFactor() {
- return getLogMergePolicy().getMergeFactor();
- }
-
/** If non-null, this will be the default infoStream used
* by a newly instantiated IndexWriter.
* @see #setInfoStream
@@ -1596,52 +921,6 @@ public class IndexWriter implements Clos
}
/**
- * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see
- * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
- * @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
- */
- @Deprecated
- public void setWriteLockTimeout(long writeLockTimeout) {
- ensureOpen();
- this.writeLockTimeout = writeLockTimeout;
- // Required so config.getWriteLockTimeout returns the right value. But this
- // will go away together with the method in 4.0.
- config.setWriteLockTimeout(writeLockTimeout);
- }
-
- /**
- * Returns allowed timeout when acquiring the write lock.
- * @see #setWriteLockTimeout
- * @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
- */
- @Deprecated
- public long getWriteLockTimeout() {
- ensureOpen();
- return writeLockTimeout;
- }
-
- /**
- * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
- * milliseconds).
- * @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
- */
- @Deprecated
- public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
- IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
- }
-
- /**
- * Returns default write lock timeout for newly
- * instantiated IndexWriters.
- * @see #setDefaultWriteLockTimeout
- * @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
- */
- @Deprecated
- public static long getDefaultWriteLockTimeout() {
- return IndexWriterConfig.getDefaultWriteLockTimeout();
- }
-
- /**
* Commits all changes to an index and closes all
* associated files. Note that this may be a costly
* operation, so, try to re-use a single writer instead of
@@ -1776,7 +1055,7 @@ public class IndexWriter implements Clos
message("now call final commit()");
if (!hitOOM) {
- commit(0);
+ commitInternal(null);
}
if (infoStream != null)
@@ -1822,6 +1101,9 @@ public class IndexWriter implements Clos
}
boolean useCompoundDocStore = false;
+ if (infoStream != null) {
+ message("closeDocStores segment=" + docWriter.getDocStoreSegment());
+ }
String docStoreSegment;
@@ -2277,11 +1559,12 @@ public class IndexWriter implements Clos
* calling optimize. </p>
*
* <p>Note that optimize requires 2X the index size free
- * space in your Directory. For example, if your index
- * size is 10 MB then you need 20 MB free for optimize to
- * complete. Also, it's best to call {@link #commit()}
- * after the optimize completes to allow IndexWriter to
- * free up disk space.</p>
+ * space in your Directory (3X if you're using compound
+ * file format). For example, if your index size is 10 MB
+ * then you need 20 MB free for optimize to complete (30
+ * MB if you're using compound file format). Also,
+ * it's best to call {@link #commit()} after the optimize
+ * completes to allow IndexWriter to free up disk space.</p>
*
* <p>If some but not all readers re-open while an
* optimize is underway, this will cause > 2X temporary
@@ -3026,7 +2309,7 @@ public class IndexWriter implements Clos
}
// Now create the compound file if needed
- if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) {
+ if (mergePolicy instanceof LogMergePolicy && ((LogMergePolicy) mergePolicy).getUseCompoundFile()) {
List<String> files = null;
@@ -3129,19 +2412,12 @@ public class IndexWriter implements Clos
flush(true, true, true);
- startCommit(0, commitUserData);
+ startCommit(commitUserData);
}
// Used only by commit, below; lock order is commitLock -> IW
private final Object commitLock = new Object();
- private void commit(long sizeInBytes) throws IOException {
- synchronized(commitLock) {
- startCommit(sizeInBytes, null);
- finishCommit();
- }
- }
-
/**
* <p>Commits all pending changes (added & deleted
* documents, optimizations, segment merges, added
@@ -3189,6 +2465,11 @@ public class IndexWriter implements Clos
ensureOpen();
+ commitInternal(commitUserData);
+ }
+
+ private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
+
if (infoStream != null) {
message("commit: start");
}
@@ -3594,8 +2875,6 @@ public class IndexWriter implements Clos
if (merge.isAborted()) {
if (infoStream != null)
message("commitMerge: skipping merge " + merge.segString(directory) + ": it was aborted");
-
- deleter.refresh(merge.info.name);
return false;
}
@@ -3604,13 +2883,20 @@ public class IndexWriter implements Clos
commitMergedDeletes(merge, mergedReader);
docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount);
+ // If the doc store we are using has been closed and
+ // is now in compound format (but wasn't when we
+ // started), then we will switch to the compound
+ // format as well:
setMergeDocStoreIsCompoundFile(merge);
+
merge.info.setHasProx(merger.hasProx());
segmentInfos.subList(start, start + merge.segments.size()).clear();
assert !segmentInfos.contains(merge.info);
segmentInfos.add(start, merge.info);
+ closeMergeReaders(merge, false);
+
// Must note the change to segmentInfos so any commits
// in-flight don't lose it:
checkpoint();
@@ -3627,11 +2913,6 @@ public class IndexWriter implements Clos
return true;
}
- private synchronized void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException {
- assert merge.increfDone;
- merge.increfDone = false;
- }
-
final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
if (infoStream != null) {
@@ -3912,8 +3193,6 @@ public class IndexWriter implements Clos
updatePendingMerges(1, false);
}
- merge.increfDone = true;
-
merge.mergeDocStores = mergeDocStores;
// Bind a new segment name here so even with
@@ -3967,14 +3246,6 @@ public class IndexWriter implements Clos
// on merges to finish.
notifyAll();
- if (merge.increfDone)
- decrefMergeSegments(merge);
-
- if (merge.mergeFiles != null) {
- deleter.decRef(merge.mergeFiles);
- merge.mergeFiles = null;
- }
-
// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
@@ -4004,12 +3275,54 @@ public class IndexWriter implements Clos
}
}
}
- }
+ }
+
+ private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
+ final int numSegments = merge.segments.size();
+ if (suppressExceptions) {
+ // Suppress any new exceptions so we throw the
+ // original cause
+ for (int i=0;i<numSegments;i++) {
+ if (merge.readers[i] != null) {
+ try {
+ readerPool.release(merge.readers[i], false);
+ } catch (Throwable t) {
+ }
+ merge.readers[i] = null;
+ }
+
+ if (merge.readersClone[i] != null) {
+ try {
+ merge.readersClone[i].close();
+ } catch (Throwable t) {
+ }
+ // This was a private clone and we had the
+ // only reference
+ assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
+ merge.readersClone[i] = null;
+ }
+ }
+ } else {
+ for (int i=0;i<numSegments;i++) {
+ if (merge.readers[i] != null) {
+ readerPool.release(merge.readers[i], true);
+ merge.readers[i] = null;
+ }
+
+ if (merge.readersClone[i] != null) {
+ merge.readersClone[i].close();
+ // This was a private clone and we had the only reference
+ assert merge.readersClone[i].getRefCount() == 0;
+ merge.readersClone[i] = null;
+ }
+ }
+ }
+ }
/** Does the actual (time-consuming) work of the merge,
* but without holding synchronized lock on IndexWriter
* instance */
- final private int mergeMiddle(MergePolicy.OneMerge merge)
+ private int mergeMiddle(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {
merge.checkAborted(directory);
@@ -4033,8 +3346,13 @@ public class IndexWriter implements Clos
boolean mergeDocStores = false;
- final Set<String> dss = new HashSet<String>();
-
+ final String currentDocStoreSegment;
+ synchronized(this) {
+ currentDocStoreSegment = docWriter.getDocStoreSegment();
+ }
+
+ boolean currentDSSMerged = false;
+
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -4042,7 +3360,6 @@ public class IndexWriter implements Clos
int totDocCount = 0;
for (int i = 0; i < numSegments; i++) {
-
final SegmentInfo info = sourceSegments.info(i);
// Hold onto the "live" reader; we will use this to
@@ -4061,8 +3378,8 @@ public class IndexWriter implements Clos
mergeDocStores = true;
}
- if (info.getDocStoreOffset() != -1) {
- dss.add(info.getDocStoreSegment());
+ if (info.getDocStoreOffset() != -1 && currentDocStoreSegment != null) {
+ currentDSSMerged |= currentDocStoreSegment.equals(info.getDocStoreSegment());
}
totDocCount += clone.numDocs();
@@ -4088,9 +3405,10 @@ public class IndexWriter implements Clos
// readers will attempt to open an IndexInput
// on files that have still-open IndexOutputs
// against them:
- if (dss.contains(docWriter.getDocStoreSegment())) {
- if (infoStream != null)
+ if (currentDSSMerged) {
+ if (infoStream != null) {
message("now flush at mergeMiddle");
+ }
doFlush(true, false);
updatePendingMerges(1, false);
}
@@ -4101,9 +3419,7 @@ public class IndexWriter implements Clos
}
// Clear DSS
- synchronized(this) {
- merge.info.setDocStore(-1, null, false);
- }
+ merge.info.setDocStore(-1, null, false);
}
// This is where all the work happens:
@@ -4124,26 +3440,65 @@ public class IndexWriter implements Clos
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
merge.info.setHasProx(merger.hasProx());
- // TODO: in the non-realtime case, we may want to only
- // keep deletes (it's costly to open entire reader
- // when we just need deletes)
+ if (merge.useCompoundFile) {
- final int termsIndexDivisor;
- final boolean loadDocStores;
+ success = false;
+ final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
- synchronized(this) {
- // If the doc store we are using has been closed and
- // is in now compound format (but wasn't when we
- // started), then we will switch to the compound
- // format as well:
- setMergeDocStoreIsCompoundFile(merge);
- assert merge.mergeFiles == null;
- merge.mergeFiles = merge.info.files();
- deleter.incRef(merge.mergeFiles);
+ try {
+ if (infoStream != null) {
+ message("create compound file " + compoundFileName);
+ }
+ merger.createCompoundFile(compoundFileName, merge.info);
+ success = true;
+ } catch (IOException ioe) {
+ synchronized(this) {
+ if (merge.isAborted()) {
+ // This can happen if rollback or close(false)
+ // is called -- fall through to logic below to
+ // remove the partially created CFS:
+ } else {
+ handleMergeException(ioe, merge);
+ }
+ }
+ } catch (Throwable t) {
+ handleMergeException(t, merge);
+ } finally {
+ if (!success) {
+ if (infoStream != null) {
+ message("hit exception creating compound file during merge");
+ }
+
+ synchronized(this) {
+ deleter.deleteFile(compoundFileName);
+ deleter.deleteNewFiles(merger.getMergedFiles(merge.info));
+ }
+ }
+ }
+
+ success = false;
+
+ synchronized(this) {
+
+ // delete new non cfs files directly: they were never
+ // registered with IFD
+ deleter.deleteNewFiles(merger.getMergedFiles(merge.info));
+
+ if (merge.isAborted()) {
+ if (infoStream != null) {
+ message("abort merge after building CFS");
+ }
+ deleter.deleteFile(compoundFileName);
+ return 0;
+ }
+ }
+
+ merge.info.setUseCompoundFile(true);
}
- final String currentDocStoreSegment = docWriter.getDocStoreSegment();
-
+ final int termsIndexDivisor;
+ final boolean loadDocStores;
+
// if the merged segment warmer was not installed when
// this merge was started, causing us to not force
// the docStores to close, we can't warm it now
@@ -4160,117 +3515,32 @@ public class IndexWriter implements Clos
loadDocStores = false;
}
+ // TODO: in the non-realtime case, we may want to only
+ // keep deletes (it's costly to open entire reader
+ // when we just need deletes)
+
final SegmentReader mergedReader = readerPool.get(merge.info, loadDocStores, BufferedIndexInput.BUFFER_SIZE, termsIndexDivisor);
try {
if (poolReaders && mergedSegmentWarmer != null) {
mergedSegmentWarmer.warm(mergedReader);
}
- if (!commitMerge(merge, merger, mergedDocCount, mergedReader))
+
+ if (!commitMerge(merge, merger, mergedDocCount, mergedReader)) {
// commitMerge will return false if this merge was aborted
return 0;
+ }
} finally {
synchronized(this) {
readerPool.release(mergedReader);
}
}
-
success = true;
- } finally {
- synchronized(this) {
- if (!success) {
- // Suppress any new exceptions so we throw the
- // original cause
- for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
- try {
- readerPool.release(merge.readers[i], false);
- } catch (Throwable t) {
- }
- }
-
- if (merge.readersClone[i] != null) {
- try {
- merge.readersClone[i].close();
- } catch (Throwable t) {
- }
- // This was a private clone and we had the
- // only reference
- assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
- }
- }
- } else {
- for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
- readerPool.release(merge.readers[i], true);
- }
-
- if (merge.readersClone[i] != null) {
- merge.readersClone[i].close();
- // This was a private clone and we had the only reference
- assert merge.readersClone[i].getRefCount() == 0;
- }
- }
- }
- }
- }
-
- // Must checkpoint before decrefing so any newly
- // referenced files in the new merge.info are incref'd
- // first:
- synchronized(this) {
- deleter.checkpoint(segmentInfos, false);
- }
- decrefMergeSegments(merge);
-
- if (merge.useCompoundFile) {
-
- success = false;
- final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
-
- try {
- merger.createCompoundFile(compoundFileName, merge.info);
- success = true;
- } catch (IOException ioe) {
- synchronized(this) {
- if (merge.isAborted()) {
- // This can happen if rollback or close(false)
- // is called -- fall through to logic below to
- // remove the partially created CFS:
- success = true;
- } else
- handleMergeException(ioe, merge);
- }
- } catch (Throwable t) {
- handleMergeException(t, merge);
- } finally {
- if (!success) {
- if (infoStream != null)
- message("hit exception creating compound file during merge");
- synchronized(this) {
- deleter.deleteFile(compoundFileName);
- }
- }
- }
-
- if (merge.isAborted()) {
- if (infoStream != null)
- message("abort merge after building CFS");
- synchronized(this) {
- deleter.deleteFile(compoundFileName);
- }
- return 0;
- }
- synchronized(this) {
- if (segmentInfos.indexOf(merge.info) == -1 || merge.isAborted()) {
- // Our segment (committed in non-compound
- // format) got merged away while we were
- // building the compound format.
- deleter.deleteFile(compoundFileName);
- } else {
- merge.info.setUseCompoundFile(true);
- checkpoint();
- }
+ } finally {
+ // Readers are already closed in commitMerge if we didn't hit
+ // an exc:
+ if (!success) {
+ closeMergeReaders(merge, true);
}
}
@@ -4374,7 +3644,7 @@ public class IndexWriter implements Clos
* if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit
* it. */
- private void startCommit(long sizeInBytes, Map<String,String> commitUserData) throws IOException {
+ private void startCommit(Map<String,String> commitUserData) throws IOException {
assert testPoint("startStartCommit");
assert pendingCommit == null;
@@ -4386,7 +3656,7 @@ public class IndexWriter implements Clos
try {
if (infoStream != null)
- message("startCommit(): start sizeInBytes=" + sizeInBytes);
+ message("startCommit(): start");
final SegmentInfos toSync;
final long myChangeCount;
@@ -4394,6 +3664,7 @@ public class IndexWriter implements Clos
synchronized(this) {
assert lastCommitChangeCount <= changeCount;
+ myChangeCount = changeCount;
if (changeCount == lastCommitChangeCount) {
if (infoStream != null)
@@ -4410,7 +3681,24 @@ public class IndexWriter implements Clos
readerPool.commit();
+ // It's possible another flush (that did not close
+ // the open doc stores) snuck in after the flush we
+ // just did, so we remove any tail segments
+ // referencing the open doc store from the
+ // SegmentInfos we are about to sync (the main
+ // SegmentInfos will keep them):
toSync = (SegmentInfos) segmentInfos.clone();
+ final String dss = docWriter.getDocStoreSegment();
+ if (dss != null) {
+ while(true) {
+ final String dss2 = toSync.info(toSync.size()-1).getDocStoreSegment();
+ if (dss2 == null || !dss2.equals(dss)) {
+ break;
+ }
+ toSync.remove(toSync.size()-1);
+ changeCount++;
+ }
+ }
assert filesExist(toSync);
if (commitUserData != null)
@@ -4422,7 +3710,6 @@ public class IndexWriter implements Clos
// merge completes which would otherwise have
// removed the files we are now syncing.
deleter.incRef(toSync, false);
- myChangeCount = changeCount;
}
assert testPoint("midStartCommit");
@@ -4499,63 +3786,6 @@ public class IndexWriter implements Clos
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
}
- /**
- * Specifies maximum field length (in number of tokens/terms) in
- * {@link IndexWriter} constructors. {@link #setMaxFieldLength(int)} overrides
- * the value set by the constructor.
- *
- * @deprecated use {@link IndexWriterConfig} and pass
- * {@link IndexWriterConfig#UNLIMITED_FIELD_LENGTH} or your own
- * value.
- */
- @Deprecated
- public static final class MaxFieldLength {
-
- private int limit;
- private String name;
-
- /**
- * Private type-safe-enum-pattern constructor.
- *
- * @param name instance name
- * @param limit maximum field length
- */
- private MaxFieldLength(String name, int limit) {
- this.name = name;
- this.limit = limit;
- }
-
- /**
- * Public constructor to allow users to specify the maximum field size limit.
- *
- * @param limit The maximum field length
- */
- public MaxFieldLength(int limit) {
- this("User-specified", limit);
- }
-
- public int getLimit() {
- return limit;
- }
-
- @Override
- public String toString()
- {
- return name + ":" + limit;
- }
-
- /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */
- public static final MaxFieldLength UNLIMITED
- = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE);
-
- /**
- * Sets the maximum field length to
- * {@link #DEFAULT_MAX_FIELD_LENGTH}
- * */
- public static final MaxFieldLength LIMITED
- = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH);
- }
-
/** If {@link #getReader} has been called (ie, this writer
* is in near real-time mode), then after a merge
* completes, this class can be invoked to warm the
@@ -4574,31 +3804,6 @@ public class IndexWriter implements Clos
private IndexReaderWarmer mergedSegmentWarmer;
- /**
- * Set the merged segment warmer. See {@link IndexReaderWarmer}.
- *
- * @deprecated use
- * {@link IndexWriterConfig#setMergedSegmentWarmer}
- * instead.
- */
- @Deprecated
- public void setMergedSegmentWarmer(IndexReaderWarmer warmer) {
- mergedSegmentWarmer = warmer;
- // Required so config.getMergedSegmentWarmer returns the right value. But
- // this will go away together with the method in 4.0.
- config.setMergedSegmentWarmer(mergedSegmentWarmer);
- }
-
- /**
- * Returns the current merged segment warmer. See {@link IndexReaderWarmer}.
- *
- * @deprecated use {@link IndexWriterConfig#getMergedSegmentWarmer()} instead.
- */
- @Deprecated
- public IndexReaderWarmer getMergedSegmentWarmer() {
- return mergedSegmentWarmer;
- }
-
private void handleOOM(OutOfMemoryError oom, String location) {
if (infoStream != null) {
message("hit OutOfMemoryError inside " + location);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Mon Dec 6 00:47:16 2010
@@ -79,9 +79,6 @@ public final class IndexWriterConfig imp
*/
public static long WRITE_LOCK_TIMEOUT = 1000;
- /** Default {@link CodecProvider}. */
- public final static CodecProvider DEFAULT_CODEC_PROVIDER = CodecProvider.getDefault();
-
/** The maximum number of simultaneous threads that may be
* indexing documents at once in IndexWriter; if more
* than this many threads arrive they will wait for
@@ -158,7 +155,7 @@ public final class IndexWriterConfig imp
maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
indexingChain = DocumentsWriter.defaultIndexingChain;
mergedSegmentWarmer = null;
- codecProvider = DEFAULT_CODEC_PROVIDER;
+ codecProvider = CodecProvider.getDefault();
mergePolicy = new LogByteSizeMergePolicy();
maxThreadStates = DEFAULT_MAX_THREAD_STATES;
readerPooling = DEFAULT_READER_POOLING;
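[Not part of the patch: a sketch of what dropping the static DEFAULT_CODEC_PROVIDER constant implies, assuming the config exposes the provider via getCodecProvider(); `analyzer` is illustrative. The default provider is now looked up when the config is constructed, so codecs registered on CodecProvider.getDefault() beforehand are visible to new configs.]

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.Version;

IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
// resolved at construction time, not frozen in a static constant
assert conf.getCodecProvider() == CodecProvider.getDefault();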
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Mon Dec 6 00:47:16 2010
@@ -28,7 +28,7 @@ public class LogByteSizeMergePolicy exte
/** Default maximum segment size. A segment of this size
* or larger will never be merged. @see setMaxMergeMB */
- public static final double DEFAULT_MAX_MERGE_MB = Long.MAX_VALUE;
+ public static final double DEFAULT_MAX_MERGE_MB = 2048;
public LogByteSizeMergePolicy() {
minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
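[Not part of the patch: since the default above drops from effectively unlimited to 2048 MB, segments larger than 2 GB are no longer merged unless the cap is raised. A minimal sketch; the 10 GB figure is illustrative.]

import org.apache.lucene.index.LogByteSizeMergePolicy;

LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setMaxMergeMB(10 * 1024); // raise the new 2 GB default cap to ~10 GB
// pass `mp` to IndexWriterConfig.setMergePolicy(...) when building the writer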
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Dec 6 00:47:16 2010
@@ -54,12 +54,19 @@ public abstract class LogMergePolicy ext
* or larger will never be merged. @see setMaxMergeDocs */
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
+ /** Default noCFSRatio. If a merge's size is >= 10% of
+ * the index, then we disable compound file for it.
+ * @see #setNoCFSRatio */
+ public static final double DEFAULT_NO_CFS_RATIO = 0.1;
+
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
protected long minMergeSize;
protected long maxMergeSize;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+ protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+
protected boolean calibrateSizeByDeletes = true;
protected boolean useCompoundFile = true;
@@ -73,6 +80,23 @@ public abstract class LogMergePolicy ext
IndexWriter w = writer.get();
return w != null && w.verbose();
}
+
+ /** @see #setNoCFSRatio */
+ public double getNoCFSRatio() {
+ return noCFSRatio;
+ }
+
+ /** If a merged segment will be more than this percentage
+ * of the total size of the index, leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set to 1.0 to always use CFS regardless of merge
+ * size. */
+ public void setNoCFSRatio(double noCFSRatio) {
+ if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+ throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+ }
+ this.noCFSRatio = noCFSRatio;
+ }
protected void message(String message) {
if (verbose())
@@ -207,7 +231,7 @@ public abstract class LogMergePolicy ext
return !hasDeletions &&
!info.hasSeparateNorms() &&
info.dir == w.getDirectory() &&
- info.getUseCompoundFile() == useCompoundFile;
+ (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
/**
@@ -230,12 +254,12 @@ public abstract class LogMergePolicy ext
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
- spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
- spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(start, last)));
last = start;
}
--start;
@@ -243,7 +267,7 @@ public abstract class LogMergePolicy ext
// Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
- spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(start, last)));
}
return spec.merges.size() == 0 ? null : spec;
@@ -260,7 +284,7 @@ public abstract class LogMergePolicy ext
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
- spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
last -= mergeFactor;
}
@@ -272,7 +296,7 @@ public abstract class LogMergePolicy ext
// Since we must optimize down to 1 segment, the
// choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) {
- spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(0, last)));
}
} else if (last > maxNumSegments) {
@@ -301,7 +325,7 @@ public abstract class LogMergePolicy ext
}
}
- spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;
@@ -389,7 +413,7 @@ public abstract class LogMergePolicy ext
// deletions, so force a merge now:
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+ spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {
@@ -398,7 +422,7 @@ public abstract class LogMergePolicy ext
// mergeFactor segments
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+ spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}
@@ -406,7 +430,7 @@ public abstract class LogMergePolicy ext
if (firstSegmentWithDeletions != -1) {
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
- spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
+ spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
}
return spec;
@@ -506,7 +530,7 @@ public abstract class LogMergePolicy ext
spec = new MergeSpecification();
if (verbose())
message(" " + start + " to " + end + ": add this merge");
- spec.add(new OneMerge(infos.range(start, end), useCompoundFile));
+ spec.add(makeOneMerge(infos, infos.range(start, end)));
} else if (verbose())
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -520,6 +544,29 @@ public abstract class LogMergePolicy ext
return spec;
}
+ protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
+ final boolean doCFS;
+ if (!useCompoundFile) {
+ doCFS = false;
+ } else if (noCFSRatio == 1.0) {
+ doCFS = true;
+ } else {
+
+ long totSize = 0;
+ for(SegmentInfo info : infos) {
+ totSize += size(info);
+ }
+ long mergeSize = 0;
+ for(SegmentInfo info : infosToMerge) {
+ mergeSize += size(info);
+ }
+
+ doCFS = mergeSize <= noCFSRatio * totSize;
+ }
+
+ return new OneMerge(infosToMerge, doCFS);
+ }
+
/** <p>Determines the largest segment (measured by
* document count) that may be merged with other segments.
* Small values (e.g., less than 10,000) are best for
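[Not part of the patch: the noCFSRatio knob and makeOneMerge added above decide per merge whether the result is written in the compound format. A minimal configuration sketch; values are illustrative.]

import org.apache.lucene.index.LogByteSizeMergePolicy;

LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setUseCompoundFile(true);
// a merge larger than 10% of the whole index is written as separate files;
// 1.0 restores the old always-CFS behaviour
mp.setNoCFSRatio(0.1);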
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Dec 6 00:47:16 2010
@@ -69,14 +69,12 @@ public abstract class MergePolicy implem
SegmentInfo info; // used by IndexWriter
boolean mergeDocStores; // used by IndexWriter
boolean optimize; // used by IndexWriter
- boolean increfDone; // used by IndexWriter
boolean registerDone; // used by IndexWriter
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
SegmentReader[] readers; // used by IndexWriter
SegmentReader[] readersClone; // used by IndexWriter
- List<String> mergeFiles; // used by IndexWriter
public final SegmentInfos segments;
public final boolean useCompoundFile;
boolean aborted;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java Mon Dec 6 00:47:16 2010
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Set;
@@ -127,6 +128,6 @@ final class SegmentCodecs implements Clo
@Override
public String toString() {
- return "CodecInfo [codecs=" + codecs + ", provider=" + provider + "]";
+ return "SegmentCodecs [codecs=" + Arrays.toString(codecs) + ", provider=" + provider + "]";
}
}
\ No newline at end of file
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Mon Dec 6 00:47:16 2010
@@ -78,7 +78,7 @@ public final class SegmentInfos extends
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
*/
- private static PrintStream infoStream;
+ private static PrintStream infoStream = null;
public SegmentInfos() {
this(CodecProvider.getDefault());
@@ -621,7 +621,7 @@ public final class SegmentInfos extends
try {
Object v = doBody(segmentFileName);
- if (exc != null && infoStream != null) {
+ if (infoStream != null) {
message("success on " + segmentFileName);
}
return v;
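[Not part of the patch: with the condition above relaxed, the "success on segments_N" message is emitted whenever an info stream is set, not only after a failed earlier attempt. A minimal sketch of turning it on; System.out is illustrative.]

import org.apache.lucene.index.SegmentInfos;

SegmentInfos.setInfoStream(System.out); // log segments_N load attempts and successes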
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Mon Dec 6 00:47:16 2010
@@ -44,9 +44,6 @@ import org.apache.lucene.util.MultiBits;
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
* into a single Segment. After adding the appropriate readers, call the merge method to combine the
* segments.
- *<P>
- * If the compoundFile flag is set, then the segments will be merged into a compound file.
- *
*
* @see #merge
* @see #add
@@ -110,16 +107,7 @@ final class SegmentMerger {
* @param reader
*/
final void add(IndexReader reader) {
- readers.add(reader);
- }
-
- /**
- *
- * @param i The index of the reader to return
- * @return The ith reader to be merged
- */
- final IndexReader segmentReader(int i) {
- return readers.get(i);
+ ReaderUtil.gatherSubReaders(readers, reader);
}
/**
@@ -161,21 +149,7 @@ final class SegmentMerger {
return mergedDocs;
}
- /**
- * close all IndexReaders that have been added.
- * Should not be called before merge().
- * @throws IOException
- */
- final void closeReaders() throws IOException {
- for (final IndexReader reader : readers) {
- reader.close();
- }
- }
-
- final List<String> createCompoundFile(String fileName, final SegmentInfo info)
- throws IOException {
- CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
-
+ final Collection<String> getMergedFiles(final SegmentInfo info) throws IOException {
Set<String> fileSet = new HashSet<String>();
// Basic files
@@ -203,18 +177,26 @@ final class SegmentMerger {
}
}
+ return fileSet;
+ }
+
+ final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
+ throws IOException {
+
// Now merge all added files
- for (String file : fileSet) {
+ Collection<String> files = getMergedFiles(info);
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
+ for (String file : files) {
cfsWriter.addFile(file);
}
// Perform the merge
cfsWriter.close();
- return new ArrayList<String>(fileSet);
+ return files;
}
- private void addIndexed(IndexReader reader, FieldInfos fInfos,
+ private static void addIndexed(IndexReader reader, FieldInfos fInfos,
Collection<String> names, boolean storeTermVectors,
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
boolean storePayloads, boolean omitTFAndPositions)
@@ -377,7 +359,7 @@ final class SegmentMerger {
throws IOException, MergeAbortedException, CorruptIndexException {
int docCount = 0;
final int maxDoc = reader.maxDoc();
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ final Bits delDocs = reader.getDeletedDocs();
if (matchingFieldsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
for (int j = 0; j < maxDoc;) {
@@ -461,7 +443,7 @@ final class SegmentMerger {
final SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++];
TermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
- TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReaderOrig();
+ TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
// If the TV* files are an older format then they cannot read raw docs:
if (vectorsReader != null && vectorsReader.canReadRawDocs()) {
@@ -496,7 +478,7 @@ final class SegmentMerger {
final IndexReader reader)
throws IOException, MergeAbortedException {
final int maxDoc = reader.maxDoc();
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ final Bits delDocs = reader.getDeletedDocs();
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
for (int docNum = 0; docNum < maxDoc;) {
@@ -577,34 +559,28 @@ final class SegmentMerger {
int docBase = 0;
final List<Fields> fields = new ArrayList<Fields>();
- final List<IndexReader> subReaders = new ArrayList<IndexReader>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
final List<Bits> bits = new ArrayList<Bits>();
final List<Integer> bitsStarts = new ArrayList<Integer>();
- final int numReaders = readers.size();
- for(int i=0;i<numReaders;i++) {
- docBase = new ReaderUtil.Gather(readers.get(i)) {
- @Override
- protected void add(int base, IndexReader r) throws IOException {
- final Fields f = r.fields();
- if (f != null) {
- subReaders.add(r);
- fields.add(f);
- slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
- bits.add(r.getDeletedDocs());
- bitsStarts.add(base);
- }
- }
- }.run(docBase);
+ for(IndexReader r : readers) {
+ final Fields f = r.fields();
+ final int maxDoc = r.maxDoc();
+ if (f != null) {
+ slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
+ fields.add(f);
+ bits.add(r.getDeletedDocs());
+ bitsStarts.add(docBase);
+ }
+ docBase += maxDoc;
}
bitsStarts.add(docBase);
// we may gather more readers than mergeState.readerCount
mergeState = new MergeState();
- mergeState.readers = subReaders;
- mergeState.readerCount = subReaders.size();
+ mergeState.readers = readers;
+ mergeState.readerCount = readers.size();
mergeState.fieldInfos = fieldInfos;
mergeState.mergedDocCount = mergedDocs;
@@ -619,13 +595,9 @@ final class SegmentMerger {
docBase = 0;
int inputDocBase = 0;
- final int[] starts = new int[mergeState.readerCount+1];
-
for(int i=0;i<mergeState.readerCount;i++) {
- final IndexReader reader = subReaders.get(i);
-
- starts[i] = inputDocBase;
+ final IndexReader reader = readers.get(i);
mergeState.delCounts[i] = reader.numDeletedDocs();
mergeState.docBase[i] = docBase;
@@ -633,7 +605,7 @@ final class SegmentMerger {
inputDocBase += reader.maxDoc();
if (mergeState.delCounts[i] != 0) {
int delCount = 0;
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
+ final Bits delDocs = reader.getDeletedDocs();
assert delDocs != null;
final int maxDoc = reader.maxDoc();
final int[] docMap = mergeState.docMaps[i] = new int[maxDoc];
@@ -653,7 +625,6 @@ final class SegmentMerger {
mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
}
}
- starts[mergeState.readerCount] = inputDocBase;
codec = segmentWriteState.segmentCodecs.codec();
final FieldsConsumer consumer = codec.fieldsConsumer(segmentWriteState);
@@ -683,24 +654,27 @@ final class SegmentMerger {
}
private void mergeNorms() throws IOException {
+ // get needed buffer size by finding the largest segment
+ int bufferSize = 0;
+ for (IndexReader reader : readers) {
+ bufferSize = Math.max(bufferSize, reader.maxDoc());
+ }
+
byte[] normBuffer = null;
IndexOutput output = null;
try {
- int numFieldInfos = fieldInfos.size();
- for (int i = 0; i < numFieldInfos; i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
+ for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) {
+ final FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed && !fi.omitNorms) {
if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
}
- for ( IndexReader reader : readers) {
- int maxDoc = reader.maxDoc();
- final Bits delDocs = MultiFields.getDeletedDocs(reader);
- if (normBuffer == null || normBuffer.length < maxDoc) {
- // the buffer is too small for the current segment
- normBuffer = new byte[maxDoc];
- }
+ if (normBuffer == null) {
+ normBuffer = new byte[bufferSize];
+ }
+ for (IndexReader reader : readers) {
+ final int maxDoc = reader.maxDoc();
reader.norms(fi.name, normBuffer, 0);
if (!reader.hasDeletions()) {
//optimized case for segments without deleted docs
@@ -708,6 +682,7 @@ final class SegmentMerger {
} else {
// this segment has deleted docs, so we have to
// check for every doc if it is deleted or not
+ final Bits delDocs = reader.getDeletedDocs();
for (int k = 0; k < maxDoc; k++) {
if (!delDocs.get(k)) {
output.writeByte(normBuffer[k]);
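[Not part of the patch: the common thread in this file is that add() now flattens the incoming reader via ReaderUtil.gatherSubReaders, so every element of `readers` is atomic and can report its own deletions, replacing the MultiFields.getDeletedDocs indirection. A minimal sketch of the same pattern outside the merger; `topReader` is a hypothetical already-open reader.]

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;

List<IndexReader> subs = new ArrayList<IndexReader>();
ReaderUtil.gatherSubReaders(subs, topReader); // flatten any composite readers
for (IndexReader r : subs) {
  final Bits delDocs = r.getDeletedDocs(); // null if this sub-reader has no deletions
  final int maxDoc = r.maxDoc();
  for (int doc = 0; doc < maxDoc; doc++) {
    if (delDocs == null || !delDocs.get(doc)) {
      // `doc` is live within this sub-reader
    }
  }
}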
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentReader.java Mon Dec 6 00:47:16 2010
@@ -1248,33 +1248,6 @@ public class SegmentReader extends Index
public final Object getCoreCacheKey() {
return core;
}
-
- /**
- * Lotsa tests did hacks like:<br/>
- * SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>
- * They broke. This method serves as a hack to keep hacks working
- * We do it with R/W access for the tests (BW compatibility)
- * @deprecated Remove this when tests are fixed!
- */
- @Deprecated
- static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
- return getOnlySegmentReader(IndexReader.open(dir, false));
- }
-
- static SegmentReader getOnlySegmentReader(IndexReader reader) {
- if (reader instanceof SegmentReader)
- return (SegmentReader) reader;
-
- if (reader instanceof DirectoryReader) {
- IndexReader[] subReaders = reader.getSequentialSubReaders();
- if (subReaders.length != 1)
- throw new IllegalArgumentException(reader + " has " + subReaders.length + " segments instead of exactly one");
-
- return (SegmentReader) subReaders[0];
- }
-
- throw new IllegalArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader");
- }
@Override
public int getTermInfosIndexDivisor() {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java?rev=1042501&r1=1042500&r2=1042501&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java Mon Dec 6 00:47:16 2010
@@ -18,10 +18,8 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.List;
-import java.util.ArrayList;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.ReaderUtil; // javadoc
import org.apache.lucene.index.DirectoryReader; // javadoc
import org.apache.lucene.index.MultiReader; // javadoc
@@ -49,22 +47,8 @@ import org.apache.lucene.index.MultiRead
*/
public final class SlowMultiReaderWrapper extends FilterIndexReader {
- /** This method may return the reader back, if the
- * incoming reader is already atomic. */
- public static IndexReader wrap(IndexReader reader) throws IOException {
- final List<IndexReader> subs = new ArrayList<IndexReader>();
- ReaderUtil.gatherSubReaders(subs, reader);
- if (subs == null) {
- // already an atomic reader
- return reader;
- } else if (subs.size() == 1) {
- return subs.get(0);
- } else {
- return new SlowMultiReaderWrapper(reader);
- }
- }
- private SlowMultiReaderWrapper(IndexReader other) throws IOException {
+ public SlowMultiReaderWrapper(IndexReader other) {
super(other);
}
@@ -79,7 +63,8 @@ public final class SlowMultiReaderWrappe
}
@Override
- public void doClose() throws IOException {
- throw new UnsupportedOperationException("please call close on the original reader instead");
+ public IndexReader[] getSequentialSubReaders() {
+ return null;
}
+
}
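[Not part of the patch: with the static wrap() factory removed, a composite reader is flattened by constructing the wrapper directly; it now reports getSequentialSubReaders() == null like other atomic readers. A minimal usage sketch, assuming an already-open Directory `dir`.]

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.store.Directory;

IndexReader composite = IndexReader.open(dir, true); // read-only top-level reader
IndexReader flat = new SlowMultiReaderWrapper(composite); // single atomic view
assert flat.getSequentialSubReaders() == null;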