You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by do...@apache.org on 2009/07/29 20:04:24 UTC
svn commit: r798995 [10/35] - in /incubator/lucene.net/trunk/C#/src:
Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/
Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/
Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net...
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexWriter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs Wed Jul 29 18:04:12 2009
@@ -15,4017 +15,6881 @@
* limitations under the License.
*/
-using System;
-
+using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
-using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+using Similarity = Lucene.Net.Search.Similarity;
+using Query = Lucene.Net.Search.Query;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using Lock = Lucene.Net.Store.Lock;
using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using BitVector = Lucene.Net.Util.BitVector;
-using Analyzer = Lucene.Net.Analysis.Analyzer;
-using Similarity = Lucene.Net.Search.Similarity;
-using System.Collections;
+using Constants = Lucene.Net.Util.Constants;
+
+using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- /// <summary>An <code>IndexWriter</code> creates and maintains an index.
- /// <p>The <code>create</code> argument to the
- /// <a href="#IndexWriter(Lucene.Net.Store.Directory, Lucene.Net.Analysis.Analyzer, boolean)"><b>constructor</b></a>
- /// determines whether a new index is created, or whether an existing index is
- /// opened. Note that you
- /// can open an index with <code>create=true</code> even while readers are
- /// using the index. The old readers will continue to search
- /// the "point in time" snapshot they had opened, and won't
- /// see the newly created index until they re-open. There are
- /// also <a href="#IndexWriter(Lucene.Net.Store.Directory, Lucene.Net.Analysis.Analyzer)"><b>constructors</b></a>
- /// with no <code>create</code> argument which
- /// will create a new index if there is not already an index at the
- /// provided path and otherwise open the existing index.</p>
- /// <p>In either case, documents are added with <a
- /// href="#addDocument(Lucene.Net.Documents.Document)"><b>addDocument</b></a>
- /// and removed with <a
- /// href="#deleteDocuments(Lucene.Net.Index.Term)"><b>deleteDocuments</b></a>.
- /// A document can be updated with <a href="#updateDocument(Lucene.Net.Index.Term, Lucene.Net.Documents.Document)"><b>updateDocument</b></a>
- /// (which just deletes and then adds the entire document).
- /// When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
- /// <p>These changes are buffered in memory and periodically
- /// flushed to the {@link Directory} (during the above method
- /// calls). A flush is triggered when there are enough
- /// buffered deletes (see {@link #setMaxBufferedDeleteTerms})
- /// or enough added documents since the last flush, whichever
- /// is sooner. For the added documents, flushing is triggered
- /// either by RAM usage of the documents (see {@link
- /// #setRAMBufferSizeMB}) or the number of added documents.
- /// The default is to flush when RAM usage hits 16 MB. For
- /// best indexing speed you should flush by RAM usage with a
- /// large RAM buffer. You can also force a flush by calling
- /// {@link #flush}. When a flush occurs, both pending deletes
- /// and added documents are flushed to the index. A flush may
- /// also trigger one or more segment merges which by default
- /// run with a background thread so as not to block the
- /// addDocument calls (see <a href="#mergePolicy">below</a>
- /// for changing the {@link MergeScheduler}).</p>
- /// <a name="autoCommit"></a>
- /// <p>The optional <code>autoCommit</code> argument to the
- /// <a href="#IndexWriter(Lucene.Net.Store.Directory, boolean, Lucene.Net.Analysis.Analyzer)"><b>constructors</b></a>
- /// controls visibility of the changes to {@link IndexReader} instances reading the same index.
- /// When this is <code>false</code>, changes are not
- /// visible until {@link #Close()} is called.
- /// Note that changes will still be flushed to the
- /// {@link Lucene.Net.Store.Directory} as new files,
- /// but are not committed (no new <code>segments_N</code> file
- /// is written referencing the new files) until {@link #close} is
- /// called. If something goes terribly wrong (for example the
- /// JVM crashes) before {@link #Close()}, then
- /// the index will reflect none of the changes made (it will
- /// remain in its starting state).
- /// You can also call {@link #Abort()}, which closes the writer without committing any
- /// changes, and removes any index
- /// files that had been flushed but are now unreferenced.
- /// This mode is useful for preventing readers from refreshing
- /// at a bad time (for example after you've done all your
- /// deletes but before you've done your adds).
- /// It can also be used to implement simple single-writer
- /// transactional semantics ("all or none").</p>
- /// <p>When <code>autoCommit</code> is <code>true</code> then
- /// every flush is also a commit ({@link IndexReader}
- /// instances will see each flush as changes to the index).
- /// This is the default, to match the behavior before 2.2.
- /// When running in this mode, be careful not to refresh your
- /// readers while optimize or segment merges are taking place
- /// as this can tie up substantial disk space.</p>
- /// </summary>
- /// <summary><p>Regardless of <code>autoCommit</code>, an {@link
- /// IndexReader} or {@link Lucene.Net.Search.IndexSearcher} will only see the
- /// index as of the "point in time" that it was opened. Any
- /// changes committed to the index after the reader was opened
- /// are not visible until the reader is re-opened.</p>
- /// <p>If an index will not have more documents added for a while and optimal search
- /// performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
- /// method should be called before the index is closed.</p>
- /// <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
- /// another <code>IndexWriter</code> on the same directory will lead to a
- /// {@link LockObtainFailedException}. The {@link LockObtainFailedException}
- /// is also thrown if an IndexReader on the same directory is used to delete documents
- /// from the index.</p>
- /// </summary>
- /// <summary><a name="deletionPolicy"></a>
- /// <p>Expert: <code>IndexWriter</code> allows an optional
- /// {@link IndexDeletionPolicy} implementation to be
- /// specified. You can use this to control when prior commits
- /// are deleted from the index. The default policy is {@link
- /// KeepOnlyLastCommitDeletionPolicy} which removes all prior
- /// commits as soon as a new commit is done (this matches
- /// behavior before 2.2). Creating your own policy can allow
- /// you to explicitly keep previous "point in time" commits
- /// alive in the index for some time, to allow readers to
- /// refresh to the new commit without having the old commit
- /// deleted out from under them. This is necessary on
- /// filesystems like NFS that do not support "delete on last
- /// close" semantics, which Lucene's "point in time" search
- /// normally relies on. </p>
- /// <a name="mergePolicy"></a> <p>Expert:
- /// <code>IndexWriter</code> allows you to separately change
- /// the {@link MergePolicy} and the {@link MergeScheduler}.
- /// The {@link MergePolicy} is invoked whenever there are
- /// changes to the segments in the index. Its role is to
- /// select which merges to do, if any, and return a {@link
- /// MergePolicy.MergeSpecification} describing the merges. It
- /// also selects merges to do for optimize(). (The default is
- /// {@link LogByteSizeMergePolicy}. Then, the {@link
- /// MergeScheduler} is invoked with the requested merges and
- /// it decides when and how to run the merges. The default is
- /// {@link ConcurrentMergeScheduler}. </p>
- /// </summary>
-
- /*
- * Clarification: Check Points (and commits)
- * Being able to set autoCommit=false allows IndexWriter to flush and
- * write new index files to the directory without writing a new segments_N
- * file which references these new files. It also means that the state of
- * the in memory SegmentInfos object is different than the most recent
- * segments_N file written to the directory.
- *
- * Each time the SegmentInfos is changed, and matches the (possibly
- * modified) directory files, we have a new "check point".
- * If the modified/new SegmentInfos is written to disk - as a new
- * (generation of) segments_N file - this check point is also an
- * IndexCommitPoint.
- *
- * With autoCommit=true, every checkPoint is also a CommitPoint.
- * With autoCommit=false, some checkPoints may not be commits.
- *
- * A new checkpoint always replaces the previous checkpoint and
- * becomes the new "front" of the index. This allows the IndexFileDeleter
- * to delete files that are referenced only by stale checkpoints.
- * (files that were created since the last commit, but are no longer
- * referenced by the "front" of the index). For this, IndexFileDeleter
- * keeps track of the last non commit checkpoint.
- */
- public class IndexWriter
- {
- private void InitBlock()
- {
- similarity = Similarity.GetDefault();
- }
-
- /// <summary> Default value for the write lock timeout (1,000).</summary>
- /// <seealso cref="setDefaultWriteLockTimeout">
- /// </seealso>
- public static long WRITE_LOCK_TIMEOUT = 1000;
-
- private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
-
- /// <summary> Name of the write lock in the index.</summary>
- public const System.String WRITE_LOCK_NAME = "write.lock";
-
- /// <deprecated>
- /// </deprecated>
- /// <seealso cref="LogMergePolicy.DEFAULT_MERGE_FACTOR">
- /// </seealso>
- public static readonly int DEFAULT_MERGE_FACTOR;
-
- /// <summary> Value to denote a flush trigger is disabled</summary>
- public const int DISABLE_AUTO_FLUSH = - 1;
-
- /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
- /// by default). Change using {@link #SetMaxBufferedDocs(int)}.
- /// </summary>
- public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
-
- /// <summary> Default value is 16 MB (which means flush when buffered
- /// docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
- /// </summary>
- public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
-
- /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
- /// by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}.
- /// </summary>
- public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
-
- /// <deprecated>
- /// </deprecated>
- /// <seealso cref="LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS">
- /// </seealso>
- public static readonly int DEFAULT_MAX_MERGE_DOCS;
-
- /// <summary> Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.</summary>
- public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
-
- /// <summary> Default value is 128. Change using {@link #SetTermIndexInterval(int)}.</summary>
- public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
-
- /// <summary> Absolute hard maximum length for a term. If a term
- /// arrives from the analyzer longer than this length, it
- /// is skipped and a message is printed to infoStream, if
- /// set (see {@link #setInfoStream}).
- /// </summary>
- public static readonly int MAX_TERM_LENGTH;
-
- // The normal read buffer size defaults to 1024, but
- // increasing this during merging seems to yield
- // performance gains. However we don't want to increase
- // it too much because there are quite a few
- // BufferedIndexInputs created during merging. See
- // LUCENE-888 for details.
- private const int MERGE_READ_BUFFER_SIZE = 4096;
-
- // Used for printing messages
- private static System.Object MESSAGE_ID_LOCK = new System.Object();
- private static int MESSAGE_ID = 0;
- private int messageID = - 1;
+ /// <summary>
+ /// An <code>IndexWriter</code> creates and maintains an index.
+ /// <p>The <code>create</code> argument to the
+ /// <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, bool)"><b>constructor</b></a>
+ /// determines whether a new index is created, or whether an existing index is
+ /// opened. Note that you
+ /// can open an index with <code>create=true</code> even while readers are
+ /// using the index. The old readers will continue to search
+ /// the "point in time" snapshot they had opened, and won't
+ /// see the newly created index until they re-open. There are
+ /// also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
+ /// with no <code>create</code> argument which
+ /// will create a new index if there is not already an index at the
+ /// provided path and otherwise open the existing index.</p>
+ ///
+ /// <p>In either case, documents are added with <a
+ /// href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a>
+ /// and removed with <a
+ /// href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments(Term)</b></a>
+ /// or <a
+ /// href="#deleteDocuments(org.apache.lucene.search.Query)"><b>deleteDocuments(Query)</b></a>.
+ /// A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a>
+ /// (which just deletes and then adds the entire document).
+ /// When finished adding, deleting and updating documents, <a href="#Close()"><b>close</b></a> should be called.</p>
+ ///
+ /// <a name="flush"></a>
+ /// <p>These changes are buffered in memory and periodically
+ /// flushed to the {@link Directory} (during the above method
+ /// calls). A flush is triggered when there are enough
+ /// buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+ /// or enough added documents since the last flush, whichever
+ /// is sooner. For the added documents, flushing is triggered
+ /// either by RAM usage of the documents (see {@link
+ /// #setRAMBufferSizeMB}) or the number of added documents.
+ /// The default is to flush when RAM usage hits 16 MB. For
+ /// best indexing speed you should flush by RAM usage with a
+ /// large RAM buffer. Note that flushing just moves the
+ /// internal buffered state in IndexWriter into the index, but
+ /// these changes are not visible to IndexReader until either
+ /// {@link #Commit()} or {@link #Close()} is called. A flush may
+ /// also trigger one or more segment merges which by default
+ /// run with a background thread so as not to block the
+ /// addDocument calls (see <a href="#mergePolicy">below</a>
+ /// for changing the {@link MergeScheduler}).</p>
+ ///
+ /// <a name="autoCommit"></a>
+ /// <p>The optional <code>autoCommit</code> argument to the <a
+ /// href="#IndexWriter(org.apache.lucene.store.Directory,
+ /// bool,
+ /// org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
+ /// controls visibility of the changes to {@link IndexReader}
+ /// instances reading the same index. When this is
+ /// <code>false</code>, changes are not visible until {@link
+ /// #Close()} or {@link #Commit()} is called. Note that changes will still be
+ /// flushed to the {@link org.apache.lucene.store.Directory}
+ /// as new files, but are not committed (no new
+ /// <code>segments_N</code> file is written referencing the
+ /// new files, nor are the files sync'd to stable storage)
+ /// until {@link #Close()} or {@link #Commit()} is called. If something
+ /// goes terribly wrong (for example the process crashes), then
+ /// the index will reflect none of the changes made since the
+ /// last commit, or the starting state if commit was not called.
+ /// You can also call {@link #rollback}, which closes the writer
+ /// without committing any changes, and removes any index
+ /// files that had been flushed but are now unreferenced.
+ /// This mode is useful for preventing readers from refreshing
+ /// at a bad time (for example after you've done all your
+ /// deletes but before you've done your adds). It can also be
+ /// used to implement simple single-writer transactional
+ /// semantics ("all or none"). You can do a two-phase commit
+ /// by calling {@link #PrepareCommit()}
+ /// followed by {@link #Commit()}. This is necessary when
+ /// Lucene is working with an external resource (for example,
+ /// a database) and both must either commit or rollback the
+ /// transaction.</p>
+ ///
+ /// <p>When <code>autoCommit</code> is <code>true</code> then
+ /// the writer will periodically commit on its own. [<b>Deprecated</b>: Note that in 3.0, IndexWriter will
+ /// no longer accept autoCommit=true (it will be hardwired to
+ /// false). You can always call {@link #Commit()} yourself
+ /// when needed]. There is
+ /// no guarantee when exactly an auto commit will occur (it
+ /// used to be after every flush, but it is now after every
+ /// completed merge, as of 2.4). If you want to force a
+ /// commit, call {@link #Commit()}, or, close the writer. Once
+ /// a commit has finished, newly opened {@link IndexReader} instances will
+ /// see the changes to the index as of that commit. When
+ /// running in this mode, be careful not to refresh your
+ /// readers while optimize or segment merges are taking place
+ /// as this can tie up substantial disk space.</p>
+ ///
+ /// <p>Regardless of <code>autoCommit</code>, an {@link
+ /// IndexReader} or {@link org.apache.lucene.search.IndexSearcher} will only see the
+ /// index as of the "point in time" that it was opened. Any
+ /// changes committed to the index after the reader was opened
+ /// are not visible until the reader is re-opened.</p>
+ ///
+ /// <p>If an index will not have more documents added for a while and optimal search
+ /// performance is desired, then either the full <a href="#Optimize()"><b>optimize</b></a>
+ /// method or partial {@link #Optimize(int)} method should be
+ /// called before the index is closed.</p>
+ ///
+ /// <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
+ /// another <code>IndexWriter</code> on the same directory will lead to a
+ /// {@link LockObtainFailedException}. The {@link LockObtainFailedException}
+ /// is also thrown if an IndexReader on the same directory is used to delete documents
+ /// from the index.</p>
+ ///
+ /// <a name="deletionPolicy"></a>
+ /// <p>Expert: <code>IndexWriter</code> allows an optional
+ /// {@link IndexDeletionPolicy} implementation to be
+ /// specified. You can use this to control when prior commits
+ /// are deleted from the index. The default policy is {@link
+ /// KeepOnlyLastCommitDeletionPolicy} which removes all prior
+ /// commits as soon as a new commit is done (this matches
+ /// behavior before 2.2). Creating your own policy can allow
+ /// you to explicitly keep previous "point in time" commits
+ /// alive in the index for some time, to allow readers to
+ /// refresh to the new commit without having the old commit
+ /// deleted out from under them. This is necessary on
+ /// filesystems like NFS that do not support "delete on last
+ /// close" semantics, which Lucene's "point in time" search
+ /// normally relies on. </p>
+ ///
+ /// <a name="mergePolicy"></a> <p>Expert:
+ /// <code>IndexWriter</code> allows you to separately change
+ /// the {@link MergePolicy} and the {@link MergeScheduler}.
+ /// The {@link MergePolicy} is invoked whenever there are
+ /// changes to the segments in the index. Its role is to
+ /// select which merges to do, if any, and return a {@link
+ /// MergePolicy.MergeSpecification} describing the merges. It
+ /// also selects merges to do for Optimize(). (The default is
+ /// {@link LogByteSizeMergePolicy}.) Then, the {@link
+ /// MergeScheduler} is invoked with the requested merges and
+ /// it decides when and how to run the merges. The default is
+ /// {@link ConcurrentMergeScheduler}. </p>
+ /// </summary>
+
+ /*
+ * Clarification: Check Points (and commits)
+ * Being able to set autoCommit=false allows IndexWriter to flush and
+ * write new index files to the directory without writing a new segments_N
+ * file which references these new files. It also means that the state of
+ * the in memory SegmentInfos object is different than the most recent
+ * segments_N file written to the directory.
+ *
+ * Each time the SegmentInfos is changed, and matches the (possibly
+ * modified) directory files, we have a new "check point".
+ * If the modified/new SegmentInfos is written to disk - as a new
+ * (generation of) segments_N file - this check point is also an
+ * IndexCommit.
+ *
+ * With autoCommit=true, every checkPoint is also a CommitPoint.
+ * With autoCommit=false, some checkPoints may not be commits.
+ *
+ * A new checkpoint always replaces the previous checkpoint and
+ * becomes the new "front" of the index. This allows the IndexFileDeleter
+ * to delete files that are referenced only by stale checkpoints.
+ * (files that were created since the last commit, but are no longer
+ * referenced by the "front" of the index). For this, IndexFileDeleter
+ * keeps track of the last non commit checkpoint.
+ */
+ public class IndexWriter
+ {
+ private void InitBlock()
+ {
+ similarity = Similarity.GetDefault();
+ }
+
+ /// <summary> Default value for the write lock timeout (1,000).</summary>
+ /// <seealso cref="setDefaultWriteLockTimeout">
+ /// </seealso>
+ public static long WRITE_LOCK_TIMEOUT = 1000;
+
+ private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
+
+ /// <summary> Name of the write lock in the index.</summary>
+ public const string WRITE_LOCK_NAME = "write.lock";
+
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="LogMergePolicy.DEFAULT_MERGE_FACTOR">
+ /// </seealso>
+ public static readonly int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
+
+ /// <summary> Value to denote a flush trigger is disabled</summary>
+ public const int DISABLE_AUTO_FLUSH = -1;
+
+ /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+ /// by default). Change using {@link #SetMaxBufferedDocs(int)}.
+ /// </summary>
+ public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
+
+ /// <summary> Default value is 16 MB (which means flush when buffered
+ /// docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
+ /// </summary>
+ public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+
+ /// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+ /// by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}.
+ /// </summary>
+ public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS">
+ /// </seealso>
+ public static readonly int DEFAULT_MAX_MERGE_DOCS;
+
+ /// <summary> Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.</summary>
+ public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
+
+ /// <summary> Default value is 128. Change using {@link #SetTermIndexInterval(int)}.</summary>
+ public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
+
+ /// <summary> Absolute hard maximum length for a term. If a term
+ /// arrives from the analyzer longer than this length, it
+ /// is skipped and a message is printed to infoStream, if
+ /// set (see {@link #setInfoStream}).
+ /// </summary>
+ public static readonly int MAX_TERM_LENGTH;
+
+ /// <summary>
+ /// Default for {@link #getMaxSyncPauseSeconds}. On
+ /// Windows this defaults to 10.0 seconds; elsewhere it's
+ /// 0.
+ /// </summary>
+ public static readonly double DEFAULT_MAX_SYNC_PAUSE_SECONDS;
+
+ // The normal read buffer size defaults to 1024, but
+ // increasing this during merging seems to yield
+ // performance gains. However we don't want to increase
+ // it too much because there are quite a few
+ // BufferedIndexInputs created during merging. See
+ // LUCENE-888 for details.
+ private const int MERGE_READ_BUFFER_SIZE = 4096;
+
+ // Used for printing messages
+ private static object MESSAGE_ID_LOCK = new object();
+ private static int MESSAGE_ID = 0;
+ private int messageID = -1;
volatile private bool hitOOM;
- private Directory directory; // where this index resides
- private Analyzer analyzer; // how to analyze text
-
- private Similarity similarity; // how to normalize
-
- private bool commitPending; // true if segmentInfos has changes not yet committed
- private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
-
- private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
- private bool localAutoCommit; // saved autoCommit during local transaction
- private bool autoCommit = true; // false if we should commit only on close
-
- private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
- private DocumentsWriter docWriter;
- private IndexFileDeleter deleter;
-
- private System.Collections.Hashtable segmentsToOptimize = new System.Collections.Hashtable(); // used by optimize to note those needing optimization
-
- private Lock writeLock;
-
- private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
-
- private bool closeDir;
- private bool closed;
- private bool closing;
-
- // Holds all SegmentInfo instances currently involved in
- // merges
- private System.Collections.Hashtable mergingSegments = new System.Collections.Hashtable();
-
- private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
- private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
- private System.Collections.ArrayList pendingMerges = new System.Collections.ArrayList();
- private System.Collections.Hashtable runningMerges = new System.Collections.Hashtable();
- private System.Collections.IList mergeExceptions = new System.Collections.ArrayList();
- private long mergeGen;
- private bool stopMerges;
-
- /// <summary> Used internally to throw an {@link
- /// AlreadyClosedException} if this IndexWriter has been
- /// closed.
- /// </summary>
- /// <throws> AlreadyClosedException if this IndexWriter is </throws>
- protected internal void EnsureOpen()
- {
- if (closed)
- {
- throw new AlreadyClosedException("this IndexWriter is closed");
- }
- }
-
- /// <summary> Prints a message to the infoStream (if non-null),
- /// prefixed with the identifying information for this
- /// writer and the thread that's calling it.
- /// </summary>
- public virtual void Message(System.String message)
- {
- if (infoStream != null)
- infoStream.WriteLine("IW " + messageID + " [" + SupportClass.ThreadClass.Current().Name + "]: " + message);
- }
-
- private void SetMessageID()
- {
- lock (this)
- {
- if (infoStream != null && messageID == - 1)
- {
- lock (MESSAGE_ID_LOCK)
- {
- messageID = MESSAGE_ID++;
- }
- }
- }
- }
-
- /// <summary> Casts current mergePolicy to LogMergePolicy, and throws
- /// an exception if the mergePolicy is not a LogMergePolicy.
- /// </summary>
- private LogMergePolicy GetLogMergePolicy()
- {
- if (mergePolicy is LogMergePolicy)
- return (LogMergePolicy) mergePolicy;
- else
- throw new System.ArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
- }
-
- /// <summary><p>Get the current setting of whether newly flushed
- /// segments will use the compound file format. Note that
- /// this just returns the value previously set with
- /// setUseCompoundFile(boolean), or the default value
- /// (true). You cannot use this to query the status of
- /// previously flushed segments.</p>
- ///
- /// <p>Note that this method is a convenience method: it
- /// just calls mergePolicy.getUseCompoundFile as long as
- /// mergePolicy is an instance of {@link LogMergePolicy}.
- /// Otherwise an IllegalArgumentException is thrown.</p>
- ///
- /// </summary>
- /// <seealso cref="SetUseCompoundFile(boolean)">
- /// </seealso>
- public virtual bool GetUseCompoundFile()
- {
- return GetLogMergePolicy().GetUseCompoundFile();
- }
-
- /// <summary><p>Setting to turn on usage of a compound file. When on,
- /// multiple files for each segment are merged into a
- /// single file when a new segment is flushed.</p>
- ///
- /// <p>Note that this method is a convenience method: it
- /// just calls mergePolicy.setUseCompoundFile as long as
- /// mergePolicy is an instance of {@link LogMergePolicy}.
- /// Otherwise an IllegalArgumentException is thrown.</p>
- /// </summary>
- public virtual void SetUseCompoundFile(bool value_Renamed)
- {
- GetLogMergePolicy().SetUseCompoundFile(value_Renamed);
- GetLogMergePolicy().SetUseCompoundDocStore(value_Renamed);
- }
-
- /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
- ///
- /// </summary>
- /// <seealso cref="Similarity.SetDefault(Similarity)">
- /// </seealso>
- public virtual void SetSimilarity(Similarity similarity)
- {
- EnsureOpen();
- this.similarity = similarity;
- }
-
- /// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
- ///
- /// <p>This defaults to the current value of {@link Similarity#GetDefault()}.
- /// </summary>
- public virtual Similarity GetSimilarity()
- {
- EnsureOpen();
- return this.similarity;
- }
-
- /// <summary>Expert: Set the interval between indexed terms. Large values cause less
- /// memory to be used by IndexReader, but slow random-access to terms. Small
- /// values cause more memory to be used by an IndexReader, and speed
- /// random-access to terms.
- ///
- /// This parameter determines the amount of computation required per query
- /// term, regardless of the number of documents that contain that term. In
- /// particular, it is the maximum number of other terms that must be
- /// scanned before a term is located and its frequency and position information
- /// may be processed. In a large index with user-entered query terms, query
- /// processing time is likely to be dominated not by term lookup but rather
- /// by the processing of frequency and positional data. In a small index
- /// or when many uncommon query terms are generated (e.g., by wildcard
- /// queries) term lookup may become a dominant cost.
- ///
- /// In particular, <code>numUniqueTerms/interval</code> terms are read into
- /// memory by an IndexReader, and, on average, <code>interval/2</code> terms
- /// must be scanned for each random term access.
- ///
- /// </summary>
- /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
- /// </seealso>
- public virtual void SetTermIndexInterval(int interval)
- {
- EnsureOpen();
- this.termIndexInterval = interval;
- }
-
- /// <summary>Expert: Return the interval between indexed terms.
- ///
- /// </summary>
- /// <seealso cref="SetTermIndexInterval(int)">
- /// </seealso>
- public virtual int GetTermIndexInterval()
- {
- EnsureOpen();
- return termIndexInterval;
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>path</c>. Text will be
- /// analyzed with <c>a</c>. If <c>create</c> is true, a new, empty index is
- /// created in <c>path</c>, replacing the index already there, if any.
- /// </summary>
- /// <param name="path">the path to the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to create the index or overwrite the
- /// existing one; <c>false</c> to append to the existing index</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to, or if it does not exist and <c>create</c> is
- /// <c>false</c>, or if there is any other low-level IO error</exception>
- public IndexWriter(System.String path, Analyzer a, bool create)
- {
-     InitBlock();
-     Directory dir = FSDirectory.GetDirectory(path);
-     // closeDir = true, no custom deletion policy, autoCommit = true
-     Init(dir, a, create, true, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in the directory described by
- /// <c>path</c>. Text will be analyzed with <c>a</c>. If <c>create</c> is
- /// true, a new, empty index is created in <c>path</c>, replacing the index
- /// already there, if any.
- /// </summary>
- /// <param name="path">the path to the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to create the index or overwrite the
- /// existing one; <c>false</c> to append to the existing index</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to, or if it does not exist and <c>create</c> is
- /// <c>false</c>, or if there is any other low-level IO error</exception>
- public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create)
- {
-     InitBlock();
-     Directory dir = FSDirectory.GetDirectory(path);
-     // closeDir = true, no custom deletion policy, autoCommit = true
-     Init(dir, a, create, true, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>d</c>. Text will be
- /// analyzed with <c>a</c>. If <c>create</c> is true, a new, empty index is
- /// created in <c>d</c>, replacing the index already there, if any.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to create the index or overwrite the
- /// existing one; <c>false</c> to append to the existing index</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to, or if it does not exist and <c>create</c> is
- /// <c>false</c>, or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, Analyzer a, bool create)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, create, closeDirectory, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>path</c>, first creating
- /// it if it does not already exist. Text will be analyzed with <c>a</c>.
- /// </summary>
- /// <param name="path">the path to the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to or if there is any other low-level IO error</exception>
- public IndexWriter(System.String path, Analyzer a)
- {
-     InitBlock();
-     Directory dir = FSDirectory.GetDirectory(path);
-     // closeDir = true, no custom deletion policy, autoCommit = true
-     Init(dir, a, true, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in the directory described by
- /// <c>path</c>, first creating it if it does not already exist. Text will
- /// be analyzed with <c>a</c>.
- /// </summary>
- /// <param name="path">the path to the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to or if there is any other low-level IO error</exception>
- public IndexWriter(System.IO.FileInfo path, Analyzer a)
- {
-     InitBlock();
-     Directory dir = FSDirectory.GetDirectory(path);
-     // closeDir = true, no custom deletion policy, autoCommit = true
-     Init(dir, a, true, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>d</c>, first creating it
- /// if it does not already exist. Text will be analyzed with <c>a</c>.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, Analyzer a)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, closeDirectory, null, true);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>d</c>, first creating it
- /// if it does not already exist. Text will be analyzed with <c>a</c>.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="autoCommit">see <a href="#autoCommit">above</a></param>
- /// <param name="a">the analyzer to use</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, bool autoCommit, Analyzer a)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, closeDirectory, null, autoCommit);
- }
-
- /// <summary>
- /// Constructs an IndexWriter for the index in <c>d</c>. Text will be
- /// analyzed with <c>a</c>. If <c>create</c> is true, a new, empty index is
- /// created in <c>d</c>, replacing the index already there, if any.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="autoCommit">see <a href="#autoCommit">above</a></param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to create the index or overwrite the
- /// existing one; <c>false</c> to append to the existing index</param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to, or if it does not exist and <c>create</c> is
- /// <c>false</c>, or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, create, closeDirectory, null, autoCommit);
- }
-
- /// <summary>
- /// Expert: constructs an IndexWriter with a custom
- /// <see cref="IndexDeletionPolicy"/>, for the index in <c>d</c>, first
- /// creating it if it does not already exist. Text will be analyzed with
- /// <c>a</c>.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="autoCommit">see <a href="#autoCommit">above</a></param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a></param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, bool autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, closeDirectory, deletionPolicy, autoCommit);
- }
-
- /// <summary>
- /// Expert: constructs an IndexWriter with a custom
- /// <see cref="IndexDeletionPolicy"/>, for the index in <c>d</c>. Text will
- /// be analyzed with <c>a</c>. If <c>create</c> is true, a new, empty index
- /// is created in <c>d</c>, replacing the index already there, if any.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="autoCommit">see <a href="#autoCommit">above</a></param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to create the index or overwrite the
- /// existing one; <c>false</c> to append to the existing index</param>
- /// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a></param>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="LockObtainFailedException">if another writer has this
- /// index open (<c>write.lock</c> could not be obtained)</exception>
- /// <exception cref="System.IO.IOException">if the directory cannot be
- /// read/written to, or if it does not exist and <c>create</c> is
- /// <c>false</c>, or if there is any other low-level IO error</exception>
- public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy)
- {
-     InitBlock();
-     bool closeDirectory = false; // forwarded as Init's closeDir argument
-     Init(d, a, create, closeDirectory, deletionPolicy, autoCommit);
- }
-
- /// <summary>
- /// Initializes the writer, creating the index only when
- /// <c>IndexReader.IndexExists(d)</c> reports that none exists yet, and
- /// otherwise opening the existing one.
- /// </summary>
- private void Init(Directory d, Analyzer a, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
- {
-     bool create = !IndexReader.IndexExists(d);
-     Init(d, a, create, closeDir, deletionPolicy, autoCommit);
- }
-
- /// <summary>
- /// Core initialization shared by the constructors: records the directory and
- /// analyzer, obtains the write lock, reads (or creates) the segments file and
- /// sets up the DocumentsWriter and IndexFileDeleter.
- /// </summary>
- /// <param name="d">the index directory</param>
- /// <param name="a">the analyzer to use</param>
- /// <param name="create"><c>true</c> to clear any leftover write lock and write
- /// a segments file with no segments; <c>false</c> to read the existing
- /// segments file</param>
- /// <param name="closeDir">stored in the <c>closeDir</c> field</param>
- /// <param name="deletionPolicy">policy handed to the IndexFileDeleter;
- /// <c>null</c> selects KeepOnlyLastCommitDeletionPolicy</param>
- /// <param name="autoCommit">stored in the <c>autoCommit</c> field; when
- /// <c>false</c>, a clone of the segment infos is kept for rollback</param>
- /// <exception cref="LockObtainFailedException">if the write lock cannot be
- /// obtained within <c>writeLockTimeout</c></exception>
- /// <exception cref="System.IO.IOException">if initialization fails after the
- /// lock was obtained; the write lock is released before the exception
- /// propagates</exception>
- private void Init(Directory d, Analyzer a, bool create, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
- {
-     this.closeDir = closeDir;
-     directory = d;
-     analyzer = a;
-     this.infoStream = defaultInfoStream;
-     SetMessageID();
-
-     if (create)
-     {
-         // Clear the write lock in case it's leftover:
-         directory.ClearLock(IndexWriter.WRITE_LOCK_NAME);
-     }
-
-     Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
-     if (!writeLock.Obtain(writeLockTimeout))
-     {
-         // could not obtain the write lock
-         throw new LockObtainFailedException("Index locked for write: " + writeLock);
-     }
-     this.writeLock = writeLock; // save it
-
-     try
-     {
-         if (create)
-         {
-             // Try to read first. This is to allow create
-             // against an index that's currently open for
-             // searching. In this case we write the next
-             // segments_N file with no segments:
-             try
-             {
-                 segmentInfos.Read(directory);
-                 segmentInfos.Clear();
-             }
-             catch (System.IO.IOException)
-             {
-                 // Likely this means it's a fresh directory
-             }
-             segmentInfos.Write(directory);
-         }
-         else
-         {
-             segmentInfos.Read(directory);
-         }
-
-         this.autoCommit = autoCommit;
-         if (!autoCommit)
-         {
-             rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
-         }
-
-         docWriter = new DocumentsWriter(directory, this);
-         docWriter.SetInfoStream(infoStream);
-
-         // Default deleter (for backwards compatibility) is
-         // KeepOnlyLastCommitDeleter:
-         deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter);
-
-         PushMaxBufferedDocs();
-
-         if (infoStream != null)
-         {
-             Message("init: create=" + create);
-             MessageState();
-         }
-     }
-     catch (System.IO.IOException)
-     {
-         // Initialization failed: give the write lock back before propagating.
-         this.writeLock.Release();
-         this.writeLock = null;
-         // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
-         throw;
-     }
- }
-
- /// <summary>
- /// Expert: sets the merge policy used by this writer. If the new policy is a
- /// different instance, the previous policy is closed first.
- /// </summary>
- /// <param name="mp">the new merge policy; must be non-null</param>
- /// <exception cref="System.NullReferenceException">if <c>mp</c> is null</exception>
- public virtual void SetMergePolicy(MergePolicy mp)
- {
-     EnsureOpen();
-     if (mp == null)
-     {
-         throw new System.NullReferenceException("MergePolicy must be non-null");
-     }
-
-     if (mergePolicy != mp)
-     {
-         mergePolicy.Close();
-     }
-     mergePolicy = mp;
-     PushMaxBufferedDocs();
-
-     if (infoStream != null)
-     {
-         Message("setMergePolicy " + mp);
-     }
- }
-
- /// <summary>
- /// Expert: returns the current MergePolicy in use by this writer.
- /// </summary>
- /// <returns>the merge policy currently set on this writer</returns>
- /// <seealso cref="SetMergePolicy"/>
- public virtual MergePolicy GetMergePolicy()
- {
-     EnsureOpen();
-     return this.mergePolicy;
- }
-
- /// <summary>
- /// Expert: sets the merge scheduler used by this writer. If the new scheduler
- /// is a different instance, merges are finished via <c>FinishMerges(true)</c>
- /// and the previous scheduler is closed first.
- /// </summary>
- /// <param name="mergeScheduler">the new merge scheduler; must be non-null</param>
- /// <exception cref="System.NullReferenceException">if
- /// <c>mergeScheduler</c> is null</exception>
- public virtual void SetMergeScheduler(MergeScheduler mergeScheduler)
- {
-     EnsureOpen();
-     if (mergeScheduler == null)
-     {
-         throw new System.NullReferenceException("MergeScheduler must be non-null");
-     }
-
-     bool replacing = this.mergeScheduler != mergeScheduler;
-     if (replacing)
-     {
-         FinishMerges(true);
-         this.mergeScheduler.Close();
-     }
-     this.mergeScheduler = mergeScheduler;
-
-     if (infoStream != null)
-     {
-         Message("setMergeScheduler " + mergeScheduler);
-     }
- }
-
- /// <summary>
- /// Expert: returns the current MergeScheduler in use by this writer.
- /// </summary>
- /// <returns>the merge scheduler currently set on this writer</returns>
- /// <seealso cref="SetMergeScheduler"/>
- public virtual MergeScheduler GetMergeScheduler()
- {
-     EnsureOpen();
-     return this.mergeScheduler;
- }
-
- /// <summary>
- /// Determines the largest segment (measured by document count) that may be
- /// merged with other segments.
- /// </summary>
- /// <remarks>
- /// <para>Small values (e.g., less than 10,000) are best for interactive
- /// indexing, as this limits the length of pauses while indexing to a few
- /// seconds. Larger values are best for batched indexing and speedier
- /// searches.</para>
- /// <para>The default value is <see cref="System.Int32.MaxValue"/>.</para>
- /// <para>Note that this method is a convenience method: it just calls
- /// SetMaxMergeDocs on the policy returned by <c>GetLogMergePolicy()</c>, which
- /// requires mergePolicy to be an instance of LogMergePolicy; otherwise an
- /// IllegalArgumentException is thrown (Java name from the original docs;
- /// presumably ArgumentException in this port - confirm).</para>
- /// <para>The default merge policy (LogByteSizeMergePolicy) also allows you to
- /// set this limit by net size (in MB) of the segment, using its
- /// setMaxMergeMB method.</para>
- /// </remarks>
- /// <param name="maxMergeDocs">the largest document count a segment may have
- /// and still be merged</param>
- public virtual void SetMaxMergeDocs(int maxMergeDocs)
- {
-     // Delegates to the log merge policy.
-     GetLogMergePolicy().SetMaxMergeDocs(maxMergeDocs);
- }
-
- /// <summary>
- /// Returns the largest segment (measured by document count) that may be
- /// merged with other segments.
- /// </summary>
- /// <remarks>
- /// Note that this method is a convenience method: it just calls
- /// GetMaxMergeDocs on the policy returned by <c>GetLogMergePolicy()</c>, which
- /// requires mergePolicy to be an instance of LogMergePolicy; otherwise an
- /// IllegalArgumentException is thrown (Java name from the original docs;
- /// presumably ArgumentException in this port - confirm).
- /// </remarks>
- /// <returns>the current maximum merge document count</returns>
- /// <seealso cref="SetMaxMergeDocs"/>
- public virtual int GetMaxMergeDocs()
- {
-     // Delegates to the log merge policy.
-     return GetLogMergePolicy().GetMaxMergeDocs();
- }
-
- /// <summary>
- /// Sets the maximum number of terms that will be indexed for a single field
- /// in a document.
- /// </summary>
- /// <remarks>
- /// <para>This limits the amount of memory required for indexing, so that
- /// collections with very large files will not crash the indexing process by
- /// running out of memory. This setting refers to the number of running terms,
- /// not to the number of different terms.</para>
- /// <para><strong>Note:</strong> this silently truncates large documents,
- /// excluding from the index all terms that occur further in the document. If
- /// you know your source documents are large, be sure to set this value high
- /// enough to accommodate the expected size. If you set it to
- /// <see cref="System.Int32.MaxValue"/>, then the only limit is your memory,
- /// but you should anticipate running out of memory.</para>
- /// <para>By default, no more than 10,000 terms will be indexed for a
- /// field.</para>
- /// </remarks>
- /// <param name="maxFieldLength">the maximum number of terms indexed per field</param>
- public virtual void SetMaxFieldLength(int maxFieldLength)
- {
-     EnsureOpen();
-     this.maxFieldLength = maxFieldLength;
-
-     if (infoStream != null)
-     {
-         Message("setMaxFieldLength " + maxFieldLength);
-     }
- }
-
- /// <summary>
- /// Returns the maximum number of terms that will be indexed for a single
- /// field in a document.
- /// </summary>
- /// <returns>the current maximum field length</returns>
- /// <seealso cref="SetMaxFieldLength"/>
- public virtual int GetMaxFieldLength()
- {
-     EnsureOpen();
-     return this.maxFieldLength;
- }
-
- /// <summary>
- /// Determines the minimal number of documents required before the buffered
- /// in-memory documents are flushed as a new Segment. Large values generally
- /// give faster indexing.
- /// </summary>
- /// <remarks>
- /// <para>When this is set, the writer will flush every maxBufferedDocs added
- /// documents. Pass in <see cref="DISABLE_AUTO_FLUSH"/> to prevent triggering a
- /// flush due to number of buffered documents. Note that if flushing by RAM
- /// usage is also enabled, then the flush will be triggered by whichever comes
- /// first.</para>
- /// <para>Disabled by default (writer flushes by RAM usage).</para>
- /// </remarks>
- /// <param name="maxBufferedDocs">the document count that triggers a flush, or
- /// <see cref="DISABLE_AUTO_FLUSH"/></param>
- /// <exception cref="System.ArgumentException">if maxBufferedDocs is enabled
- /// but smaller than 2, or it disables maxBufferedDocs when ramBufferSize is
- /// already disabled</exception>
- /// <seealso cref="SetRAMBufferSizeMB"/>
- public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
- {
-     EnsureOpen();
-
-     bool enabled = maxBufferedDocs != DISABLE_AUTO_FLUSH;
-     if (enabled && maxBufferedDocs < 2)
-     {
-         throw new System.ArgumentException("maxBufferedDocs must at least be 2 when enabled");
-     }
-     // The RAM setting is only consulted when doc-count flushing is being disabled.
-     if (!enabled && GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
-     {
-         throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
-     }
-
-     docWriter.SetMaxBufferedDocs(maxBufferedDocs);
-     PushMaxBufferedDocs();
-
-     if (infoStream != null)
-     {
-         Message("setMaxBufferedDocs " + maxBufferedDocs);
-     }
- }
-
- /// <summary>
- /// If we are flushing by doc count (not by RAM usage) and the merge policy is
- /// a LogDocMergePolicy, pushes maxBufferedDocs down as its minMergeDocs, to
- /// keep backwards compatibility.
- /// </summary>
- private void PushMaxBufferedDocs()
- {
-     if (docWriter.GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
-     {
-         return; // not flushing by document count
-     }
-
-     LogDocMergePolicy lmp = mergePolicy as LogDocMergePolicy;
-     if (lmp == null)
-     {
-         return; // only LogDocMergePolicy takes minMergeDocs
-     }
-
-     int maxBufferedDocs = docWriter.GetMaxBufferedDocs();
-     if (lmp.GetMinMergeDocs() != maxBufferedDocs)
-     {
-         if (infoStream != null)
-         {
-             Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
-         }
-         lmp.SetMinMergeDocs(maxBufferedDocs);
-     }
- }
-
- /// <summary>
- /// Returns the number of buffered added documents that will trigger a flush
- /// if enabled.
- /// </summary>
- /// <returns>the value reported by the DocumentsWriter</returns>
- /// <seealso cref="SetMaxBufferedDocs"/>
- public virtual int GetMaxBufferedDocs()
- {
-     EnsureOpen();
-     // The setting lives on the DocumentsWriter, not on this class.
-     return docWriter.GetMaxBufferedDocs();
- }
-
- /// <summary>
- /// Determines the amount of RAM that may be used for buffering added
- /// documents before they are flushed as a new Segment.
- /// </summary>
- /// <remarks>
- /// <para>Generally for faster indexing performance it's best to flush by RAM
- /// usage instead of document count and use as large a RAM buffer as you
- /// can.</para>
- /// <para>When this is set, the writer will flush whenever buffered documents
- /// use this much RAM. Pass in <see cref="DISABLE_AUTO_FLUSH"/> to prevent
- /// triggering a flush due to RAM usage. Note that if flushing by document
- /// count is also enabled, then the flush will be triggered by whichever comes
- /// first.</para>
- /// <para>The default value is <c>DEFAULT_RAM_BUFFER_SIZE_MB</c>.</para>
- /// </remarks>
- /// <param name="mb">the RAM buffer size in MB, or
- /// <see cref="DISABLE_AUTO_FLUSH"/></param>
- /// <exception cref="System.ArgumentException">if ramBufferSize is enabled but
- /// non-positive, or it disables ramBufferSize when maxBufferedDocs is already
- /// disabled</exception>
- public virtual void SetRAMBufferSizeMB(double mb)
- {
-     bool enabled = mb != DISABLE_AUTO_FLUSH;
-     if (enabled && mb <= 0.0)
-     {
-         throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
-     }
-     // The doc-count setting is only consulted when RAM flushing is being disabled.
-     if (!enabled && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
-     {
-         throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
-     }
-
-     docWriter.SetRAMBufferSizeMB(mb);
-
-     if (infoStream != null)
-     {
-         Message("setRAMBufferSizeMB " + mb);
-     }
- }
-
- /// <summary>
- /// Returns the value set by <see cref="SetRAMBufferSizeMB"/> if enabled.
- /// </summary>
- /// <returns>the RAM buffer size reported by the DocumentsWriter</returns>
- public virtual double GetRAMBufferSizeMB()
- {
-     // The setting lives on the DocumentsWriter, not on this class.
-     return docWriter.GetRAMBufferSizeMB();
- }
-
- /// <summary>
- /// Determines the minimal number of delete terms required before the buffered
- /// in-memory delete terms are applied and flushed. If there are documents
- /// buffered in memory at the time, they are merged and a new segment is
- /// created.
- /// </summary>
- /// <remarks>Disabled by default (writer flushes by RAM usage).</remarks>
- /// <param name="maxBufferedDeleteTerms">the number of buffered delete terms
- /// that triggers a flush, or <see cref="DISABLE_AUTO_FLUSH"/></param>
- /// <exception cref="System.ArgumentException">if maxBufferedDeleteTerms is
- /// enabled but smaller than 1</exception>
- /// <seealso cref="SetRAMBufferSizeMB"/>
- public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
- {
-     EnsureOpen();
-
-     bool enabled = maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH;
-     if (enabled && maxBufferedDeleteTerms < 1)
-     {
-         throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
-     }
-
-     docWriter.SetMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
-
-     if (infoStream != null)
-     {
-         Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
-     }
- }
-
- /// <summary>
- /// Returns the number of buffered deleted terms that will trigger a flush if
- /// enabled.
- /// </summary>
- /// <returns>the value reported by the DocumentsWriter</returns>
- /// <seealso cref="SetMaxBufferedDeleteTerms"/>
- public virtual int GetMaxBufferedDeleteTerms()
- {
-     EnsureOpen();
-     // The setting lives on the DocumentsWriter, not on this class.
-     return docWriter.GetMaxBufferedDeleteTerms();
- }
-
- /// <summary>
- /// Determines how often segment indices are merged by addDocument().
- /// </summary>
- /// <remarks>
- /// <para>With smaller values, less RAM is used while indexing, and searches on
- /// unoptimized indices are faster, but indexing speed is slower. With larger
- /// values, more RAM is used during indexing, and while searches on unoptimized
- /// indices are slower, indexing is faster. Thus larger values (&gt; 10) are
- /// best for batch index creation, and smaller values (&lt; 10) for indices
- /// that are interactively maintained.</para>
- /// <para>Note that this method is a convenience method: it just calls
- /// SetMergeFactor on the policy returned by <c>GetLogMergePolicy()</c>, which
- /// requires mergePolicy to be an instance of LogMergePolicy; otherwise an
- /// IllegalArgumentException is thrown (Java name from the original docs;
- /// presumably ArgumentException in this port - confirm).</para>
- /// <para>This must never be less than 2. The default value is 10.</para>
- /// </remarks>
- /// <param name="mergeFactor">the new merge factor</param>
- public virtual void SetMergeFactor(int mergeFactor)
- {
-     // Delegates to the log merge policy.
-     GetLogMergePolicy().SetMergeFactor(mergeFactor);
- }
-
- /// <summary>
- /// Returns the number of segments that are merged at once; this also controls
- /// the total number of segments allowed to accumulate in the index.
- /// </summary>
- /// <remarks>
- /// Note that this method is a convenience method: it just calls
- /// GetMergeFactor on the policy returned by <c>GetLogMergePolicy()</c>, which
- /// requires mergePolicy to be an instance of LogMergePolicy; otherwise an
- /// IllegalArgumentException is thrown (Java name from the original docs;
- /// presumably ArgumentException in this port - confirm).
- /// </remarks>
- /// <returns>the current merge factor</returns>
- /// <seealso cref="SetMergeFactor"/>
- public virtual int GetMergeFactor()
- {
-     // Delegates to the log merge policy.
-     return GetLogMergePolicy().GetMergeFactor();
- }
-
- /// <summary>
- /// Sets the default infoStream; if non-null, it is the infoStream used by a
- /// newly instantiated IndexWriter.
- /// </summary>
- /// <param name="infoStream">the new default stream, or null</param>
- /// <seealso cref="SetInfoStream"/>
- public static void SetDefaultInfoStream(System.IO.TextWriter infoStream)
- {
-     defaultInfoStream = infoStream;
- }
-
- /// <summary>
- /// Returns the current default infoStream for newly instantiated
- /// IndexWriters.
- /// </summary>
- /// <returns>the default stream, which may be null</returns>
- /// <seealso cref="SetDefaultInfoStream"/>
- public static System.IO.TextWriter GetDefaultInfoStream()
- {
-     return defaultInfoStream;
- }
-
- /// <summary>
- /// Sets this writer's infoStream. If non-null, information about merges,
- /// deletes and a message when maxFieldLength is reached will be printed to
- /// it. The stream is also passed on to the DocumentsWriter and the
- /// IndexFileDeleter.
- /// </summary>
- /// <param name="infoStream">the stream to print to, or null</param>
- public virtual void SetInfoStream(System.IO.TextWriter infoStream)
- {
-     EnsureOpen();
-     this.infoStream = infoStream;
-     SetMessageID();
-
-     docWriter.SetInfoStream(infoStream);
-     deleter.SetInfoStream(infoStream);
-
-     if (infoStream != null)
-     {
-         MessageState();
-     }
- }
-
- /// <summary>
- /// Sends a one-line summary of this writer's current configuration and
- /// segments to <c>Message</c>.
- /// </summary>
- private void MessageState()
- {
-     string state = "setInfoStream: dir=" + directory
-         + " autoCommit=" + autoCommit
-         + " mergePolicy=" + mergePolicy
-         + " mergeScheduler=" + mergeScheduler
-         + " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB()
-         + " maxBuffereDocs=" + docWriter.GetMaxBufferedDocs()
-         + " maxBuffereDeleteTerms=" + docWriter.GetMaxBufferedDeleteTerms()
-         + " maxFieldLength=" + maxFieldLength
-         + " index=" + SegString();
-     Message(state);
- }
-
- /// <summary>
- /// Returns the current infoStream in use by this writer.
- /// </summary>
- /// <returns>the stream, which may be null</returns>
- /// <seealso cref="SetInfoStream"/>
- public virtual System.IO.TextWriter GetInfoStream()
- {
-     EnsureOpen();
-     return this.infoStream;
- }
-
- /// <summary>
- /// Sets the write lock timeout stored on this instance of IndexWriter.
- /// Use <see cref="SetDefaultWriteLockTimeout"/> to change the default value
- /// for all instances of IndexWriter.
- /// </summary>
- /// <param name="writeLockTimeout">the new write lock timeout</param>
- /// <seealso cref="SetDefaultWriteLockTimeout"/>
- public virtual void SetWriteLockTimeout(long writeLockTimeout)
- {
-     EnsureOpen();
-     // "this." is required: the parameter shadows the field of the same name.
-     this.writeLockTimeout = writeLockTimeout;
- }
-
- /// <summary>
- /// Returns the allowed timeout when acquiring the write lock.
- /// </summary>
- /// <returns>this writer's write lock timeout</returns>
- /// <seealso cref="SetWriteLockTimeout"/>
- public virtual long GetWriteLockTimeout()
- {
-     EnsureOpen();
-     return this.writeLockTimeout;
- }
-
- /// <summary>
- /// Sets the default (for any instance of IndexWriter) maximum time to wait
- /// for a write lock, in milliseconds.
- /// </summary>
- /// <param name="writeLockTimeout">the new default timeout in milliseconds</param>
- public static void SetDefaultWriteLockTimeout(long writeLockTimeout)
- {
-     WRITE_LOCK_TIMEOUT = writeLockTimeout;
- }
-
- /// <summary>
- /// Returns the default write lock timeout for newly instantiated
- /// IndexWriters.
- /// </summary>
- /// <returns>the current default timeout</returns>
- /// <seealso cref="SetDefaultWriteLockTimeout"/>
- public static long GetDefaultWriteLockTimeout()
- {
-     return WRITE_LOCK_TIMEOUT;
- }
-
- /// <summary>
- /// Flushes all changes to an index and closes all associated files.
- /// Equivalent to <c>Close(true)</c>, i.e. waits for merges.
- /// </summary>
- /// <remarks>
- /// <para>If an Exception is hit during close, e.g. due to disk full or some
- /// other reason, then both the on-disk index and the internal state of the
- /// IndexWriter instance will be consistent. However, the close will not be
- /// complete even though part of it (flushing buffered documents) may have
- /// succeeded, so the write lock will still be held.</para>
- /// <para>If you can correct the underlying cause (e.g. free up some disk
- /// space) then you can call Close() again. Failing that, if you want to force
- /// the write lock to be released (dangerous, because you may then lose
- /// buffered docs in the IndexWriter instance) then you can do something like
- /// this (Java-style sample carried over from the original docs):</para>
- /// <code>
- /// try {
- ///   writer.close();
- /// } finally {
- ///   if (IndexReader.isLocked(directory)) {
- ///     IndexReader.unlock(directory);
- ///   }
- /// }
- /// </code>
- /// <para>after which, you must be certain not to use the writer instance
- /// anymore.</para>
- /// </remarks>
- /// <exception cref="CorruptIndexException">if the index is corrupt</exception>
- /// <exception cref="System.IO.IOException">if there is a low-level IO error</exception>
- public virtual void Close()
- {
-     // Default behaviour: wait for running merges to complete.
-     Close(true);
- }
-
- /// <summary> Closes the index with or without waiting for currently
- /// running merges to finish. This is only meaningful when
- /// using a MergeScheduler that runs merges in background
- /// threads.
- /// </summary>
- /// <param name="waitForMerges">if true, this call will block
- /// until all merges complete; else, it will ask all
- /// running merges to abort, wait until those merges have
- /// finished (which should be at most a few seconds), and
- /// then return.
- /// </param>
- public virtual void Close(bool waitForMerges)
- {
- bool doClose;
+ private Directory directory; // where this index resides
+ private Analyzer analyzer; // how to analyze text
- // If any method has hit an out-of-memory error, then abort
- // on close, in case the internal state of IndexWriter
- // or DocumentsWriter is corrupt
- if (hitOOM)
- Abort();
+ private Similarity similarity; // how to normalize
- lock (this)
- {
- // Ensure that only one thread actually gets to do the closing (first to set 'closing' wins):
- if (!closing)
- {
- doClose = true;
- closing = true;
- }
- else
- doClose = false;
- }
- if (doClose)
- CloseInternal(waitForMerges);
- // Another thread beat us to it (is actually doing the
- // close), so we will block until that other thread
- // has finished closing
- else
- WaitForClose();
- }
-
- /// <summary>
- /// Blocks, holding this writer's monitor, for as long as <c>closing</c> is
- /// set and <c>closed</c> is not.
- /// </summary>
- private void WaitForClose()
- {
-     lock (this)
-     {
-         while (closing && !closed)
-         {
-             try
-             {
-                 System.Threading.Monitor.Wait(this);
-             }
-             catch (System.Threading.ThreadInterruptedException)
-             {
-                 // Interruption is ignored; the loop condition is simply re-checked.
-             }
-         }
-     }
- }
-
- private void CloseInternal(bool waitForMerges)
- {
- try
- {
- if (infoStream != null)
- Message("now flush at close");
-
- docWriter.Close();
-
- // Only allow a new merge to be triggered if we are
- // going to wait for merges:
- Flush(waitForMerges, true);
+ // {{dougsale-2.4.0}}:
+ // per the VS compiler: "a volatile field cannot be of type 'long'"
+ // use uint instead; note that uint is 32 bits wide, so its range is narrower than Java's 64-bit long
+ private volatile uint changeCount; // increments every time a change is completed
+ private long lastCommitChangeCount; // last changeCount that was committed
+
+ private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private Dictionary<SegmentInfo, int> rollbackSegments;
+
+ internal volatile SegmentInfos pendingCommit; // set when a commit is pending (after PrepareCommit() & before Commit())
+ // {{dougsale-2.4.0}}:
+ // per the VS compiler: "a volatile field cannot be of type 'long'"
+ // use uint instead; note that uint is 32 bits wide, so its range is narrower than Java's 64-bit long
+ internal volatile uint pendingCommitChangeCount;
+
+ private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+ private bool localAutoCommit; // saved autoCommit during local transaction
+ private int localFlushedDocCount; // saved docWriter.GetFlushedDocCount during local transaction
+ private bool autoCommit = true; // false if we should commit only on close
+
+ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+ private DocumentsWriter docWriter;
+ private IndexFileDeleter deleter;
+
+ private Dictionary<SegmentInfo, SegmentInfo> segmentsToOptimize = new Dictionary<SegmentInfo, SegmentInfo>(); // used by optimize to note those needing optimization
+
+ private Lock writeLock;
+
+ private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+
+ private bool closeDir;
+ private bool closed;
+ private bool closing;
+
+ // Holds all SegmentInfo instances currently involved in
+ // merges
+ private Dictionary<SegmentInfo, SegmentInfo> mergingSegments = new Dictionary<SegmentInfo, SegmentInfo>();
+
+ private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
+ private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
+ private List<MergePolicy.OneMerge> pendingMerges = new List<MergePolicy.OneMerge>();
+ private Dictionary<MergePolicy.OneMerge, MergePolicy.OneMerge> runningMerges = new Dictionary<MergePolicy.OneMerge, MergePolicy.OneMerge>();
+ private System.Collections.IList mergeExceptions = new System.Collections.ArrayList();
+ private long mergeGen;
+ private bool stopMerges;
+
+ private int flushCount;
+ private int flushDeletesCount;
+ private double maxSyncPauseSeconds = DEFAULT_MAX_SYNC_PAUSE_SECONDS;
+
+ // Used to only allow one AddIndexes to proceed at once
+ // TODO: use ReadWriteLock once we are on 5.0
+ private int readCount; // count of how many threads are holding read lock
+ //private SupportClass.ThreadClass writeThread; // non-null if any thread holds write lock
+ private System.Threading.Thread writeThread; // non-null if any thread holds write lock
- if (waitForMerges)
- // Give merge scheduler last chance to run, in case
- // any pending merges are waiting
- mergeScheduler.Merge(this);
-
- mergePolicy.Close();
-
- FinishMerges(waitForMerges);
-
- mergeScheduler.Close();
-
- lock (this)
- {
- if (commitPending)
- {
- bool success = false;
- try
- {
- segmentInfos.Write(directory); // now commit changes
- success = true;
- }
- finally
- {
- if (!success)
- {
- if (infoStream != null)
- Message("hit exception committing segments file during close");
- DeletePartialSegmentsFile();
- }
- }
- if (infoStream != null)
- Message("close: wrote segments file \"" + segmentInfos.GetCurrentSegmentFileName() + "\"");
-
- deleter.Checkpoint(segmentInfos, true);
-
- commitPending = false;
- rollbackSegmentInfos = null;
- }
-
- if (infoStream != null)
- Message("at close: " + SegString());
-
- docWriter = null;
-
- deleter.Close();
- }
-
- if (closeDir)
- directory.Close();
-
- if (writeLock != null)
- {
- writeLock.Release(); // release write lock
- writeLock = null;
- }
- closed = true;
- }
- catch (OutOfMemoryException oom)
+ /// <summary> Takes the write lock for the calling thread: loops on DoWait()
+ /// while another thread is recorded in writeThread or readCount is above zero,
+ /// then checks the writer is still open and records the calling thread in
+ /// writeThread.
+ /// </summary>
+ /// <throws> AlreadyClosedException (via EnsureOpen) if the writer was closed in the meantime </throws>
+ internal void AcquireWrite()
+ {
+ lock (this)
{
- hitOOM = true;
- throw oom;
+ // NOTE(review): DoWait() presumably blocks on this monitor until pulsed -- confirm
+ while (writeThread != null || readCount > 0)
+ DoWait();
+
+ // We could have been closed while we were waiting:
+ EnsureOpen();
+
+ //writeThread = SupportClass.ThreadClass.Current();
+ writeThread = System.Threading.Thread.CurrentThread;
}
- finally
- {
- lock (this)
- {
- if (!closed)
+ }
+
+        /// <summary> Gives up the write lock recorded by <see cref="AcquireWrite"/>:
+        /// clears <c>writeThread</c> and pulses every thread waiting on this
+        /// writer's monitor.
+        /// </summary>
+        internal void ReleaseWrite()
+        {
+            lock (this)
+            {
+                // Debug-only sanity check: the releasing thread must be the recorded writer.
+                System.Diagnostics.Debug.Assert(writeThread == System.Threading.Thread.CurrentThread);
+
+                writeThread = null;
+
+                // Wake all waiters so they can re-test their wait conditions.
+                System.Threading.Monitor.PulseAll(this);
+            }
+        }
+
+        /// <summary> Registers the calling thread as a reader by incrementing
+        /// <c>readCount</c>. While a different thread is recorded in
+        /// <c>writeThread</c> the caller loops on DoWait(); the thread that
+        /// itself holds the write lock is let through immediately.
+        /// </summary>
+        internal void AcquireRead()
+        {
+            lock (this)
+            {
+                System.Threading.Thread caller = System.Threading.Thread.CurrentThread;
+
+                // Proceed only once there is no writer, or the writer is this very thread.
+                while (!(writeThread == null || writeThread == caller))
+                {
+                    DoWait();
+                }
+
+                readCount++;
+            }
+        }
+
+        /// <summary> Drops one reader registration made by <see cref="AcquireRead"/>.
+        /// When the reader count reaches zero, every thread waiting on this
+        /// writer's monitor is pulsed.
+        /// </summary>
+        internal void ReleaseRead()
+        {
+            lock (this)
+            {
+                readCount -= 1;
+
+                // Debug-only sanity check: releases must never outnumber acquires.
+                System.Diagnostics.Debug.Assert(readCount >= 0);
+
+                if (readCount == 0)
+                {
+                    System.Threading.Monitor.PulseAll(this);
+                }
+            }
+        }
+
+        /// <summary> Internal guard: throws an {@link AlreadyClosedException}
+        /// when this IndexWriter has been closed.
+        /// </summary>
+        /// <param name="includePendingClose">when true, a writer whose
+        /// <code>closing</code> flag is set is also treated as closed</param>
+        /// <throws> AlreadyClosedException if this IndexWriter is closed </throws>
+        protected internal void EnsureOpen(bool includePendingClose)
+        {
+            lock (this)
+            {
+                bool treatAsClosed = closed || (includePendingClose && closing);
+                if (!treatAsClosed)
+                {
+                    return;
+                }
+
+                throw new AlreadyClosedException("this IndexWriter is closed");
+            }
+        }
+
+        /// <summary> Throws an {@link AlreadyClosedException} if this IndexWriter
+        /// is closed or is in the process of closing. Equivalent to
+        /// <code>EnsureOpen(true)</code>.
+        /// </summary>
+        /// <throws> AlreadyClosedException if this IndexWriter is closed or closing </throws>
+        protected void EnsureOpen()
+        {
+            // EnsureOpen(bool) takes lock (this) itself, so wrapping this call in a
+            // second lock on the same re-entrant monitor added nothing.
+            EnsureOpen(true);
+        }
+
+        /// <summary> Writes one line to the infoStream, prefixed with this
+        /// writer's message id and the name of the calling thread. Does nothing
+        /// when no infoStream is set.
+        /// </summary>
+        /// <param name="message">the text to append after the prefix</param>
+        public virtual void Message(string message)
+        {
+            if (infoStream == null)
+            {
+                return;
+            }
+
+            string threadName = SupportClass.ThreadClass.Current().Name;
+            infoStream.WriteLine("IW " + messageID + " [" + threadName + "]: " + message);
+        }
+
+ /// <summary> Stores the given infoStream on this writer. If the stream is
+ /// non-null and no message id has been assigned yet (messageID is still -1),
+ /// the next value of MESSAGE_ID is taken as this writer's messageID first.
+ /// </summary>
+ /// <param name="infoStream">the stream to record in this.infoStream; may be null</param>
+ private void SetMessageID(System.IO.TextWriter infoStream)
+ {
+ lock (this)
+ {
+ if (infoStream != null && messageID == -1)
+ {
+ // The MESSAGE_ID counter is read and incremented under its own lock object
+ lock (MESSAGE_ID_LOCK)
{
- closing = false;
- if (infoStream != null)
- Message("hit exception while closing");
+ messageID = MESSAGE_ID++;
}
- System.Threading.Monitor.PulseAll(this);
- }
- }
- }
-
- /// <summary>Tells the docWriter to close its currently open shared
- /// doc stores (stored fields & vectors files).
- /// Return value specifices whether new doc store files are compound or not.
- /// </summary>
- private bool FlushDocStores()
- {
- lock (this)
- {
-
- System.Collections.IList files = docWriter.Files();
-
- bool useCompoundDocStore = false;
-
- if (files.Count > 0)
- {
- System.String docStoreSegment;
-
- bool success = false;
- try
- {
- docStoreSegment = docWriter.CloseDocStore();
- success = true;
- }
- finally
- {
- if (!success)
- {
- if (infoStream != null)
- Message("hit exception closing doc store segment");
- docWriter.Abort(null);
- }
- }
-
- useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);
-
- if (useCompoundDocStore && docStoreSegment != null)
- {
- // Now build compound doc store file
-
- success = false;
-
- int numSegments = segmentInfos.Count;
- System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
-
- try
- {
- CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
- int size = files.Count;
- for (int i = 0; i < size; i++)
- cfsWriter.AddFile((System.String) files[i]);
-
- // Perform the merge
- cfsWriter.Close();
-
- for (int i = 0; i < numSegments; i++)
- {
- SegmentInfo si = segmentInfos.Info(i);
- if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
- si.SetDocStoreIsCompoundFile(true);
- }
- Checkpoint();
- success = true;
- }
- finally
- {
- if (!success)
- {
-
- if (infoStream != null)
- Message("hit exception building compound file doc store for segment " + docStoreSegment);
-
- // Rollback to no compound file
- for (int i = 0; i < numSegments; i++)
- {
- SegmentInfo si = segmentInfos.Info(i);
- if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
- si.SetDocStoreIsCompoundFile(false);
- }
- deleter.DeleteFile(compoundFileName);
- DeletePartialSegmentsFile();
- }
- }
-
- deleter.Checkpoint(segmentInfos, false);
- }
- }
-
- return useCompoundDocStore;
- }
- }
-
- /// <summary>Release the write lock, if needed. </summary>
- ~IndexWriter()
- {
- try
- {
- if (writeLock != null)
- {
- writeLock.Release(); // release write lock
- writeLock = null;
- }
- }
- finally
- {
- }
- }
-
- /// <summary>Returns the Directory used by this index. </summary>
- public virtual Directory GetDirectory()
- {
- EnsureOpen();
- return directory;
- }
-
- /// <summary>Returns the analyzer used by this index. </summary>
- public virtual Analyzer GetAnalyzer()
- {
- EnsureOpen();
- return analyzer;
- }
-
- /// <summary>Returns the number of documents currently in this index. </summary>
- public virtual int DocCount()
- {
- lock (this)
- {
- EnsureOpen();
- int count = docWriter.GetNumDocsInRAM();
- for (int i = 0; i < segmentInfos.Count; i++)
- {
- SegmentInfo si = segmentInfos.Info(i);
- count += si.docCount;
- }
- return count;
- }
- }
-
- /// <summary> The maximum number of terms that will be indexed for a single field in a
- /// document. This limits the amount of memory required for indexing, so that
- /// collections with very large files will not crash the indexing process by
- /// running out of memory.<p/>
- /// Note that this effectively truncates large documents, excluding from the
- /// index terms that occur further in the document. If you know your source
- /// documents are large, be sure to set this value high enough to accomodate
- /// the expected size. If you set it to Integer.MAX_VALUE, then the only limit
- /// is your memory, but you should anticipate an OutOfMemoryError.<p/>
- /// By default, no more than 10,000 terms will be indexed for a field.
- ///
- /// </summary>
- private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
-
- /// <summary> Adds a document to this index. If the document contains more than
- /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
- /// discarded.
- ///
- /// <p> Note that if an Exception is hit (for example disk full)
- /// then the index will be consistent, but this document
- /// may not have been added. Furthermore, it's possible
- /// the index will have one segment in non-compound format
- /// even when using compound files (when a merge has
- /// partially succeeded).</p>
- ///
- /// <p> This method periodically flushes pending documents
- /// to the Directory (every {@link #setMaxBufferedDocs}),
- /// and also periodically merges segments in the index
- /// (every {@link #setMergeFactor} flushes). When this
- /// occurs, the method will take more time to run (possibly
- /// a long time if the index is large), and will require
- /// free temporary space in the Directory to do the
- /// merging.</p>
- ///
- /// <p>The amount of free space required when a merge is triggered is
- /// up to 1X the size of all segments being merged, when no
- /// readers/searchers are open against the index, and up to 2X the
- /// size of all segments being merged when readers/searchers are open
- /// against the index (see {@link #Optimize()} for details). The
- /// sequence of primitive merge operations performed is governed by
- /// the merge policy.
- ///
- /// <p>Note that each term in the document can be no longer
- /// than 16383 characters, otherwise an
- /// IllegalArgumentException will be thrown.</p>
- ///
- /// </summary>
- /// <throws> CorruptIndexException if the index is corrupt </throws>
- /// <throws> IOException if there is a low-level IO error </throws>
- public virtual void AddDocument(Document doc)
- {
- AddDocument(doc, analyzer);
- }
-
- /// <summary> Adds a document to this index, using the provided analyzer instead of the
- /// value of {@link #GetAnalyzer()}. If the document contains more than
- /// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
- /// discarded.
- ///
- /// <p>See {@link #AddDocument(Document)} for details on
- /// index and IndexWriter state after an Exception, and
- /// flushing/merging temporary free space requirements.</p>
- ///
- /// </summary>
- /// <throws> CorruptIndexException if the index is corrupt </throws>
- /// <throws> IOException if there is a low-level IO error </throws>
- public virtual void AddDocument(Document doc, Analyzer analyzer)
- {
- EnsureOpen();
- bool doFlush = false;
- bool success = false;
+ }
+ // Stored unconditionally, so passing null clears the writer's infoStream
+ this.infoStream = infoStream;
+ }
+ }
+
+        /// <summary> Returns the current mergePolicy typed as a LogMergePolicy.
+        /// </summary>
+        /// <throws> System.ArgumentException if the current mergePolicy is not a LogMergePolicy </throws>
+        private LogMergePolicy GetLogMergePolicy()
+        {
+            // Reject any other policy type up front, then cast.
+            if (!(mergePolicy is LogMergePolicy))
+            {
+                throw new System.ArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
+            }
+
+            return (LogMergePolicy)mergePolicy;
+        }
+
+        /// <summary><p>Returns the current setting of whether newly flushed
+        /// segments will use the compound file format. This is only the
+        /// value previously set with SetUseCompoundFile(bool), or the
+        /// default value (true); it cannot be used to query the status of
+        /// previously flushed segments.</p>
+        ///
+        /// <p>This is a convenience method: it simply forwards to
+        /// mergePolicy.GetUseCompoundFile, provided mergePolicy is an
+        /// instance of {@link LogMergePolicy}. Otherwise a
+        /// System.ArgumentException is thrown.</p>
+        ///
+        /// </summary>
+        /// <seealso cref="SetUseCompoundFile(bool)">
+        /// </seealso>
+        public virtual bool GetUseCompoundFile()
+        {
+            LogMergePolicy logPolicy = GetLogMergePolicy();
+            return logPolicy.GetUseCompoundFile();
+        }
+
+        /// <summary><p>Turns usage of the compound file format on or off. When
+        /// on, the multiple files of each segment are merged into a single
+        /// file when a new segment is flushed.</p>
+        ///
+        /// <p>This is a convenience method: it forwards the value to
+        /// SetUseCompoundFile and SetUseCompoundDocStore on the mergePolicy,
+        /// provided mergePolicy is an instance of {@link LogMergePolicy}.
+        /// Otherwise a System.ArgumentException is thrown.</p>
+        /// </summary>
+        /// <param name="value_Renamed">the flag passed to both setters</param>
+        public virtual void SetUseCompoundFile(bool value_Renamed)
+        {
+            LogMergePolicy logPolicy = GetLogMergePolicy();
+
+            // The same flag drives both the segment files and the doc store files.
+            logPolicy.SetUseCompoundFile(value_Renamed);
+            logPolicy.SetUseCompoundDocStore(value_Renamed);
+        }
+
+ /// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
+ /// The value is stored on this writer and also handed to the docWriter.
+ /// </summary>
+ /// <param name="similarity">the Similarity implementation to use</param>
+ /// <throws> AlreadyClosedException (via EnsureOpen) if this IndexWriter is closed or closing </throws>
+ /// <seealso cref="Similarity.SetDefault(Similarity)">
+ /// </seealso>
+ public virtual void SetSimilarity(Similarity similarity)
+ {
+ EnsureOpen();
+ this.similarity = similarity;
+ // keep the docWriter's similarity the same as this writer's
+ docWriter.SetSimilarity(similarity);
+ }
+
+        /// <summary>Expert: Returns the Similarity implementation used by this IndexWriter.
+        ///
+        /// <p>This defaults to the current value of {@link Similarity#GetDefault()}.</p>
+        /// </summary>
+        /// <throws> AlreadyClosedException (via EnsureOpen) if this IndexWriter is closed or closing </throws>
+        public virtual Similarity GetSimilarity()
+        {
+            EnsureOpen();
+
+            return similarity;
+        }
+
+ /// <summary>Expert: Set the interval between indexed terms. Large values cause less
+ /// memory to be used by IndexReader, but slow random-access to terms. Small
+ /// values cause more memory to be used by an IndexReader, and speed
+ /// random-access to terms.
+ ///
+ /// This parameter determines the amount of computation required per query
+ /// term, regardless of the number of documents that contain that term. In
+ /// particular, it is the maximum number of other terms that must be
+ /// scanned before a term is located and its frequency and position information
+ /// may be processed. In a large index with user-entered query terms, query
+ /// processing time is likely to be dominated not by term lookup but rather
+ /// by the processing of frequency and positional data. In a small index
+ /// or when many uncommon query terms are generated (e.g., by wildcard
+ /// queries) term lookup may become a dominant cost.
+ ///
+ /// In particular, <code>numUniqueTerms/interval</code> terms are read into
+ /// memory by an IndexReader, and, on average, <code>interval/2</code> terms
+ /// must be scanned for each random term access.
+ ///
+ /// </summary>
+ /// <param name="interval">the new interval between indexed terms; stored as given, without validation</param>
+ /// <throws> AlreadyClosedException (via EnsureOpen) if this IndexWriter is closed or closing </throws>
+ /// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
+ /// </seealso>
+ public virtual void SetTermIndexInterval(int interval)
+ {
+ EnsureOpen();
+ this.termIndexInterval = interval;
+ }
+
+ /// <summary>Expert: Return the interval between indexed terms.
+ ///
+ /// </summary>
+ /// <returns>the current value of termIndexInterval</returns>
+ /// <throws> AlreadyClosedException if this IndexWriter is closed (a writer that is only closing is still allowed) </throws>
+ /// <seealso cref="SetTermIndexInterval(int)">
+ /// </seealso>
+ public virtual int GetTermIndexInterval()
+ {
+ // we pass false because this method is called by SegmentMerger while we are in the process of closing
+ EnsureOpen(false);
+ return termIndexInterval;
+ }
+
+ /// <summary>
+ /// Constructs an IndexWriter for the index in <code>path</code>.
+ /// Text will be analyzed with <code>a</code>. If <code>create</code>
+ /// is true, then a new, empty index will be created in
+ /// <code>path</code>, replacing the index already there, if any.
+ /// <para>
+ /// NOTE: autoCommit (see above) is set to false with this constructor.
+ /// </para>
+ /// <para>Throws CorruptIndexException if the index is corrupt</para>
+ /// <para>Throws LockObtainFailedException if another writer has this index open (<code>write.lock</code> could not be obtained)</para>
+ /// <para>Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and <code>create</code> is <code>false</code> or if there is any other low-level IO error</para>
+ /// </summary>
+ /// <param name="path">the path to the index directory</param>
+ /// <param name="a">the analyzer to use</param>
+ /// <param name="create"><code>true</code> to create the index or overwrite the existing one; <code>false</code> to append to the existing index</param>
+ /// <param name="mfl">Maximum field Length: LIMITED, UNLIMITED, or user-specified</param>
+ public IndexWriter(string path, Analyzer a, bool create, MaxFieldLength mfl)
+ {
+ InitBlock();
+ // The directory is obtained from FSDirectory for the given path; the field
+ // length limit comes from mfl.GetLimit().
+ // NOTE(review): the sixth argument (false) is presumably autoCommit, matching
+ // the note in the summary -- confirm against Init's signature.
+ Init(FSDirectory.GetDirectory(path), a, create, true, null, false, mfl.GetLimit());
+ }
+
+ /// <summary>
+ /// Constructs an IndexWriter for the index in <code>path</code>.
+ /// Text will be analyzed with <code>a</code>. If <code>create</code>
+ /// is true, then a new, empty index will be created in
+ /// <code>path</code>, replacing the index already there, if any.
+ /// The maximum field length is DEFAULT_MAX_FIELD_LENGTH.
+ /// <para>Throws CorruptIndexException if the index is corrupt</para>
+ /// <para>Throws LockObtainFailedException if another writer has this index open (<code>write.lock</code> could not be obtained)</para>
+ /// <para>Throws System.IO.IOException if the directory cannot be read/written to, or if it does not exist and <code>create</code> is <code>false</code> or if there is any other low-level IO error</para>
+ /// </summary>
+ /// <param name="path">the path to the index directory</param>
+ /// <param name="a">the analyzer to use</param>
+ /// <param name="create"><code>true</code> to create the index or overwrite the existing one; <code>false</code> to append to the existing index</param>
+ [System.Obsolete("This constructor will be removed in the 3.0 release. Use IndexWriter(string, Analyzer, bool, MaxFieldLength) instead, and call Commit() when needed")]
+ public IndexWriter(string path, Analyzer a, bool create)
+ {
+ InitBlock();
+ // NOTE(review): the sixth argument (true) is presumably autoCommit, in contrast
+ // to the MaxFieldLength overload which passes false -- confirm against Init's signature.
+ Init(FSDirectory.GetDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH);
+ }
+
+ /// <summary>
+ /// Constructs an IndexWriter for the index in <code>path</code>.
+ /// Text will be analyzed with <code>a</code>. If <code>create</code>
[... 8516 lines stripped ...]