You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:39:58 UTC
[15/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/DocumentsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/DocumentsWriter.cs b/src/core/Index/DocumentsWriter.cs
index 6545d11..c5d9f40 100644
--- a/src/core/Index/DocumentsWriter.cs
+++ b/src/core/Index/DocumentsWriter.cs
@@ -34,171 +34,171 @@ using Weight = Lucene.Net.Search.Weight;
namespace Lucene.Net.Index
{
-
- /// <summary> This class accepts multiple added documents and directly
- /// writes a single segment file. It does this more
- /// efficiently than creating a single segment per document
- /// (with DocumentWriter) and doing standard merges on those
- /// segments.
- ///
- /// Each added document is passed to the <see cref="DocConsumer" />,
- /// which in turn processes the document and interacts with
- /// other consumers in the indexing chain. Certain
- /// consumers, like <see cref="StoredFieldsWriter" /> and <see cref="TermVectorsTermsWriter" />
- ///, digest a document and
- /// immediately write bytes to the "doc store" files (ie,
- /// they do not consume RAM per document, except while they
- /// are processing the document).
- ///
- /// Other consumers, eg <see cref="FreqProxTermsWriter" /> and
- /// <see cref="NormsWriter" />, buffer bytes in RAM and flush only
- /// when a new segment is produced.
- /// Once we have used our allowed RAM buffer, or the number
- /// of added docs is large enough (in the case we are
- /// flushing by doc count instead of RAM usage), we create a
- /// real segment and flush it to the Directory.
- ///
- /// Threads:
- ///
- /// Multiple threads are allowed into addDocument at once.
- /// There is an initial synchronized call to getThreadState
- /// which allocates a ThreadState for this thread. The same
- /// thread will get the same ThreadState over time (thread
- /// affinity) so that if there are consistent patterns (for
- /// example each thread is indexing a different content
- /// source) then we make better use of RAM. Then
- /// processDocument is called on that ThreadState without
- /// synchronization (most of the "heavy lifting" is in this
- /// call). Finally the synchronized "finishDocument" is
- /// called to flush changes to the directory.
- ///
- /// When flush is called by IndexWriter we forcefully idle
- /// all threads and flush only once they are all idle. This
- /// means you can call flush with a given thread even while
- /// other threads are actively adding/deleting documents.
- ///
- ///
- /// Exceptions:
- ///
- /// Because this class directly updates in-memory posting
- /// lists, and flushes stored fields and term vectors
- /// directly to files in the directory, there are certain
- /// limited times when an exception can corrupt this state.
- /// For example, a disk full while flushing stored fields
- /// leaves this file in a corrupt state. Or, an OOM
- /// exception while appending to the in-memory posting lists
- /// can corrupt that posting list. We call such exceptions
- /// "aborting exceptions". In these cases we must call
- /// abort() to discard all docs added since the last flush.
- ///
- /// All other exceptions ("non-aborting exceptions") can
- /// still partially update the index structures. These
- /// updates are consistent, but, they represent only a part
- /// of the document seen up until the exception was hit.
- /// When this happens, we immediately mark the document as
- /// deleted so that the document is always atomically ("all
- /// or none") added to the index.
- /// </summary>
-
- public sealed class DocumentsWriter : IDisposable
- {
- internal class AnonymousClassIndexingChain:IndexingChain
- {
-
- internal override DocConsumer GetChain(DocumentsWriter documentsWriter)
- {
- /*
- This is the current indexing chain:
-
- DocConsumer / DocConsumerPerThread
- --> code: DocFieldProcessor / DocFieldProcessorPerThread
- --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
- --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
- --> code: DocInverter / DocInverterPerThread / DocInverterPerField
- --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
- --> code: TermsHash / TermsHashPerThread / TermsHashPerField
- --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
- --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
- --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
- --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
- --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
- --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
- */
-
- // Build up indexing chain:
-
- TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
- TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
-
- InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, new TermsHash(documentsWriter, false, termVectorsWriter, null));
- NormsWriter normsWriter = new NormsWriter();
- DocInverter docInverter = new DocInverter(termsHash, normsWriter);
- return new DocFieldProcessor(documentsWriter, docInverter);
- }
- }
- private void InitBlock()
- {
- maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
- maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
- ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);
- waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
- waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
- freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 1.05);
- freeLevel = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 0.95);
- maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
- skipDocWriter = new SkipDocWriter();
+
+ /// <summary> This class accepts multiple added documents and directly
+ /// writes a single segment file. It does this more
+ /// efficiently than creating a single segment per document
+ /// (with DocumentWriter) and doing standard merges on those
+ /// segments.
+ ///
+ /// Each added document is passed to the <see cref="DocConsumer" />,
+ /// which in turn processes the document and interacts with
+ /// other consumers in the indexing chain. Certain
+ /// consumers, like <see cref="StoredFieldsWriter" /> and <see cref="TermVectorsTermsWriter" />
+ ///, digest a document and
+ /// immediately write bytes to the "doc store" files (ie,
+ /// they do not consume RAM per document, except while they
+ /// are processing the document).
+ ///
+ /// Other consumers, eg <see cref="FreqProxTermsWriter" /> and
+ /// <see cref="NormsWriter" />, buffer bytes in RAM and flush only
+ /// when a new segment is produced.
+ /// Once we have used our allowed RAM buffer, or the number
+ /// of added docs is large enough (in the case we are
+ /// flushing by doc count instead of RAM usage), we create a
+ /// real segment and flush it to the Directory.
+ ///
+ /// Threads:
+ ///
+ /// Multiple threads are allowed into addDocument at once.
+ /// There is an initial synchronized call to getThreadState
+ /// which allocates a ThreadState for this thread. The same
+ /// thread will get the same ThreadState over time (thread
+ /// affinity) so that if there are consistent patterns (for
+ /// example each thread is indexing a different content
+ /// source) then we make better use of RAM. Then
+ /// processDocument is called on that ThreadState without
+ /// synchronization (most of the "heavy lifting" is in this
+ /// call). Finally the synchronized "finishDocument" is
+ /// called to flush changes to the directory.
+ ///
+ /// When flush is called by IndexWriter we forcefully idle
+ /// all threads and flush only once they are all idle. This
+ /// means you can call flush with a given thread even while
+ /// other threads are actively adding/deleting documents.
+ ///
+ ///
+ /// Exceptions:
+ ///
+ /// Because this class directly updates in-memory posting
+ /// lists, and flushes stored fields and term vectors
+ /// directly to files in the directory, there are certain
+ /// limited times when an exception can corrupt this state.
+ /// For example, a disk full while flushing stored fields
+ /// leaves this file in a corrupt state. Or, an OOM
+ /// exception while appending to the in-memory posting lists
+ /// can corrupt that posting list. We call such exceptions
+ /// "aborting exceptions". In these cases we must call
+ /// abort() to discard all docs added since the last flush.
+ ///
+ /// All other exceptions ("non-aborting exceptions") can
+ /// still partially update the index structures. These
+ /// updates are consistent, but, they represent only a part
+ /// of the document seen up until the exception was hit.
+ /// When this happens, we immediately mark the document as
+ /// deleted so that the document is always atomically ("all
+ /// or none") added to the index.
+ /// </summary>
+
+ public sealed class DocumentsWriter : IDisposable
+ {
+ internal class AnonymousClassIndexingChain:IndexingChain
+ {
+
+ internal override DocConsumer GetChain(DocumentsWriter documentsWriter)
+ {
+ /*
+ This is the current indexing chain:
+
+ DocConsumer / DocConsumerPerThread
+ --> code: DocFieldProcessor / DocFieldProcessorPerThread
+ --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
+ --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
+ --> code: DocInverter / DocInverterPerThread / DocInverterPerField
+ --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: TermsHash / TermsHashPerThread / TermsHashPerField
+ --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
+ --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
+ --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
+ --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
+ --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
+ */
+
+ // Build up indexing chain:
+
+ TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
+ TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
+
+ InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, new TermsHash(documentsWriter, false, termVectorsWriter, null));
+ NormsWriter normsWriter = new NormsWriter();
+ DocInverter docInverter = new DocInverter(termsHash, normsWriter);
+ return new DocFieldProcessor(documentsWriter, docInverter);
+ }
+ }
+ private void InitBlock()
+ {
+ maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
+ maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+ ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 1.05);
+ freeLevel = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 0.95);
+ maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
+ skipDocWriter = new SkipDocWriter();
byteBlockAllocator = new ByteBlockAllocator(this, DocumentsWriter.BYTE_BLOCK_SIZE);
perDocAllocator = new ByteBlockAllocator(this,DocumentsWriter.PER_DOC_BLOCK_SIZE);
- waitQueue = new WaitQueue(this);
- }
-
- internal IndexWriter writer;
- internal Directory directory;
-
- internal System.String segment; // Current segment we are working on
- private System.String docStoreSegment; // Current doc-store segment we are writing
- private int docStoreOffset; // Current starting doc-store offset of current segment
-
- private int nextDocID; // Next docID to be added
- private int numDocsInRAM; // # docs buffered in RAM
- internal int numDocsInStore; // # docs written to doc stores
-
- // Max # ThreadState instances; if there are more threads
- // than this they share ThreadStates
- private const int MAX_THREAD_STATE = 5;
- private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0];
+ waitQueue = new WaitQueue(this);
+ }
+
+ internal IndexWriter writer;
+ internal Directory directory;
+
+ internal System.String segment; // Current segment we are working on
+ private System.String docStoreSegment; // Current doc-store segment we are writing
+ private int docStoreOffset; // Current starting doc-store offset of current segment
+
+ private int nextDocID; // Next docID to be added
+ private int numDocsInRAM; // # docs buffered in RAM
+ internal int numDocsInStore; // # docs written to doc stores
+
+ // Max # ThreadState instances; if there are more threads
+ // than this they share ThreadStates
+ private const int MAX_THREAD_STATE = 5;
+ private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0];
private HashMap<ThreadClass, DocumentsWriterThreadState> threadBindings = new HashMap<ThreadClass, DocumentsWriterThreadState>();
-
- private int pauseThreads; // Non-zero when we need all threads to
- // pause (eg to flush)
- internal bool flushPending; // True when a thread has decided to flush
- internal bool bufferIsFull; // True when it's time to write segment
- private bool aborting; // True if an abort is pending
-
- private DocFieldProcessor docFieldProcessor;
-
- internal System.IO.StreamWriter infoStream;
- internal int maxFieldLength;
- internal Similarity similarity;
-
- internal IList<string> newFiles;
-
- internal class DocState
- {
- internal DocumentsWriter docWriter;
- internal Analyzer analyzer;
- internal int maxFieldLength;
- internal System.IO.StreamWriter infoStream;
- internal Similarity similarity;
- internal int docID;
- internal Document doc;
- internal System.String maxTermPrefix;
-
- // Only called by asserts
- public bool TestPoint(System.String name)
- {
- return docWriter.writer.TestPoint(name);
- }
+
+ private int pauseThreads; // Non-zero when we need all threads to
+ // pause (eg to flush)
+ internal bool flushPending; // True when a thread has decided to flush
+ internal bool bufferIsFull; // True when it's time to write segment
+ private bool aborting; // True if an abort is pending
+
+ private DocFieldProcessor docFieldProcessor;
+
+ internal System.IO.StreamWriter infoStream;
+ internal int maxFieldLength;
+ internal Similarity similarity;
+
+ internal IList<string> newFiles;
+
+ internal class DocState
+ {
+ internal DocumentsWriter docWriter;
+ internal Analyzer analyzer;
+ internal int maxFieldLength;
+ internal System.IO.StreamWriter infoStream;
+ internal Similarity similarity;
+ internal int docID;
+ internal Document doc;
+ internal System.String maxTermPrefix;
+
+ // Only called by asserts
+ public bool TestPoint(System.String name)
+ {
+ return docWriter.writer.TestPoint(name);
+ }
public void Clear()
{
@@ -207,26 +207,26 @@ namespace Lucene.Net.Index
doc = null;
analyzer = null;
}
- }
-
- /// <summary>Consumer returns this on each doc. This holds any
- /// state that must be flushed synchronized "in docID
- /// order". We gather these and flush them in order.
- /// </summary>
- internal abstract class DocWriter
- {
- internal DocWriter next;
- internal int docID;
- public abstract void Finish();
- public abstract void Abort();
- public abstract long SizeInBytes();
-
- internal void SetNext(DocWriter next)
- {
- this.next = next;
- }
- }
-
+ }
+
+ /// <summary>Consumer returns this on each doc. This holds any
+ /// state that must be flushed synchronized "in docID
+ /// order". We gather these and flush them in order.
+ /// </summary>
+ internal abstract class DocWriter
+ {
+ internal DocWriter next;
+ internal int docID;
+ public abstract void Finish();
+ public abstract void Abort();
+ public abstract long SizeInBytes();
+
+ internal void SetNext(DocWriter next)
+ {
+ this.next = next;
+ }
+ }
+
/*
* Create and return a new DocWriterBuffer.
*/
@@ -276,595 +276,595 @@ namespace Lucene.Net.Index
}
}
- /// <summary> The IndexingChain must define the <see cref="GetChain(DocumentsWriter)" /> method
- /// which returns the DocConsumer that the DocumentsWriter calls to process the
- /// documents.
- /// </summary>
- internal abstract class IndexingChain
- {
- internal abstract DocConsumer GetChain(DocumentsWriter documentsWriter);
- }
-
- internal static readonly IndexingChain DefaultIndexingChain;
-
- internal DocConsumer consumer;
-
- // Deletes done after the last flush; these are discarded
- // on abort
- private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
-
- // Deletes done before the last flush; these are still
- // kept on abort
- private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
-
- // The max number of delete terms that can be buffered before
- // they must be flushed to disk.
- private int maxBufferedDeleteTerms;
-
- // How much RAM we can use before flushing. This is 0 if
- // we are flushing by doc count instead.
- private long ramBufferSize;
- private long waitQueuePauseBytes;
- private long waitQueueResumeBytes;
-
- // If we've allocated 5% over our RAM budget, we then
- // free down to 95%
- private long freeTrigger;
- private long freeLevel;
-
- // Flush @ this number of docs. If ramBufferSize is
- // non-zero we will flush by RAM usage instead.
- private int maxBufferedDocs;
-
- private int flushedDocCount; // How many docs already flushed to index
-
- internal void UpdateFlushedDocCount(int n)
- {
- lock (this)
- {
- flushedDocCount += n;
- }
- }
- internal int GetFlushedDocCount()
- {
- lock (this)
- {
- return flushedDocCount;
- }
- }
- internal void SetFlushedDocCount(int n)
- {
- lock (this)
- {
- flushedDocCount = n;
- }
- }
-
- private bool closed;
-
- internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain)
- {
- InitBlock();
- this.directory = directory;
- this.writer = writer;
- this.similarity = writer.Similarity;
- flushedDocCount = writer.MaxDoc();
-
- consumer = indexingChain.GetChain(this);
- if (consumer is DocFieldProcessor)
- {
- docFieldProcessor = (DocFieldProcessor) consumer;
- }
- }
-
- /// <summary>Returns true if any of the fields in the current
- /// buffered docs have omitTermFreqAndPositions==false
- /// </summary>
- internal bool HasProx()
- {
- return (docFieldProcessor != null)?docFieldProcessor.fieldInfos.HasProx():true;
- }
-
- /// <summary>If non-null, various details of indexing are printed
- /// here.
- /// </summary>
- internal void SetInfoStream(System.IO.StreamWriter infoStream)
- {
- lock (this)
- {
- this.infoStream = infoStream;
- for (int i = 0; i < threadStates.Length; i++)
- threadStates[i].docState.infoStream = infoStream;
- }
- }
-
- internal void SetMaxFieldLength(int maxFieldLength)
- {
- lock (this)
- {
- this.maxFieldLength = maxFieldLength;
- for (int i = 0; i < threadStates.Length; i++)
- threadStates[i].docState.maxFieldLength = maxFieldLength;
- }
- }
-
- internal void SetSimilarity(Similarity similarity)
- {
- lock (this)
- {
- this.similarity = similarity;
- for (int i = 0; i < threadStates.Length; i++)
- threadStates[i].docState.similarity = similarity;
- }
- }
-
- /// <summary>Set how much RAM we can use before flushing. </summary>
- internal void SetRAMBufferSizeMB(double mb)
- {
- lock (this)
- {
- if (mb == IndexWriter.DISABLE_AUTO_FLUSH)
- {
- ramBufferSize = IndexWriter.DISABLE_AUTO_FLUSH;
- waitQueuePauseBytes = 4 * 1024 * 1024;
- waitQueueResumeBytes = 2 * 1024 * 1024;
- }
- else
- {
- ramBufferSize = (long) (mb * 1024 * 1024);
- waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
- waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
- freeTrigger = (long) (1.05 * ramBufferSize);
- freeLevel = (long) (0.95 * ramBufferSize);
- }
- }
- }
-
- internal double GetRAMBufferSizeMB()
- {
- lock (this)
- {
- if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH)
- {
- return ramBufferSize;
- }
- else
- {
- return ramBufferSize / 1024.0 / 1024.0;
- }
- }
- }
+ /// <summary> The IndexingChain must define the <see cref="GetChain(DocumentsWriter)" /> method
+ /// which returns the DocConsumer that the DocumentsWriter calls to process the
+ /// documents.
+ /// </summary>
+ internal abstract class IndexingChain
+ {
+ internal abstract DocConsumer GetChain(DocumentsWriter documentsWriter);
+ }
+
+ internal static readonly IndexingChain DefaultIndexingChain;
+
+ internal DocConsumer consumer;
+
+ // Deletes done after the last flush; these are discarded
+ // on abort
+ private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
+
+ // Deletes done before the last flush; these are still
+ // kept on abort
+ private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
+
+ // The max number of delete terms that can be buffered before
+ // they must be flushed to disk.
+ private int maxBufferedDeleteTerms;
+
+ // How much RAM we can use before flushing. This is 0 if
+ // we are flushing by doc count instead.
+ private long ramBufferSize;
+ private long waitQueuePauseBytes;
+ private long waitQueueResumeBytes;
+
+ // If we've allocated 5% over our RAM budget, we then
+ // free down to 95%
+ private long freeTrigger;
+ private long freeLevel;
+
+ // Flush @ this number of docs. If ramBufferSize is
+ // non-zero we will flush by RAM usage instead.
+ private int maxBufferedDocs;
+
+ private int flushedDocCount; // How many docs already flushed to index
+
+ internal void UpdateFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount += n;
+ }
+ }
+ internal int GetFlushedDocCount()
+ {
+ lock (this)
+ {
+ return flushedDocCount;
+ }
+ }
+ internal void SetFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount = n;
+ }
+ }
+
+ private bool closed;
+
+ internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain)
+ {
+ InitBlock();
+ this.directory = directory;
+ this.writer = writer;
+ this.similarity = writer.Similarity;
+ flushedDocCount = writer.MaxDoc();
+
+ consumer = indexingChain.GetChain(this);
+ if (consumer is DocFieldProcessor)
+ {
+ docFieldProcessor = (DocFieldProcessor) consumer;
+ }
+ }
+
+ /// <summary>Returns true if any of the fields in the current
+ /// buffered docs have omitTermFreqAndPositions==false
+ /// </summary>
+ internal bool HasProx()
+ {
+ return (docFieldProcessor != null)?docFieldProcessor.fieldInfos.HasProx():true;
+ }
+
+ /// <summary>If non-null, various details of indexing are printed
+ /// here.
+ /// </summary>
+ internal void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ lock (this)
+ {
+ this.infoStream = infoStream;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.infoStream = infoStream;
+ }
+ }
+
+ internal void SetMaxFieldLength(int maxFieldLength)
+ {
+ lock (this)
+ {
+ this.maxFieldLength = maxFieldLength;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.maxFieldLength = maxFieldLength;
+ }
+ }
+
+ internal void SetSimilarity(Similarity similarity)
+ {
+ lock (this)
+ {
+ this.similarity = similarity;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.similarity = similarity;
+ }
+ }
+
+ /// <summary>Set how much RAM we can use before flushing. </summary>
+ internal void SetRAMBufferSizeMB(double mb)
+ {
+ lock (this)
+ {
+ if (mb == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ ramBufferSize = IndexWriter.DISABLE_AUTO_FLUSH;
+ waitQueuePauseBytes = 4 * 1024 * 1024;
+ waitQueueResumeBytes = 2 * 1024 * 1024;
+ }
+ else
+ {
+ ramBufferSize = (long) (mb * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (1.05 * ramBufferSize);
+ freeLevel = (long) (0.95 * ramBufferSize);
+ }
+ }
+ }
+
+ internal double GetRAMBufferSizeMB()
+ {
+ lock (this)
+ {
+ if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ return ramBufferSize;
+ }
+ else
+ {
+ return ramBufferSize / 1024.0 / 1024.0;
+ }
+ }
+ }
- /// <summary>Gets or sets max buffered docs, which means we will flush by
- /// doc count instead of by RAM usage.
- /// </summary>
- internal int MaxBufferedDocs
- {
- get { return maxBufferedDocs; }
- set { maxBufferedDocs = value; }
- }
+ /// <summary>Gets or sets max buffered docs, which means we will flush by
+ /// doc count instead of by RAM usage.
+ /// </summary>
+ internal int MaxBufferedDocs
+ {
+ get { return maxBufferedDocs; }
+ set { maxBufferedDocs = value; }
+ }
- /// <summary>Get current segment name we are writing. </summary>
- internal string Segment
- {
- get { return segment; }
- }
+ /// <summary>Get current segment name we are writing. </summary>
+ internal string Segment
+ {
+ get { return segment; }
+ }
- /// <summary>Returns how many docs are currently buffered in RAM. </summary>
- internal int NumDocsInRAM
- {
- get { return numDocsInRAM; }
- }
+ /// <summary>Returns how many docs are currently buffered in RAM. </summary>
+ internal int NumDocsInRAM
+ {
+ get { return numDocsInRAM; }
+ }
- /// <summary>Returns the current doc store segment we are writing
- /// to.
- /// </summary>
- internal string DocStoreSegment
- {
- get
- {
- lock (this)
- {
- return docStoreSegment;
- }
- }
- }
+ /// <summary>Returns the current doc store segment we are writing
+ /// to.
+ /// </summary>
+ internal string DocStoreSegment
+ {
+ get
+ {
+ lock (this)
+ {
+ return docStoreSegment;
+ }
+ }
+ }
- /// <summary>Returns the doc offset into the shared doc store for
- /// the current buffered docs.
- /// </summary>
- internal int DocStoreOffset
- {
- get { return docStoreOffset; }
- }
+ /// <summary>Returns the doc offset into the shared doc store for
+ /// the current buffered docs.
+ /// </summary>
+ internal int DocStoreOffset
+ {
+ get { return docStoreOffset; }
+ }
- /// <summary>Closes the current open doc stores an returns the doc
- /// store segment name. This returns null if there are *
- /// no buffered documents.
- /// </summary>
- internal System.String CloseDocStore()
- {
- lock (this)
- {
-
- System.Diagnostics.Debug.Assert(AllThreadsIdle());
-
- if (infoStream != null)
- Message("closeDocStore: " + openFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
-
- bool success = false;
-
- try
- {
- InitFlushState(true);
- closedFiles.Clear();
-
- consumer.CloseDocStore(flushState);
- System.Diagnostics.Debug.Assert(0 == openFiles.Count);
-
- System.String s = docStoreSegment;
- docStoreSegment = null;
- docStoreOffset = 0;
- numDocsInStore = 0;
- success = true;
- return s;
- }
- finally
- {
- if (!success)
- {
- Abort();
- }
- }
- }
- }
-
- private ICollection<string> abortedFiles; // List of files that were written before last abort()
-
- private SegmentWriteState flushState;
+ /// <summary>Closes the current open doc stores an returns the doc
+ /// store segment name. This returns null if there are *
+ /// no buffered documents.
+ /// </summary>
+ internal System.String CloseDocStore()
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ if (infoStream != null)
+ Message("closeDocStore: " + openFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
+
+ bool success = false;
+
+ try
+ {
+ InitFlushState(true);
+ closedFiles.Clear();
+
+ consumer.CloseDocStore(flushState);
+ System.Diagnostics.Debug.Assert(0 == openFiles.Count);
+
+ System.String s = docStoreSegment;
+ docStoreSegment = null;
+ docStoreOffset = 0;
+ numDocsInStore = 0;
+ success = true;
+ return s;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+ }
+ }
+
+ private ICollection<string> abortedFiles; // List of files that were written before last abort()
+
+ private SegmentWriteState flushState;
internal ICollection<string> AbortedFiles()
- {
- return abortedFiles;
- }
-
- internal void Message(System.String message)
- {
- if (infoStream != null)
- writer.Message("DW: " + message);
- }
+ {
+ return abortedFiles;
+ }
+
+ internal void Message(System.String message)
+ {
+ if (infoStream != null)
+ writer.Message("DW: " + message);
+ }
internal IList<string> openFiles = new List<string>();
internal IList<string> closedFiles = new List<string>();
-
- /* Returns Collection of files in use by this instance,
- * including any flushed segments. */
- internal IList<string> OpenFiles()
- {
- lock (this)
- {
+
+ /* Returns Collection of files in use by this instance,
+ * including any flushed segments. */
+ internal IList<string> OpenFiles()
+ {
+ lock (this)
+ {
// ToArray returns a copy
- return openFiles.ToArray();
- }
- }
-
- internal IList<string> ClosedFiles()
- {
+ return openFiles.ToArray();
+ }
+ }
+
+ internal IList<string> ClosedFiles()
+ {
lock (this)
{
// ToArray returns a copy
return closedFiles.ToArray();
}
- }
-
- internal void AddOpenFile(System.String name)
- {
- lock (this)
- {
- System.Diagnostics.Debug.Assert(!openFiles.Contains(name));
- openFiles.Add(name);
- }
- }
-
- internal void RemoveOpenFile(System.String name)
- {
- lock (this)
- {
- System.Diagnostics.Debug.Assert(openFiles.Contains(name));
- openFiles.Remove(name);
- closedFiles.Add(name);
- }
- }
-
- internal void SetAborting()
- {
- lock (this)
- {
- aborting = true;
- }
- }
-
- /// <summary>Called if we hit an exception at a bad time (when
- /// updating the index files) and must discard all
- /// currently buffered docs. This resets our state,
- /// discarding any docs added since last flush.
- /// </summary>
- internal void Abort()
- {
- lock (this)
- {
- try
- {
+ }
+
+ internal void AddOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(!openFiles.Contains(name));
+ openFiles.Add(name);
+ }
+ }
+
+ internal void RemoveOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(openFiles.Contains(name));
+ openFiles.Remove(name);
+ closedFiles.Add(name);
+ }
+ }
+
+ internal void SetAborting()
+ {
+ lock (this)
+ {
+ aborting = true;
+ }
+ }
+
+ /// <summary>Called if we hit an exception at a bad time (when
+ /// updating the index files) and must discard all
+ /// currently buffered docs. This resets our state,
+ /// discarding any docs added since last flush.
+ /// </summary>
+ internal void Abort()
+ {
+ lock (this)
+ {
+ try
+ {
if (infoStream != null)
{
Message("docWriter: now abort");
}
- // Forcefully remove waiting ThreadStates from line
- waitQueue.Abort();
-
- // Wait for all other threads to finish with
- // DocumentsWriter:
- PauseAllThreads();
-
- try
- {
-
- System.Diagnostics.Debug.Assert(0 == waitQueue.numWaiting);
-
- waitQueue.waitingBytes = 0;
-
- try
- {
- abortedFiles = OpenFiles();
- }
- catch (System.Exception)
- {
- abortedFiles = null;
- }
-
- deletesInRAM.Clear();
+ // Forcefully remove waiting ThreadStates from line
+ waitQueue.Abort();
+
+ // Wait for all other threads to finish with
+ // DocumentsWriter:
+ PauseAllThreads();
+
+ try
+ {
+
+ System.Diagnostics.Debug.Assert(0 == waitQueue.numWaiting);
+
+ waitQueue.waitingBytes = 0;
+
+ try
+ {
+ abortedFiles = OpenFiles();
+ }
+ catch (System.Exception)
+ {
+ abortedFiles = null;
+ }
+
+ deletesInRAM.Clear();
deletesFlushed.Clear();
- openFiles.Clear();
-
- for (int i = 0; i < threadStates.Length; i++)
- try
- {
- threadStates[i].consumer.Abort();
- }
- catch (System.Exception)
- {
- }
-
- try
- {
- consumer.Abort();
- }
- catch (System.Exception)
- {
- }
-
- docStoreSegment = null;
- numDocsInStore = 0;
- docStoreOffset = 0;
-
- // Reset all postings data
- DoAfterFlush();
- }
- finally
- {
- ResumeAllThreads();
- }
- }
- finally
- {
- aborting = false;
- System.Threading.Monitor.PulseAll(this);
+ openFiles.Clear();
+
+ for (int i = 0; i < threadStates.Length; i++)
+ try
+ {
+ threadStates[i].consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ try
+ {
+ consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ docStoreSegment = null;
+ numDocsInStore = 0;
+ docStoreOffset = 0;
+
+ // Reset all postings data
+ DoAfterFlush();
+ }
+ finally
+ {
+ ResumeAllThreads();
+ }
+ }
+ finally
+ {
+ aborting = false;
+ System.Threading.Monitor.PulseAll(this);
if (infoStream != null)
{
Message("docWriter: done abort; abortedFiles=" + abortedFiles);
}
- }
- }
- }
-
- /// <summary>Reset after a flush </summary>
- private void DoAfterFlush()
- {
- // All ThreadStates should be idle when we are called
- System.Diagnostics.Debug.Assert(AllThreadsIdle());
- threadBindings.Clear();
- waitQueue.Reset();
- segment = null;
- numDocsInRAM = 0;
- nextDocID = 0;
- bufferIsFull = false;
- flushPending = false;
- for (int i = 0; i < threadStates.Length; i++)
- threadStates[i].DoAfterFlush();
- numBytesUsed = 0;
- }
-
- // Returns true if an abort is in progress
- internal bool PauseAllThreads()
- {
- lock (this)
- {
- pauseThreads++;
- while (!AllThreadsIdle())
- {
- System.Threading.Monitor.Wait(this);
- }
-
- return aborting;
- }
- }
-
- internal void ResumeAllThreads()
- {
- lock (this)
- {
- pauseThreads--;
- System.Diagnostics.Debug.Assert(pauseThreads >= 0);
- if (0 == pauseThreads)
- System.Threading.Monitor.PulseAll(this);
- }
- }
-
- private bool AllThreadsIdle()
- {
- lock (this)
- {
- for (int i = 0; i < threadStates.Length; i++)
- if (!threadStates[i].isIdle)
- return false;
- return true;
- }
- }
+ }
+ }
+ }
+
+ /// <summary>Reset after a flush </summary>
+ private void DoAfterFlush()
+ {
+ // All ThreadStates should be idle when we are called
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+ threadBindings.Clear();
+ waitQueue.Reset();
+ segment = null;
+ numDocsInRAM = 0;
+ nextDocID = 0;
+ bufferIsFull = false;
+ flushPending = false;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].DoAfterFlush();
+ numBytesUsed = 0;
+ }
+
+ // Returns true if an abort is in progress
+ internal bool PauseAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads++;
+ while (!AllThreadsIdle())
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ return aborting;
+ }
+ }
+
+ internal void ResumeAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads--;
+ System.Diagnostics.Debug.Assert(pauseThreads >= 0);
+ if (0 == pauseThreads)
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ private bool AllThreadsIdle()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < threadStates.Length; i++)
+ if (!threadStates[i].isIdle)
+ return false;
+ return true;
+ }
+ }
- internal bool AnyChanges
- {
- get
- {
- lock (this)
- {
- return numDocsInRAM != 0 || deletesInRAM.numTerms != 0 || deletesInRAM.docIDs.Count != 0 ||
- deletesInRAM.queries.Count != 0;
- }
- }
- }
+ internal bool AnyChanges
+ {
+ get
+ {
+ lock (this)
+ {
+ return numDocsInRAM != 0 || deletesInRAM.numTerms != 0 || deletesInRAM.docIDs.Count != 0 ||
+ deletesInRAM.queries.Count != 0;
+ }
+ }
+ }
- private void InitFlushState(bool onlyDocStore)
- {
- lock (this)
- {
- InitSegmentName(onlyDocStore);
- flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval);
- }
- }
-
- /// <summary>Flush all pending docs to a new segment </summary>
- internal int Flush(bool closeDocStore)
- {
- lock (this)
- {
-
- System.Diagnostics.Debug.Assert(AllThreadsIdle());
-
- System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
-
- System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
- System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
- System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
-
- InitFlushState(false);
-
- docStoreOffset = numDocsInStore;
-
- if (infoStream != null)
- Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
-
- bool success = false;
-
- try
- {
-
- if (closeDocStore)
- {
- System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
- System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
- CloseDocStore();
- flushState.numDocsInStore = 0;
- }
-
- ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
- for (int i = 0; i < threadStates.Length; i++)
- threads.Add(threadStates[i].consumer);
- consumer.Flush(threads, flushState);
-
- if (infoStream != null)
- {
+ private void InitFlushState(bool onlyDocStore)
+ {
+ lock (this)
+ {
+ InitSegmentName(onlyDocStore);
+ flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval);
+ }
+ }
+
+ /// <summary>Flush all pending docs to a new segment </summary>
+ internal int Flush(bool closeDocStore)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
+
+ System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
+ System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ InitFlushState(false);
+
+ docStoreOffset = numDocsInStore;
+
+ if (infoStream != null)
+ Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
+
+ bool success = false;
+
+ try
+ {
+
+ if (closeDocStore)
+ {
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
+ CloseDocStore();
+ flushState.numDocsInStore = 0;
+ }
+
+ ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
+ for (int i = 0; i < threadStates.Length; i++)
+ threads.Add(threadStates[i].consumer);
+ consumer.Flush(threads, flushState);
+
+ if (infoStream != null)
+ {
SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
long newSegmentSize = si.SizeInBytes();
System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
- Message(message);
- }
-
- flushedDocCount += flushState.numDocs;
-
- DoAfterFlush();
-
- success = true;
- }
- finally
- {
- if (!success)
- {
- Abort();
- }
- }
-
- System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
-
- return flushState.numDocs;
- }
- }
+ Message(message);
+ }
+
+ flushedDocCount += flushState.numDocs;
+
+ DoAfterFlush();
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ return flushState.numDocs;
+ }
+ }
internal ICollection<string> GetFlushedFiles()
{
return flushState.flushedFiles;
}
-
- /// <summary>Build compound file for the segment we just flushed </summary>
- internal void CreateCompoundFile(System.String segment)
- {
-
- CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
- foreach(string flushedFile in flushState.flushedFiles)
- {
+
+ /// <summary>Build compound file for the segment we just flushed </summary>
+ internal void CreateCompoundFile(System.String segment)
+ {
+
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ foreach(string flushedFile in flushState.flushedFiles)
+ {
cfsWriter.AddFile(flushedFile);
- }
-
- // Perform the merge
- cfsWriter.Close();
- }
-
- /// <summary>Set flushPending if it is not already set and returns
- /// whether it was set. This is used by IndexWriter to
- /// trigger a single flush even when multiple threads are
- /// trying to do so.
- /// </summary>
- internal bool SetFlushPending()
- {
- lock (this)
- {
- if (flushPending)
- return false;
- else
- {
- flushPending = true;
- return true;
- }
- }
- }
-
- internal void ClearFlushPending()
- {
- lock (this)
- {
- flushPending = false;
- }
- }
-
- internal void PushDeletes()
- {
- lock (this)
- {
- deletesFlushed.Update(deletesInRAM);
- }
- }
-
+ }
+
+ // Perform the merge
+ cfsWriter.Close();
+ }
+
+ /// <summary>Set flushPending if it is not already set and returns
+ /// whether it was set. This is used by IndexWriter to
+ /// trigger a single flush even when multiple threads are
+ /// trying to do so.
+ /// </summary>
+ internal bool SetFlushPending()
+ {
+ lock (this)
+ {
+ if (flushPending)
+ return false;
+ else
+ {
+ flushPending = true;
+ return true;
+ }
+ }
+ }
+
+ internal void ClearFlushPending()
+ {
+ lock (this)
+ {
+ flushPending = false;
+ }
+ }
+
+ internal void PushDeletes()
+ {
+ lock (this)
+ {
+ deletesFlushed.Update(deletesInRAM);
+ }
+ }
+
public void Dispose()
{
// Move to protected method if class becomes unsealed
@@ -874,417 +874,417 @@ namespace Lucene.Net.Index
System.Threading.Monitor.PulseAll(this);
}
}
-
- internal void InitSegmentName(bool onlyDocStore)
- {
- lock (this)
- {
- if (segment == null && (!onlyDocStore || docStoreSegment == null))
- {
- segment = writer.NewSegmentName();
- System.Diagnostics.Debug.Assert(numDocsInRAM == 0);
- }
- if (docStoreSegment == null)
- {
- docStoreSegment = segment;
- System.Diagnostics.Debug.Assert(numDocsInStore == 0);
- }
- }
- }
-
- /// <summary>Returns a free (idle) ThreadState that may be used for
- /// indexing this one document. This call also pauses if a
- /// flush is pending. If delTerm is non-null then we
- /// buffer this deleted term after the thread state has
- /// been acquired.
- /// </summary>
- internal DocumentsWriterThreadState GetThreadState(Document doc, Term delTerm)
- {
- lock (this)
- {
-
- // First, find a thread state. If this thread already
- // has affinity to a specific ThreadState, use that one
- // again.
- DocumentsWriterThreadState state = threadBindings[ThreadClass.Current()];
- if (state == null)
- {
-
- // First time this thread has called us since last
- // flush. Find the least loaded thread state:
- DocumentsWriterThreadState minThreadState = null;
- for (int i = 0; i < threadStates.Length; i++)
- {
- DocumentsWriterThreadState ts = threadStates[i];
- if (minThreadState == null || ts.numThreads < minThreadState.numThreads)
- minThreadState = ts;
- }
- if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.Length >= MAX_THREAD_STATE))
- {
- state = minThreadState;
- state.numThreads++;
- }
- else
- {
- // Just create a new "private" thread state
- DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1 + threadStates.Length];
- if (threadStates.Length > 0)
- Array.Copy(threadStates, 0, newArray, 0, threadStates.Length);
- state = newArray[threadStates.Length] = new DocumentsWriterThreadState(this);
- threadStates = newArray;
- }
- threadBindings[ThreadClass.Current()] = state;
- }
-
- // Next, wait until my thread state is idle (in case
- // it's shared with other threads) and for threads to
- // not be paused nor a flush pending:
- WaitReady(state);
-
- // Allocate segment name if this is the first doc since
- // last flush:
- InitSegmentName(false);
-
- state.isIdle = false;
-
- bool success = false;
- try
- {
- state.docState.docID = nextDocID;
-
- System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init start"));
-
- if (delTerm != null)
- {
- AddDeleteTerm(delTerm, state.docState.docID);
- state.doFlushAfter = TimeToFlushDeletes();
- }
-
- System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init after delTerm"));
-
- nextDocID++;
- numDocsInRAM++;
-
- // We must at this point commit to flushing to ensure we
- // always get N docs when we flush by doc count, even if
- // > 1 thread is adding documents:
- if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs)
- {
- flushPending = true;
- state.doFlushAfter = true;
- }
-
- success = true;
- }
- finally
- {
- if (!success)
- {
- // Forcefully idle this ThreadState:
- state.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
- if (state.doFlushAfter)
- {
- state.doFlushAfter = false;
- flushPending = false;
- }
- }
- }
-
- return state;
- }
- }
-
- /// <summary>Returns true if the caller (IndexWriter) should now
- /// flush.
- /// </summary>
- internal bool AddDocument(Document doc, Analyzer analyzer)
- {
- return UpdateDocument(doc, analyzer, null);
- }
-
- internal bool UpdateDocument(Term t, Document doc, Analyzer analyzer)
- {
- return UpdateDocument(doc, analyzer, t);
- }
-
- internal bool UpdateDocument(Document doc, Analyzer analyzer, Term delTerm)
- {
-
- // This call is synchronized but fast
- DocumentsWriterThreadState state = GetThreadState(doc, delTerm);
-
- DocState docState = state.docState;
- docState.doc = doc;
- docState.analyzer = analyzer;
-
- bool doReturnFalse = false; // {{Aroush-2.9}} to handle return from finally clause
-
- bool success = false;
- try
- {
- // This call is not synchronized and does all the
- // work
- DocWriter perDoc;
- try
+
+ internal void InitSegmentName(bool onlyDocStore)
+ {
+ lock (this)
+ {
+ if (segment == null && (!onlyDocStore || docStoreSegment == null))
{
- perDoc = state.consumer.ProcessDocument();
+ segment = writer.NewSegmentName();
+ System.Diagnostics.Debug.Assert(numDocsInRAM == 0);
}
- finally
+ if (docStoreSegment == null)
{
- docState.Clear();
+ docStoreSegment = segment;
+ System.Diagnostics.Debug.Assert(numDocsInStore == 0);
}
- // This call is synchronized but fast
- FinishDocument(state, perDoc);
- success = true;
- }
- finally
- {
- if (!success)
- {
- lock (this)
- {
-
- if (aborting)
- {
- state.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
- Abort();
- }
- else
- {
- skipDocWriter.docID = docState.docID;
- bool success2 = false;
- try
- {
- waitQueue.Add(skipDocWriter);
- success2 = true;
- }
- finally
- {
- if (!success2)
- {
- state.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
- Abort();
- // return false; // {{Aroush-2.9}} this 'return false' is move to outside finally
- doReturnFalse = true;
- }
- }
-
- if (!doReturnFalse) // {{Aroush-2.9}} added because of the above 'return false' removal
- {
- state.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
-
- // If this thread state had decided to flush, we
- // must clear it so another thread can flush
- if (state.doFlushAfter)
- {
- state.doFlushAfter = false;
- flushPending = false;
- System.Threading.Monitor.PulseAll(this);
- }
-
- // Immediately mark this document as deleted
- // since likely it was partially added. This
- // keeps indexing as "all or none" (atomic) when
- // adding a document:
- AddDeleteDocID(state.docState.docID);
- }
- }
- }
- }
- }
-
- if (doReturnFalse) // {{Aroush-2.9}} see comment abouve
- {
- return false;
}
-
- return state.doFlushAfter || TimeToFlushDeletes();
- }
-
- // for testing
- internal int GetNumBufferedDeleteTerms()
- {
- lock (this)
- {
- return deletesInRAM.numTerms;
- }
- }
-
- // for testing
- internal IDictionary<Term, BufferedDeletes.Num> GetBufferedDeleteTerms()
- {
- lock (this)
- {
- return deletesInRAM.terms;
- }
- }
-
- /// <summary>Called whenever a merge has completed and the merged segments had deletions </summary>
- internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
- {
- lock (this)
- {
- if (docMaps == null)
- // The merged segments had no deletes so docIDs did not change and we have nothing to do
- return ;
- MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount);
- deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
- deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
- flushedDocCount -= mapper.docShift;
- }
- }
-
- private void WaitReady(DocumentsWriterThreadState state)
- {
- lock (this)
- {
-
- while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting))
- {
- System.Threading.Monitor.Wait(this);
- }
-
- if (closed)
- throw new AlreadyClosedException("this IndexWriter is closed");
- }
- }
-
- internal bool BufferDeleteTerms(Term[] terms)
- {
- lock (this)
- {
- WaitReady(null);
- for (int i = 0; i < terms.Length; i++)
- AddDeleteTerm(terms[i], numDocsInRAM);
- return TimeToFlushDeletes();
- }
- }
-
- internal bool BufferDeleteTerm(Term term)
- {
- lock (this)
- {
- WaitReady(null);
- AddDeleteTerm(term, numDocsInRAM);
- return TimeToFlushDeletes();
- }
- }
-
- internal bool BufferDeleteQueries(Query[] queries)
- {
- lock (this)
- {
- WaitReady(null);
- for (int i = 0; i < queries.Length; i++)
- AddDeleteQuery(queries[i], numDocsInRAM);
- return TimeToFlushDeletes();
- }
- }
-
- internal bool BufferDeleteQuery(Query query)
- {
- lock (this)
- {
- WaitReady(null);
- AddDeleteQuery(query, numDocsInRAM);
- return TimeToFlushDeletes();
- }
- }
-
- internal bool DeletesFull()
- {
- lock (this)
- {
- return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
- }
- }
-
- internal bool DoApplyDeletes()
- {
- lock (this)
- {
- // Very similar to deletesFull(), except we don't count
- // numBytesAlloc, because we are checking whether
- // deletes (alone) are consuming too many resources now
- // and thus should be applied. We apply deletes if RAM
- // usage is > 1/2 of our allowed RAM buffer, to prevent
- // too-frequent flushing of a long tail of tiny segments
- // when merges (which always apply deletes) are
- // infrequent.
- return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
- }
- }
-
- private bool TimeToFlushDeletes()
- {
- lock (this)
- {
- return (bufferIsFull || DeletesFull()) && SetFlushPending();
- }
- }
-
- internal int MaxBufferedDeleteTerms
- {
- set { this.maxBufferedDeleteTerms = value; }
- get { return maxBufferedDeleteTerms; }
- }
-
- internal bool HasDeletes()
- {
- lock (this)
- {
- return deletesFlushed.Any();
- }
- }
-
- internal bool ApplyDeletes(SegmentInfos infos)
- {
- lock (this)
- {
- if (!HasDeletes())
- return false;
-
- if (infoStream != null)
- Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
-
- int infosEnd = infos.Count;
-
- int docStart = 0;
- bool any = false;
- for (int i = 0; i < infosEnd; i++)
- {
-
- // Make sure we never attempt to apply deletes to
- // segment in external dir
- System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
-
- SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
- try
- {
- any |= ApplyDeletes(reader, docStart);
- docStart += reader.MaxDoc;
- }
- finally
- {
- writer.readerPool.Release(reader);
- }
- }
-
- deletesFlushed.Clear();
-
- return any;
- }
- }
-
- // used only by assert
- private Term lastDeleteTerm;
-
- // used only by assert
- private bool CheckDeleteTerm(Term term)
+ }
+
+ /// <summary>Returns a free (idle) ThreadState that may be used for
+ /// indexing this one document. This call also pauses if a
+ /// flush is pending. If delTerm is non-null then we
+ /// buffer this deleted term after the thread state has
+ /// been acquired.
+ /// </summary>
+ internal DocumentsWriterThreadState GetThreadState(Document doc, Term delTerm)
+ {
+ lock (this)
+ {
+
+ // First, find a thread state. If this thread already
+ // has affinity to a specific ThreadState, use that one
+ // again.
+ DocumentsWriterThreadState state = threadBindings[ThreadClass.Current()];
+ if (state == null)
+ {
+
+ // First time this thread has called us since last
+ // flush. Find the least loaded thread state:
+ DocumentsWriterThreadState minThreadState = null;
+ for (int i = 0; i < threadStates.Length; i++)
+ {
+ DocumentsWriterThreadState ts = threadStates[i];
+ if (minThreadState == null || ts.numThreads < minThreadState.numThreads)
+ minThreadState = ts;
+ }
+ if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.Length >= MAX_THREAD_STATE))
+ {
+ state = minThreadState;
+ state.numThreads++;
+ }
+ else
+ {
+ // Just create a new "private" thread state
+ DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1 + threadStates.Length];
+ if (threadStates.Length > 0)
+ Array.Copy(threadStates, 0, newArray, 0, threadStates.Length);
+ state = newArray[threadStates.Length] = new DocumentsWriterThreadState(this);
+ threadStates = newArray;
+ }
+ threadBindings[ThreadClass.Current()] = state;
+ }
+
+ // Next, wait until my thread state is idle (in case
+ // it's shared with other threads) and for threads to
+ // not be paused nor a flush pending:
+ WaitReady(state);
+
+ // Allocate segment name if this is the first doc since
+ // last flush:
+ InitSegmentName(false);
+
+ state.isIdle = false;
+
+ bool success = false;
+ try
+ {
+ state.docState.docID = nextDocID;
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init start"));
+
+ if (delTerm != null)
+ {
+ AddDeleteTerm(delTerm, state.docState.docID);
+ state.doFlushAfter = TimeToFlushDeletes();
+ }
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init after delTerm"));
+
+ nextDocID++;
+ numDocsInRAM++;
+
+ // We must at this point commit to flushing to ensure we
+ // always get N docs when we flush by doc count, even if
+ // > 1 thread is adding documents:
+ if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs)
+ {
+ flushPending = true;
+ state.doFlushAfter = true;
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Forcefully idle this ThreadState:
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ }
+ }
+ }
+
+ return state;
+ }
+ }
+
+ /// <summary>Returns true if the caller (IndexWriter) should now
+ /// flush.
+ /// </summary>
+ internal bool AddDocument(Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, null);
+ }
+
+ internal bool UpdateDocument(Term t, Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, t);
+ }
+
+ internal bool UpdateDocument(Document doc, Analyzer analyzer, Term delTerm)
+ {
+
+ // This call is synchronized but fast
+ DocumentsWriterThreadState state = GetThreadState(doc, delTerm);
+
+ DocState docState = state.docState;
+ docState.doc = doc;
+ docState.analyzer = analyzer;
+
+ bool doReturnFalse = false; // {{Aroush-2.9}} to handle return from finally clause
+
+ bool success = false;
+ try
+ {
+ // This call is not synchronized and does all the
+ // work
+ DocWriter perDoc;
+ try
+ {
+ perDoc = state.consumer.ProcessDocument();
+ }
+ finally
+ {
+ docState.Clear();
+ }
+ // This call is synchronized but fast
+ FinishDocument(state, perDoc);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ lock (this)
+ {
+
+ if (aborting)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ }
+ else
+ {
+ skipDocWriter.docID = docState.docID;
+ bool success2 = false;
+ try
+ {
+ waitQueue.Add(skipDocWriter);
+ success2 = true;
+ }
+ finally
+ {
+ if (!success2)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ // return false; // {{Aroush-2.9}} this 'return false' is move to outside finally
+ doReturnFalse = true;
+ }
+ }
+
+ if (!doReturnFalse) // {{Aroush-2.9}} added because of the above 'return false' removal
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+
+ // If this thread state had decided to flush, we
+ // must clear it so another thread can flush
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ System.Threading.Monitor.PulseAll(this);
+ }
+
+ // Immediately mark this document as deleted
+ // since likely it was partially added. This
+ // keeps indexing as "all or none" (atomic) when
+ // adding a document:
+ AddDeleteDocID(state.docState.docID);
+ }
+ }
+ }
+ }
+ }
+
+ if (doReturnFalse) // {{Aroush-2.9}} see comment abouve
+ {
+ return false;
+ }
+
+ return state.doFlushAfter || TimeToFlushDeletes();
+ }
+
+ // for testing
+ internal int GetNumBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.numTerms;
+ }
+ }
+
+ // for testing
+ internal IDictionary<Term, BufferedDeletes.Num> GetBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.terms;
+ }
+ }
+
+ /// <summary>Called whenever a merge has completed and the merged segments had deletions </summary>
+ internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
+ {
+ lock (this)
+ {
+ if (docMaps == null)
+ // The merged segments had no deletes so docIDs did not change and we have nothing to do
+ return ;
+ MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ flushedDocCount -= mapper.docShift;
+ }
+ }
+
+ private void WaitReady(DocumentsWriterThreadState state)
+ {
+ lock (this)
+ {
+
+ while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting))
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ if (closed)
+ throw new AlreadyClosedException("this IndexWriter is closed");
+ }
+ }
+
+ internal bool BufferDeleteTerms(Term[] terms)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < terms.Length; i++)
+ AddDeleteTerm(terms[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteTerm(Term term)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteTerm(term, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQueries(Query[] queries)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < queries.Length; i++)
+ AddDeleteQuery(queries[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQuery(Query query)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteQuery(query, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool DeletesFull()
+ {
+ lock (this)
+ {
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ internal bool DoApplyDeletes()
+ {
+ lock (this)
+ {
+ // Very similar to deletesFull(), except we don't count
+ // numBytesAlloc, because we are checking whether
+ // deletes (alone) are consuming too many resources now
+ // and thus should be applied. We apply deletes if RAM
+ // usage is > 1/2 of our allowed RAM buffer, to prevent
+ // too-frequent flushing of a long tail of tiny segments
+ // when merges (which always apply deletes) are
+ // infrequent.
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ private bool TimeToFlushDeletes()
+ {
+ lock (this)
+ {
+ return (bufferIsFull || DeletesFull()) && SetFlushPending();
+ }
+ }
+
+ internal int MaxBufferedDeleteTerms
+ {
+ set { this.maxBufferedDeleteTerms = value; }
+ get { return maxBufferedDeleteTerms; }
+ }
+
+ internal bool HasDeletes()
+ {
+ lock (this)
+ {
+ return deletesFlushed.Any();
+ }
+ }
+
+ internal bool ApplyDeletes(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (!HasDeletes())
+ return false;
+
+ if (infoStream != null)
+ Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
+
+ int infosEnd = infos.Count;
+
+ int docStart = 0;
+ bool any = false;
+ for (int i = 0; i < infosEnd; i++)
+ {
+
+ // Make sure we never attempt to apply deletes to
+ // segment in external dir
+ System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
+
+ SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
+ try
+ {
+ any |= ApplyDeletes(reader, docStart);
+ docStart += reader.MaxDoc;
+ }
+ finally
+ {
+ writer.readerPool.Release(reader);
+ }
+ }
+
+ deletesFlushed.Clear();
+
+ return any;
+ }
+ }
+
+ // used only by assert
+ private Term lastDeleteTerm;
+
+ // used only by assert
+ private bool CheckDeleteTerm(Term term)
{
if (term != null) {
System.Diagnostics.Debug.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm=" + lastDeleteTerm + " vs term=" + term);
@@ -1292,330 +1292,330 @@ namespace Lucene.Net.Index
lastDeleteTerm = term;
return true;
}
-
- // Apply buffered delete terms, queries and docIDs to the
- // provided reader
- private bool ApplyDeletes(IndexReader reader, int docIDStart)
- {
- lock (this)
- {
- int docEnd = docIDStart + reader.MaxDoc;
- bool any = false;
-
+
+ // Apply buffered delete terms, queries and docIDs to the
+ // provided reader
+ private bool ApplyDeletes(IndexReader reader, int docIDStart)
+ {
+ lock (this)
+ {
+ int docEnd = docIDStart + reader.MaxDoc;
+ bool any = false;
+
System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));
- // Delete by term
- TermDocs docs = reader.TermDocs();
- try
- {
- foreach(KeyValuePair<Term, BufferedDeletes.Num> entry in deletesFlushed.terms)
- {
- Term term = entry.Key;
- // LUCENE-2086: we should be iterating a TreeMap,
+ // Delete by term
+ TermDocs docs = reader.TermDocs();
+ try
+ {
+ foreach(KeyValuePair<Term, BufferedDeletes.Num> entry in deletesFlushed.terms)
+ {
+ Term term = entry.Key;
+ // LUCENE-2086: we should be iterating a TreeMap,
// here, so terms better be in order:
System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
- docs.Seek(term);
- int limit = entry.Value.GetNum();
- while (docs.Next())
- {
- int docID = docs.Doc;
- if (docIDStart + docID >= limit)
- break;
- reader.DeleteDocument(docID);
- any = true;
- }
- }
- }
- finally
- {
- docs.Close();
- }
-
- // Delete by docID
- foreach(int docIdInt in deletesFlushed.docIDs)
- {
- int docID = docIdInt;
- if (docID >= docIDStart && docID < docEnd)
- {
- reader.DeleteDocument(docID - docIDStart);
- any = true;
- }
- }
-
- // Delete by query
- IndexSearcher searcher = new IndexSearcher(reader);
- foreach(KeyValuePair<Query, int> entry in deletesFlushed.queries)
- {
- Query query = (Query) entry.Key;
- int limit = (int)entry.Value;
- Weight weight = query.Weight(searcher);
- Scorer scorer = weight.Scorer(reader, true, false);
- if (scorer != null)
- {
- while (true)
- {
- int doc = scorer.NextDoc();
- if (((long) docIDStart) + doc >= limit)
- break;
- reader.DeleteDocument(doc);
- any = true;
- }
- }
- }
- searcher.Close();
- return any;
- }
- }
-
- // Buffer a term in bufferedDeleteTerms, which records the
- // current number of documents buffered in ram so that the
- // delete term will be applied to those documents as well
- // as the disk segments.
- private void AddDeleteTerm(Term term, int docCount)
- {
- lock (this)
- {
- BufferedDeletes.Num num = deletesInRAM.terms[term];
- int docIDUpto = flushedDocCount + docCount;
- if (num == null)
- deletesInRAM.terms[term] = new BufferedDeletes.Num(docIDUpto);
- else
- num.SetNum(docIDUpto);
- deletesInRAM.numTerms++;
-
- deletesInRAM.AddBytesUsed(BYTES_PER_DEL_TERM + term.Text.Length * CHAR_NUM_BYTE);
- }
- }
-
- // Buffer a specific docID for deletion. Currently only
- // used when we hit a exception when adding a document
- private void AddDeleteDocID(int docID)
- {
- lock (this)
- {
- deletesInRAM.docIDs.Add(flushedDocCount + docID);
+ docs.Seek(term);
+ int limit = entry.Value.GetNum();
+ while (docs.Next())
+ {
+ int docID = docs.Doc;
+ if (docIDStart + docID >= limit)
+ break;
+ reader.DeleteDocument(docID);
+ any = true;
+ }
+ }
+ }
+ finally
+ {
+ docs.Close();
+ }
+
+ // Delete by docID
+ foreach(int docIdInt in deletesFlushed.docIDs)
+ {
+ int docID = docIdInt;
+ if (docID >= docIDStart && docID < docEnd)
+ {
+ reader.DeleteDocument(docID - docIDStart);
+ any = true;
+ }
+ }
+
+ // Delete by query
+ IndexSearcher searcher = new IndexSearcher(reader);
+ foreach(KeyValuePair<Query, int> entry in deletesFlushed.queries)
+ {
+ Query query = (Query) entry.Key;
+ int limit = (int)entry.Value;
+ Weight weight = query.Weight(searcher);
+ Scorer scorer = weight.Scorer(reader, true, false);
+ if (scorer != null)
+ {
+ while (true)
+ {
+ int doc = scorer.NextDoc();
+ if (((long) docIDStart) + doc >= limit)
+ break;
+ reader.DeleteDocument(doc);
+ any = true;
+ }
+ }
+ }
+ searcher.Close();
+ return any;
+ }
+ }
+
+ // Buffer a term in bufferedDeleteTerms, which records the
+ // current number of documents buffered in ram so that the
+ // delete term will be applied to those documents as well
+ // as the disk segments.
+ private void AddDeleteTerm(Term term, int docCount)
+ {
+ lock (this)
+ {
+ BufferedDeletes.Num num = deletesInRAM.terms[term];
+ int docIDUpto = flushedDocCount + docCount;
+ if (num == null)
+ deletesInRAM.terms[term] = new BufferedDeletes.Num(docIDUpto);
+ else
+ num.SetNum(docIDUpto);
+ deletesInRAM.numTerms++;
+
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_TERM + term.Text.Length * CHAR_NUM_BYTE);
+ }
+ }
+
+ // Buffer a specific docID for deletion. Currently only
+ // used when we hit a exception when adding a document
+ private void AddDeleteDocID(int docID)
+ {
+ lock (this)
+ {
+ deletesInRAM.docIDs.Add(flushedDocCount + docID);
deletesInRAM.AddBytesUsed(BYTES_PER_DEL_DOCID);
- }
- }
-
- private void AddDeleteQuery(Query query, int docID)
- {
- lock (this)
- {
- deletesInRAM.queries[query] = flushedDocCount + docID;
- deletesInRAM.AddBytesUsed(BYTES_PER_DEL_QUERY);
- }
- }
-
- internal bool DoBalanceRAM()
- {
- lock (this)
- {
- return ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger);
- }
- }
-
- /// <summary>Does the synchronized work to finish/flush the
- /// inverted document.
- /// </summary>
- private void FinishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter)
- {
-
- if (DoBalanceRAM())
- // Must call this w/o holding synchronized(this) else
- // we'll hit deadlock:
- BalanceRAM();
-
- lock (this)
- {
-
- System.Diagnostics.Debug.Assert(docWriter == null || docWriter.docID == perThread.docState.docID);
-
- if (aborting)
- {
-
- // We are currently aborting, and another thread is
- // waiting for me to become idle. We just forcefully
- // idle this threadState; it will be fully reset by
- // abort()
- if (docWriter != null)
- try
- {
- docWriter.Abort();
- }
- catch (System.Exception)
- {
- }
-
- perThread.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
- return ;
- }
-
- bool doPause;
-
- if (docWriter != null)
- doPause = waitQueue.Add(docWriter);
- else
- {
- skipDocWriter.docID = perThread.docState.docID;
- doPause = waitQueue.Add(skipDocWriter);
- }
-
- if (doPause)
- WaitForWaitQueue();
-
- if (bufferIsFull && !flushPending)
- {
- flushPending = true;
- perThread.doFlushAfter = true;
- }
-
- perThread.isIdle = true;
- System.Threading.Monitor.PulseAll(this);
- }
- }
-
- internal void WaitForWaitQueue()
- {
- lock (this)
- {
- do
- {
- System.Threading.Monitor.Wait(this);
- }
- while (!waitQueue.DoResume());
- }
- }
-
- internal class S
<TRUNCATED>