You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:39:55 UTC
[12/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsDocsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsDocsWriter.cs b/src/core/Index/FormatPostingsDocsWriter.cs
index 82a7398..a0d2d5f 100644
--- a/src/core/Index/FormatPostingsDocsWriter.cs
+++ b/src/core/Index/FormatPostingsDocsWriter.cs
@@ -24,105 +24,105 @@ namespace Lucene.Net.Index
/// <summary>Consumes doc and freq, writing them using the current
/// index file format
/// </summary>
- sealed class FormatPostingsDocsWriter : FormatPostingsDocsConsumer, IDisposable
- {
-
- internal IndexOutput out_Renamed;
- internal FormatPostingsTermsWriter parent;
- internal FormatPostingsPositionsWriter posWriter;
- internal DefaultSkipListWriter skipListWriter;
- internal int skipInterval;
- internal int totalNumDocs;
-
- internal bool omitTermFreqAndPositions;
- internal bool storePayloads;
- internal long freqStart;
- internal FieldInfo fieldInfo;
-
- internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
- {
- this.parent = parent;
- System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
- state.flushedFiles.Add(fileName);
- out_Renamed = parent.parent.dir.CreateOutput(fileName);
- totalNumDocs = parent.parent.totalNumDocs;
-
- // TODO: abstraction violation
- skipInterval = parent.parent.termsOut.skipInterval;
- skipListWriter = parent.parent.skipListWriter;
- skipListWriter.SetFreqOutput(out_Renamed);
-
- posWriter = new FormatPostingsPositionsWriter(state, this);
- }
-
- internal void SetField(FieldInfo fieldInfo)
- {
- this.fieldInfo = fieldInfo;
- omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
- storePayloads = fieldInfo.storePayloads;
- posWriter.SetField(fieldInfo);
- }
-
- internal int lastDocID;
- internal int df;
-
- /// <summary>Adds a new doc in this term. If this returns null
- /// then we just skip consuming positions/payloads.
- /// </summary>
- internal override FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq)
- {
-
- int delta = docID - lastDocID;
-
- if (docID < 0 || (df > 0 && delta <= 0))
- throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
-
- if ((++df % skipInterval) == 0)
- {
- // TODO: abstraction violation
- skipListWriter.SetSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
- skipListWriter.BufferSkip(df);
- }
-
- System.Diagnostics.Debug.Assert(docID < totalNumDocs, "docID=" + docID + " totalNumDocs=" + totalNumDocs);
-
- lastDocID = docID;
- if (omitTermFreqAndPositions)
- out_Renamed.WriteVInt(delta);
- else if (1 == termDocFreq)
- out_Renamed.WriteVInt((delta << 1) | 1);
- else
- {
- out_Renamed.WriteVInt(delta << 1);
- out_Renamed.WriteVInt(termDocFreq);
- }
-
- return posWriter;
- }
-
- private TermInfo termInfo = new TermInfo(); // minimize consing
- internal UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
-
- /// <summary>Called when we are done adding docs to this term </summary>
- internal override void Finish()
- {
- long skipPointer = skipListWriter.WriteSkip(out_Renamed);
-
- // TODO: this is abstraction violation -- we should not
- // peek up into parents terms encoding format
- termInfo.Set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart));
-
- // TODO: we could do this incrementally
- UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);
-
- if (df > 0)
- {
- parent.termsOut.Add(fieldInfo.number, utf8.result, utf8.length, termInfo);
- }
-
- lastDocID = 0;
- df = 0;
- }
+ sealed class FormatPostingsDocsWriter : FormatPostingsDocsConsumer, IDisposable
+ {
+
+ internal IndexOutput out_Renamed;
+ internal FormatPostingsTermsWriter parent;
+ internal FormatPostingsPositionsWriter posWriter;
+ internal DefaultSkipListWriter skipListWriter;
+ internal int skipInterval;
+ internal int totalNumDocs;
+
+ internal bool omitTermFreqAndPositions;
+ internal bool storePayloads;
+ internal long freqStart;
+ internal FieldInfo fieldInfo;
+
+ internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base()
+ {
+ this.parent = parent;
+ System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION);
+ state.flushedFiles.Add(fileName);
+ out_Renamed = parent.parent.dir.CreateOutput(fileName);
+ totalNumDocs = parent.parent.totalNumDocs;
+
+ // TODO: abstraction violation
+ skipInterval = parent.parent.termsOut.skipInterval;
+ skipListWriter = parent.parent.skipListWriter;
+ skipListWriter.SetFreqOutput(out_Renamed);
+
+ posWriter = new FormatPostingsPositionsWriter(state, this);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ this.fieldInfo = fieldInfo;
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = fieldInfo.storePayloads;
+ posWriter.SetField(fieldInfo);
+ }
+
+ internal int lastDocID;
+ internal int df;
+
+ /// <summary>Adds a new doc in this term. If this returns null
+ /// then we just skip consuming positions/payloads.
+ /// </summary>
+ internal override FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq)
+ {
+
+ int delta = docID - lastDocID;
+
+ if (docID < 0 || (df > 0 && delta <= 0))
+ throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )");
+
+ if ((++df % skipInterval) == 0)
+ {
+ // TODO: abstraction violation
+ skipListWriter.SetSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength);
+ skipListWriter.BufferSkip(df);
+ }
+
+ System.Diagnostics.Debug.Assert(docID < totalNumDocs, "docID=" + docID + " totalNumDocs=" + totalNumDocs);
+
+ lastDocID = docID;
+ if (omitTermFreqAndPositions)
+ out_Renamed.WriteVInt(delta);
+ else if (1 == termDocFreq)
+ out_Renamed.WriteVInt((delta << 1) | 1);
+ else
+ {
+ out_Renamed.WriteVInt(delta << 1);
+ out_Renamed.WriteVInt(termDocFreq);
+ }
+
+ return posWriter;
+ }
+
+ private TermInfo termInfo = new TermInfo(); // minimize consing
+ internal UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result();
+
+ /// <summary>Called when we are done adding docs to this term </summary>
+ internal override void Finish()
+ {
+ long skipPointer = skipListWriter.WriteSkip(out_Renamed);
+
+ // TODO: this is abstraction violation -- we should not
+ // peek up into parents terms encoding format
+ termInfo.Set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart));
+
+ // TODO: we could do this incrementally
+ UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8);
+
+ if (df > 0)
+ {
+ parent.termsOut.Add(fieldInfo.number, utf8.result, utf8.length, termInfo);
+ }
+
+ lastDocID = 0;
+ df = 0;
+ }
public void Dispose()
{
@@ -130,5 +130,5 @@ namespace Lucene.Net.Index
out_Renamed.Dispose();
posWriter.Dispose();
}
- }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsFieldsConsumer.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsFieldsConsumer.cs b/src/core/Index/FormatPostingsFieldsConsumer.cs
index a3f86ec..1808a33 100644
--- a/src/core/Index/FormatPostingsFieldsConsumer.cs
+++ b/src/core/Index/FormatPostingsFieldsConsumer.cs
@@ -19,21 +19,21 @@ using System;
namespace Lucene.Net.Index
{
-
- /// <summary>Abstract API that consumes terms, doc, freq, prox and
- /// payloads postings. Concrete implementations of this
- /// actually do "something" with the postings (write it into
- /// the index in a specific format).
- ///
- /// NOTE: this API is experimental and will likely change
- /// </summary>
- abstract class FormatPostingsFieldsConsumer
- {
-
- /// <summary>Add a new field </summary>
- internal abstract FormatPostingsTermsConsumer AddField(FieldInfo field);
-
- /// <summary>Called when we are done adding everything. </summary>
- internal abstract void Finish();
- }
+
+ /// <summary>Abstract API that consumes terms, doc, freq, prox and
+ /// payloads postings. Concrete implementations of this
+ /// actually do "something" with the postings (write it into
+ /// the index in a specific format).
+ ///
+ /// NOTE: this API is experimental and will likely change
+ /// </summary>
+ abstract class FormatPostingsFieldsConsumer
+ {
+
+ /// <summary>Add a new field </summary>
+ internal abstract FormatPostingsTermsConsumer AddField(FieldInfo field);
+
+ /// <summary>Called when we are done adding everything. </summary>
+ internal abstract void Finish();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsFieldsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsFieldsWriter.cs b/src/core/Index/FormatPostingsFieldsWriter.cs
index 40ef619..577cacf 100644
--- a/src/core/Index/FormatPostingsFieldsWriter.cs
+++ b/src/core/Index/FormatPostingsFieldsWriter.cs
@@ -21,51 +21,51 @@ using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Index
{
-
- sealed class FormatPostingsFieldsWriter:FormatPostingsFieldsConsumer
- {
-
- internal Directory dir;
- internal System.String segment;
- internal TermInfosWriter termsOut;
- internal FieldInfos fieldInfos;
- internal FormatPostingsTermsWriter termsWriter;
- internal DefaultSkipListWriter skipListWriter;
- internal int totalNumDocs;
-
- public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base()
- {
-
- dir = state.directory;
- segment = state.segmentName;
- totalNumDocs = state.numDocs;
- this.fieldInfos = fieldInfos;
- termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);
-
- // TODO: this is a nasty abstraction violation (that we
- // peek down to find freqOut/proxOut) -- we need a
- // better abstraction here whereby these child consumers
- // can provide skip data or not
- skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);
-
- state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
- state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
-
- termsWriter = new FormatPostingsTermsWriter(state, this);
- }
-
- /// <summary>Add a new field </summary>
- internal override FormatPostingsTermsConsumer AddField(FieldInfo field)
- {
- termsWriter.SetField(field);
- return termsWriter;
- }
-
- /// <summary>Called when we are done adding everything. </summary>
- internal override void Finish()
- {
- termsOut.Dispose();
- termsWriter.Dispose();
- }
- }
+
+ sealed class FormatPostingsFieldsWriter:FormatPostingsFieldsConsumer
+ {
+
+ internal Directory dir;
+ internal System.String segment;
+ internal TermInfosWriter termsOut;
+ internal FieldInfos fieldInfos;
+ internal FormatPostingsTermsWriter termsWriter;
+ internal DefaultSkipListWriter skipListWriter;
+ internal int totalNumDocs;
+
+ public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base()
+ {
+
+ dir = state.directory;
+ segment = state.segmentName;
+ totalNumDocs = state.numDocs;
+ this.fieldInfos = fieldInfos;
+ termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval);
+
+ // TODO: this is a nasty abstraction violation (that we
+ // peek down to find freqOut/proxOut) -- we need a
+ // better abstraction here whereby these child consumers
+ // can provide skip data or not
+ skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null);
+
+ state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION));
+ state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION));
+
+ termsWriter = new FormatPostingsTermsWriter(state, this);
+ }
+
+ /// <summary>Add a new field </summary>
+ internal override FormatPostingsTermsConsumer AddField(FieldInfo field)
+ {
+ termsWriter.SetField(field);
+ return termsWriter;
+ }
+
+ /// <summary>Called when we are done adding everything. </summary>
+ internal override void Finish()
+ {
+ termsOut.Dispose();
+ termsWriter.Dispose();
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsPositionsConsumer.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsPositionsConsumer.cs b/src/core/Index/FormatPostingsPositionsConsumer.cs
index f5bc440..2821ef4 100644
--- a/src/core/Index/FormatPostingsPositionsConsumer.cs
+++ b/src/core/Index/FormatPostingsPositionsConsumer.cs
@@ -17,16 +17,16 @@
namespace Lucene.Net.Index
{
-
- abstract class FormatPostingsPositionsConsumer
- {
-
- /// <summary>Add a new position & payload. If payloadLength > 0
- /// you must read those bytes from the IndexInput.
- /// </summary>
- internal abstract void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength);
-
- /// <summary>Called when we are done adding positions & payloads </summary>
- internal abstract void Finish();
- }
+
+ abstract class FormatPostingsPositionsConsumer
+ {
+
+ /// <summary>Add a new position & payload. If payloadLength > 0
+ /// you must read those bytes from the IndexInput.
+ /// </summary>
+ internal abstract void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength);
+
+ /// <summary>Called when we are done adding positions & payloads </summary>
+ internal abstract void Finish();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsPositionsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsPositionsWriter.cs b/src/core/Index/FormatPostingsPositionsWriter.cs
index 8b70fcc..fc6e1e2 100644
--- a/src/core/Index/FormatPostingsPositionsWriter.cs
+++ b/src/core/Index/FormatPostingsPositionsWriter.cs
@@ -22,80 +22,80 @@ using IndexOutput = Lucene.Net.Store.IndexOutput;
namespace Lucene.Net.Index
{
-
- sealed class FormatPostingsPositionsWriter:FormatPostingsPositionsConsumer
- {
- internal FormatPostingsDocsWriter parent;
- internal IndexOutput out_Renamed;
-
- internal bool omitTermFreqAndPositions;
- internal bool storePayloads;
- internal int lastPayloadLength = - 1;
-
- internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
- {
- this.parent = parent;
- omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
- if (parent.parent.parent.fieldInfos.HasProx())
- {
- // At least one field does not omit TF, so create the
- // prox file
- System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
- state.flushedFiles.Add(fileName);
- out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
- parent.skipListWriter.SetProxOutput(out_Renamed);
- }
- // Every field omits TF so we will write no prox file
- else
- out_Renamed = null;
- }
-
- internal int lastPosition;
-
- /// <summary>Add a new position & payload </summary>
- internal override void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength)
- {
- System.Diagnostics.Debug.Assert(!omitTermFreqAndPositions, "omitTermFreqAndPositions is true");
- System.Diagnostics.Debug.Assert(out_Renamed != null);
-
- int delta = position - lastPosition;
- lastPosition = position;
-
- if (storePayloads)
- {
- if (payloadLength != lastPayloadLength)
- {
- lastPayloadLength = payloadLength;
- out_Renamed.WriteVInt((delta << 1) | 1);
- out_Renamed.WriteVInt(payloadLength);
- }
- else
- out_Renamed.WriteVInt(delta << 1);
- if (payloadLength > 0)
- out_Renamed.WriteBytes(payload, payloadLength);
- }
- else
- out_Renamed.WriteVInt(delta);
- }
-
- internal void SetField(FieldInfo fieldInfo)
- {
- omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
- storePayloads = omitTermFreqAndPositions?false:fieldInfo.storePayloads;
- }
-
- /// <summary>Called when we are done adding positions & payloads </summary>
- internal override void Finish()
- {
- lastPosition = 0;
- lastPayloadLength = - 1;
- }
-
+
+ sealed class FormatPostingsPositionsWriter:FormatPostingsPositionsConsumer
+ {
+ internal FormatPostingsDocsWriter parent;
+ internal IndexOutput out_Renamed;
+
+ internal bool omitTermFreqAndPositions;
+ internal bool storePayloads;
+ internal int lastPayloadLength = - 1;
+
+ internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent)
+ {
+ this.parent = parent;
+ omitTermFreqAndPositions = parent.omitTermFreqAndPositions;
+ if (parent.parent.parent.fieldInfos.HasProx())
+ {
+ // At least one field does not omit TF, so create the
+ // prox file
+ System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION);
+ state.flushedFiles.Add(fileName);
+ out_Renamed = parent.parent.parent.dir.CreateOutput(fileName);
+ parent.skipListWriter.SetProxOutput(out_Renamed);
+ }
+ // Every field omits TF so we will write no prox file
+ else
+ out_Renamed = null;
+ }
+
+ internal int lastPosition;
+
+ /// <summary>Add a new position & payload </summary>
+ internal override void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength)
+ {
+ System.Diagnostics.Debug.Assert(!omitTermFreqAndPositions, "omitTermFreqAndPositions is true");
+ System.Diagnostics.Debug.Assert(out_Renamed != null);
+
+ int delta = position - lastPosition;
+ lastPosition = position;
+
+ if (storePayloads)
+ {
+ if (payloadLength != lastPayloadLength)
+ {
+ lastPayloadLength = payloadLength;
+ out_Renamed.WriteVInt((delta << 1) | 1);
+ out_Renamed.WriteVInt(payloadLength);
+ }
+ else
+ out_Renamed.WriteVInt(delta << 1);
+ if (payloadLength > 0)
+ out_Renamed.WriteBytes(payload, payloadLength);
+ }
+ else
+ out_Renamed.WriteVInt(delta);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+ storePayloads = omitTermFreqAndPositions?false:fieldInfo.storePayloads;
+ }
+
+ /// <summary>Called when we are done adding positions & payloads </summary>
+ internal override void Finish()
+ {
+ lastPosition = 0;
+ lastPayloadLength = - 1;
+ }
+
public void Dispose()
{
// Move to protected method if class becomes unsealed
if (out_Renamed != null)
out_Renamed.Close();
}
- }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsTermsConsumer.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsTermsConsumer.cs b/src/core/Index/FormatPostingsTermsConsumer.cs
index 637ecff..eb26223 100644
--- a/src/core/Index/FormatPostingsTermsConsumer.cs
+++ b/src/core/Index/FormatPostingsTermsConsumer.cs
@@ -21,32 +21,32 @@ using ArrayUtil = Lucene.Net.Util.ArrayUtil;
namespace Lucene.Net.Index
{
-
- /// <summary> NOTE: this API is experimental and will likely change</summary>
-
- abstract class FormatPostingsTermsConsumer
- {
-
- /// <summary>Adds a new term in this field; term ends with U+FFFF
- /// char
- /// </summary>
- internal abstract FormatPostingsDocsConsumer AddTerm(char[] text, int start);
-
- internal char[] termBuffer;
- internal virtual FormatPostingsDocsConsumer AddTerm(System.String text)
- {
- int len = text.Length;
- if (termBuffer == null || termBuffer.Length < 1 + len)
- termBuffer = new char[ArrayUtil.GetNextSize(1 + len)];
- for (int i = 0; i < len; i++)
- {
- termBuffer[i] = (char) text[i];
- }
- termBuffer[len] = (char) (0xffff);
- return AddTerm(termBuffer, 0);
- }
-
- /// <summary>Called when we are done adding terms to this field </summary>
- internal abstract void Finish();
- }
+
+ /// <summary> NOTE: this API is experimental and will likely change</summary>
+
+ abstract class FormatPostingsTermsConsumer
+ {
+
+ /// <summary>Adds a new term in this field; term ends with U+FFFF
+ /// char
+ /// </summary>
+ internal abstract FormatPostingsDocsConsumer AddTerm(char[] text, int start);
+
+ internal char[] termBuffer;
+ internal virtual FormatPostingsDocsConsumer AddTerm(System.String text)
+ {
+ int len = text.Length;
+ if (termBuffer == null || termBuffer.Length < 1 + len)
+ termBuffer = new char[ArrayUtil.GetNextSize(1 + len)];
+ for (int i = 0; i < len; i++)
+ {
+ termBuffer[i] = (char) text[i];
+ }
+ termBuffer[len] = (char) (0xffff);
+ return AddTerm(termBuffer, 0);
+ }
+
+ /// <summary>Called when we are done adding terms to this field </summary>
+ internal abstract void Finish();
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FormatPostingsTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FormatPostingsTermsWriter.cs b/src/core/Index/FormatPostingsTermsWriter.cs
index 87d2026..7f3126c 100644
--- a/src/core/Index/FormatPostingsTermsWriter.cs
+++ b/src/core/Index/FormatPostingsTermsWriter.cs
@@ -19,59 +19,59 @@ using System;
namespace Lucene.Net.Index
{
-
- sealed class FormatPostingsTermsWriter : FormatPostingsTermsConsumer, IDisposable
- {
- internal FormatPostingsFieldsWriter parent;
- internal FormatPostingsDocsWriter docsWriter;
- internal TermInfosWriter termsOut;
- internal FieldInfo fieldInfo;
-
- internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent):base()
- {
- this.parent = parent;
- termsOut = parent.termsOut;
- docsWriter = new FormatPostingsDocsWriter(state, this);
- }
-
- internal void SetField(FieldInfo fieldInfo)
- {
- this.fieldInfo = fieldInfo;
- docsWriter.SetField(fieldInfo);
- }
-
- internal char[] currentTerm;
- internal int currentTermStart;
-
- internal long freqStart;
- internal long proxStart;
-
- /// <summary>Adds a new term in this field </summary>
- internal override FormatPostingsDocsConsumer AddTerm(char[] text, int start)
- {
- currentTerm = text;
- currentTermStart = start;
-
- // TODO: this is abstraction violation -- ideally this
- // terms writer is not so "invasive", looking for file
- // pointers in its child consumers.
- freqStart = docsWriter.out_Renamed.FilePointer;
- if (docsWriter.posWriter.out_Renamed != null)
- proxStart = docsWriter.posWriter.out_Renamed.FilePointer;
-
- parent.skipListWriter.ResetSkip();
-
- return docsWriter;
- }
-
- /// <summary>Called when we are done adding terms to this field </summary>
- internal override void Finish()
- {
- }
-
+
+ sealed class FormatPostingsTermsWriter : FormatPostingsTermsConsumer, IDisposable
+ {
+ internal FormatPostingsFieldsWriter parent;
+ internal FormatPostingsDocsWriter docsWriter;
+ internal TermInfosWriter termsOut;
+ internal FieldInfo fieldInfo;
+
+ internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent):base()
+ {
+ this.parent = parent;
+ termsOut = parent.termsOut;
+ docsWriter = new FormatPostingsDocsWriter(state, this);
+ }
+
+ internal void SetField(FieldInfo fieldInfo)
+ {
+ this.fieldInfo = fieldInfo;
+ docsWriter.SetField(fieldInfo);
+ }
+
+ internal char[] currentTerm;
+ internal int currentTermStart;
+
+ internal long freqStart;
+ internal long proxStart;
+
+ /// <summary>Adds a new term in this field </summary>
+ internal override FormatPostingsDocsConsumer AddTerm(char[] text, int start)
+ {
+ currentTerm = text;
+ currentTermStart = start;
+
+ // TODO: this is abstraction violation -- ideally this
+ // terms writer is not so "invasive", looking for file
+ // pointers in its child consumers.
+ freqStart = docsWriter.out_Renamed.FilePointer;
+ if (docsWriter.posWriter.out_Renamed != null)
+ proxStart = docsWriter.posWriter.out_Renamed.FilePointer;
+
+ parent.skipListWriter.ResetSkip();
+
+ return docsWriter;
+ }
+
+ /// <summary>Called when we are done adding terms to this field </summary>
+ internal override void Finish()
+ {
+ }
+
public void Dispose()
{
docsWriter.Dispose();
}
- }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FreqProxFieldMergeState.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FreqProxFieldMergeState.cs b/src/core/Index/FreqProxFieldMergeState.cs
index 5306918..c3bd35f 100644
--- a/src/core/Index/FreqProxFieldMergeState.cs
+++ b/src/core/Index/FreqProxFieldMergeState.cs
@@ -20,98 +20,98 @@ using Lucene.Net.Support;
namespace Lucene.Net.Index
{
-
- // TODO FI: some of this is "generic" to TermsHash* so we
- // should factor it out so other consumers don't have to
- // duplicate this code
-
- /// <summary>Used by DocumentsWriter to merge the postings from
- /// multiple ThreadStates when creating a segment
- /// </summary>
- sealed class FreqProxFieldMergeState
- {
-
- internal FreqProxTermsWriterPerField field;
- internal int numPostings;
- internal CharBlockPool charPool;
- internal RawPostingList[] postings;
-
- private FreqProxTermsWriter.PostingList p;
- internal char[] text;
- internal int textOffset;
-
- private int postingUpto = - 1;
-
- internal ByteSliceReader freq = new ByteSliceReader();
- internal ByteSliceReader prox = new ByteSliceReader();
-
- internal int docID;
- internal int termFreq;
-
- public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
- {
- this.field = field;
- this.charPool = field.perThread.termsHashPerThread.charPool;
- this.numPostings = field.termsHashPerField.numPostings;
- this.postings = field.termsHashPerField.SortPostings();
- }
-
- internal bool NextTerm()
- {
- postingUpto++;
- if (postingUpto == numPostings)
- return false;
-
- p = (FreqProxTermsWriter.PostingList) postings[postingUpto];
- docID = 0;
-
- text = charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
- textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
-
- field.termsHashPerField.InitReader(freq, p, 0);
- if (!field.fieldInfo.omitTermFreqAndPositions)
- field.termsHashPerField.InitReader(prox, p, 1);
-
- // Should always be true
- bool result = NextDoc();
- System.Diagnostics.Debug.Assert(result);
-
- return true;
- }
-
- public bool NextDoc()
- {
- if (freq.Eof())
- {
- if (p.lastDocCode != - 1)
- {
- // Return last doc
- docID = p.lastDocID;
- if (!field.omitTermFreqAndPositions)
- termFreq = p.docFreq;
- p.lastDocCode = - 1;
- return true;
- }
- // EOF
- else
- return false;
- }
-
- int code = freq.ReadVInt();
- if (field.omitTermFreqAndPositions)
- docID += code;
- else
- {
- docID += Number.URShift(code, 1);
- if ((code & 1) != 0)
- termFreq = 1;
- else
- termFreq = freq.ReadVInt();
- }
-
- System.Diagnostics.Debug.Assert(docID != p.lastDocID);
-
- return true;
- }
- }
+
+ // TODO FI: some of this is "generic" to TermsHash* so we
+ // should factor it out so other consumers don't have to
+ // duplicate this code
+
+ /// <summary>Used by DocumentsWriter to merge the postings from
+ /// multiple ThreadStates when creating a segment
+ /// </summary>
+ sealed class FreqProxFieldMergeState
+ {
+
+ internal FreqProxTermsWriterPerField field;
+ internal int numPostings;
+ internal CharBlockPool charPool;
+ internal RawPostingList[] postings;
+
+ private FreqProxTermsWriter.PostingList p;
+ internal char[] text;
+ internal int textOffset;
+
+ private int postingUpto = - 1;
+
+ internal ByteSliceReader freq = new ByteSliceReader();
+ internal ByteSliceReader prox = new ByteSliceReader();
+
+ internal int docID;
+ internal int termFreq;
+
+ public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
+ {
+ this.field = field;
+ this.charPool = field.perThread.termsHashPerThread.charPool;
+ this.numPostings = field.termsHashPerField.numPostings;
+ this.postings = field.termsHashPerField.SortPostings();
+ }
+
+ internal bool NextTerm()
+ {
+ postingUpto++;
+ if (postingUpto == numPostings)
+ return false;
+
+ p = (FreqProxTermsWriter.PostingList) postings[postingUpto];
+ docID = 0;
+
+ text = charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+ textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+ field.termsHashPerField.InitReader(freq, p, 0);
+ if (!field.fieldInfo.omitTermFreqAndPositions)
+ field.termsHashPerField.InitReader(prox, p, 1);
+
+ // Should always be true
+ bool result = NextDoc();
+ System.Diagnostics.Debug.Assert(result);
+
+ return true;
+ }
+
+ public bool NextDoc()
+ {
+ if (freq.Eof())
+ {
+ if (p.lastDocCode != - 1)
+ {
+ // Return last doc
+ docID = p.lastDocID;
+ if (!field.omitTermFreqAndPositions)
+ termFreq = p.docFreq;
+ p.lastDocCode = - 1;
+ return true;
+ }
+ // EOF
+ else
+ return false;
+ }
+
+ int code = freq.ReadVInt();
+ if (field.omitTermFreqAndPositions)
+ docID += code;
+ else
+ {
+ docID += Number.URShift(code, 1);
+ if ((code & 1) != 0)
+ termFreq = 1;
+ else
+ termFreq = freq.ReadVInt();
+ }
+
+ System.Diagnostics.Debug.Assert(docID != p.lastDocID);
+
+ return true;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FreqProxTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FreqProxTermsWriter.cs b/src/core/Index/FreqProxTermsWriter.cs
index f98d646..a289a47 100644
--- a/src/core/Index/FreqProxTermsWriter.cs
+++ b/src/core/Index/FreqProxTermsWriter.cs
@@ -21,283 +21,283 @@ using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
namespace Lucene.Net.Index
{
- sealed class FreqProxTermsWriter : TermsHashConsumer
- {
- public override TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread)
- {
- return new FreqProxTermsWriterPerThread(perThread);
- }
-
- internal override void CreatePostings(RawPostingList[] postings, int start, int count)
- {
- int end = start + count;
- for (int i = start; i < end; i++)
- postings[i] = new PostingList();
- }
-
- private static int compareText(char[] text1, int pos1, char[] text2, int pos2)
- {
- while (true)
- {
- char c1 = text1[pos1++];
- char c2 = text2[pos2++];
- if (c1 != c2)
- {
- if (0xffff == c2)
- return 1;
- else if (0xffff == c1)
- return - 1;
- else
- return c1 - c2;
- }
- else if (0xffff == c1)
- return 0;
- }
- }
-
- internal override void CloseDocStore(SegmentWriteState state)
- {
- }
- public override void Abort()
- {
- }
-
-
- // TODO: would be nice to factor out more of this, eg the
- // FreqProxFieldMergeState, and code to visit all Fields
- // under the same FieldInfo together, up into TermsHash*.
- // Other writers would presumably share alot of this...
+ sealed class FreqProxTermsWriter : TermsHashConsumer
+ {
+ public override TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread)
+ {
+ return new FreqProxTermsWriterPerThread(perThread);
+ }
+
+ internal override void CreatePostings(RawPostingList[] postings, int start, int count)
+ {
+ int end = start + count;
+ for (int i = start; i < end; i++)
+ postings[i] = new PostingList();
+ }
+
+ private static int compareText(char[] text1, int pos1, char[] text2, int pos2)
+ {
+ while (true)
+ {
+ char c1 = text1[pos1++];
+ char c2 = text2[pos2++];
+ if (c1 != c2)
+ {
+ if (0xffff == c2)
+ return 1;
+ else if (0xffff == c1)
+ return - 1;
+ else
+ return c1 - c2;
+ }
+ else if (0xffff == c1)
+ return 0;
+ }
+ }
+
+ internal override void CloseDocStore(SegmentWriteState state)
+ {
+ }
+ public override void Abort()
+ {
+ }
+
+
+ // TODO: would be nice to factor out more of this, eg the
+ // FreqProxFieldMergeState, and code to visit all Fields
+ // under the same FieldInfo together, up into TermsHash*.
+ // Other writers would presumably share alot of this...
public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
- {
-
- // Gather all FieldData's that have postings, across all
- // ThreadStates
- var allFields = new List<FreqProxTermsWriterPerField>();
+ {
+
+ // Gather all FieldData's that have postings, across all
+ // ThreadStates
+ var allFields = new List<FreqProxTermsWriterPerField>();
foreach(var entry in threadsAndFields)
- {
- var fields = entry.Value;
-
- foreach(var i in fields)
- {
- FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
- if (perField.termsHashPerField.numPostings > 0)
- allFields.Add(perField);
- }
- }
-
- // Sort by field name
+ {
+ var fields = entry.Value;
+
+ foreach(var i in fields)
+ {
+ FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i;
+ if (perField.termsHashPerField.numPostings > 0)
+ allFields.Add(perField);
+ }
+ }
+
+ // Sort by field name
allFields.Sort();
- int numAllFields = allFields.Count;
-
- // TODO: allow Lucene user to customize this consumer:
- FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
- /*
- Current writer chain:
- FormatPostingsFieldsConsumer
- -> IMPL: FormatPostingsFieldsWriter
- -> FormatPostingsTermsConsumer
- -> IMPL: FormatPostingsTermsWriter
- -> FormatPostingsDocConsumer
- -> IMPL: FormatPostingsDocWriter
- -> FormatPostingsPositionsConsumer
- -> IMPL: FormatPostingsPositionsWriter
- */
-
- int start = 0;
- while (start < numAllFields)
- {
- FieldInfo fieldInfo = allFields[start].fieldInfo;
- System.String fieldName = fieldInfo.name;
-
- int end = start + 1;
- while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
- end++;
-
- FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
- for (int i = start; i < end; i++)
- {
- fields[i - start] = allFields[i];
-
- // Aggregate the storePayload as seen by the same
- // field across multiple threads
- fieldInfo.storePayloads |= fields[i - start].hasPayloads;
- }
-
- // If this field has postings then add them to the
- // segment
- AppendPostings(fields, consumer);
-
- for (int i = 0; i < fields.Length; i++)
- {
- TermsHashPerField perField = fields[i].termsHashPerField;
- int numPostings = perField.numPostings;
- perField.Reset();
- perField.ShrinkHash(numPostings);
- fields[i].Reset();
- }
-
- start = end;
- }
+ int numAllFields = allFields.Count;
+
+ // TODO: allow Lucene user to customize this consumer:
+ FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+ /*
+ Current writer chain:
+ FormatPostingsFieldsConsumer
+ -> IMPL: FormatPostingsFieldsWriter
+ -> FormatPostingsTermsConsumer
+ -> IMPL: FormatPostingsTermsWriter
+ -> FormatPostingsDocConsumer
+ -> IMPL: FormatPostingsDocWriter
+ -> FormatPostingsPositionsConsumer
+ -> IMPL: FormatPostingsPositionsWriter
+ */
+
+ int start = 0;
+ while (start < numAllFields)
+ {
+ FieldInfo fieldInfo = allFields[start].fieldInfo;
+ System.String fieldName = fieldInfo.name;
+
+ int end = start + 1;
+ while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName))
+ end++;
+
+ FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start];
+ for (int i = start; i < end; i++)
+ {
+ fields[i - start] = allFields[i];
+
+ // Aggregate the storePayload as seen by the same
+ // field across multiple threads
+ fieldInfo.storePayloads |= fields[i - start].hasPayloads;
+ }
+
+ // If this field has postings then add them to the
+ // segment
+ AppendPostings(fields, consumer);
+
+ for (int i = 0; i < fields.Length; i++)
+ {
+ TermsHashPerField perField = fields[i].termsHashPerField;
+ int numPostings = perField.numPostings;
+ perField.Reset();
+ perField.ShrinkHash(numPostings);
+ fields[i].Reset();
+ }
+
+ start = end;
+ }
foreach(var entry in threadsAndFields)
- {
- FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
- perThread.termsHashPerThread.Reset(true);
- }
-
- consumer.Finish();
- }
-
- private byte[] payloadBuffer;
-
- /* Walk through all unique text tokens (Posting
- * instances) found in this field and serialize them
- * into a single RAM segment. */
- internal void AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
- {
-
- int numFields = fields.Length;
-
- FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
-
- for (int i = 0; i < numFields; i++)
- {
- FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);
-
- System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo);
-
- // Should always be true
- bool result = fms.NextTerm();
- System.Diagnostics.Debug.Assert(result);
- }
-
- FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);
-
- FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
-
- bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
-
- while (numFields > 0)
- {
-
- // Get the next term to merge
- termStates[0] = mergeStates[0];
- int numToMerge = 1;
-
- for (int i = 1; i < numFields; i++)
- {
- char[] text = mergeStates[i].text;
- int textOffset = mergeStates[i].textOffset;
- int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);
-
- if (cmp < 0)
- {
- termStates[0] = mergeStates[i];
- numToMerge = 1;
- }
- else if (cmp == 0)
- termStates[numToMerge++] = mergeStates[i];
- }
-
- FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);
-
- // Now termStates has numToMerge FieldMergeStates
- // which all share the same term. Now we must
- // interleave the docID streams.
- while (numToMerge > 0)
- {
-
- FreqProxFieldMergeState minState = termStates[0];
- for (int i = 1; i < numToMerge; i++)
- if (termStates[i].docID < minState.docID)
- minState = termStates[i];
-
- int termDocFreq = minState.termFreq;
-
- FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);
-
- ByteSliceReader prox = minState.prox;
-
- // Carefully copy over the prox + payload info,
- // changing the format to match Lucene's segment
- // format.
- if (!currentFieldOmitTermFreqAndPositions)
- {
- // omitTermFreqAndPositions == false so we do write positions &
- // payload
- int position = 0;
- for (int j = 0; j < termDocFreq; j++)
- {
- int code = prox.ReadVInt();
- position += (code >> 1);
-
- int payloadLength;
- if ((code & 1) != 0)
- {
- // This position has a payload
- payloadLength = prox.ReadVInt();
-
- if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
- payloadBuffer = new byte[payloadLength];
-
- prox.ReadBytes(payloadBuffer, 0, payloadLength);
- }
- else
- payloadLength = 0;
-
- posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength);
- } //End for
-
- posConsumer.Finish();
- }
-
- if (!minState.NextDoc())
- {
-
- // Remove from termStates
- int upto = 0;
- for (int i = 0; i < numToMerge; i++)
- if (termStates[i] != minState)
- termStates[upto++] = termStates[i];
- numToMerge--;
- System.Diagnostics.Debug.Assert(upto == numToMerge);
-
- // Advance this state to the next term
-
- if (!minState.NextTerm())
- {
- // OK, no more terms, so remove from mergeStates
- // as well
- upto = 0;
- for (int i = 0; i < numFields; i++)
- if (mergeStates[i] != minState)
- mergeStates[upto++] = mergeStates[i];
- numFields--;
- System.Diagnostics.Debug.Assert(upto == numFields);
- }
- }
- }
-
- docConsumer.Finish();
- }
-
- termsConsumer.Finish();
- }
+ {
+ FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key;
+ perThread.termsHashPerThread.Reset(true);
+ }
+
+ consumer.Finish();
+ }
+
+ private byte[] payloadBuffer;
+
+ /* Walk through all unique text tokens (Posting
+ * instances) found in this field and serialize them
+ * into a single RAM segment. */
+        internal void AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer)
+        { // Merges the postings of every entry in fields (asserted to share one FieldInfo) and feeds them to consumer.
+            
+            int numFields = fields.Length;
+            
+            FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
+            
+            for (int i = 0; i < numFields; i++)
+            {
+                FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]);
+                
+                System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo);
+                
+                // Should always be true
+                bool result = fms.NextTerm(); // positions the state on its first term
+                System.Diagnostics.Debug.Assert(result);
+            }
+            
+            FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo);
+            
+            FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields]; // states sitting on the current smallest term
+            
+            bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
+            
+            while (numFields > 0) // numFields shrinks as states run out of terms
+            {
+                
+                // Get the next term to merge
+                termStates[0] = mergeStates[0];
+                int numToMerge = 1;
+                
+                for (int i = 1; i < numFields; i++)
+                {
+                    char[] text = mergeStates[i].text;
+                    int textOffset = mergeStates[i].textOffset;
+                    int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset);
+                    
+                    if (cmp < 0)
+                    {
+                        termStates[0] = mergeStates[i]; // strictly smaller term found: restart the candidate set
+                        numToMerge = 1;
+                    }
+                    else if (cmp == 0)
+                        termStates[numToMerge++] = mergeStates[i]; // same term: merge this state too
+                }
+                
+                FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset);
+                
+                // Now termStates has numToMerge FieldMergeStates
+                // which all share the same term.  Now we must
+                // interleave the docID streams.
+                while (numToMerge > 0)
+                {
+                    
+                    FreqProxFieldMergeState minState = termStates[0];
+                    for (int i = 1; i < numToMerge; i++)
+                        if (termStates[i].docID < minState.docID) // strict '<': on a tie the earlier entry stays selected
+                            minState = termStates[i];
+                    
+                    int termDocFreq = minState.termFreq;
+                    
+                    FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq);
+                    
+                    ByteSliceReader prox = minState.prox;
+                    
+                    // Carefully copy over the prox + payload info,
+                    // changing the format to match Lucene's segment
+                    // format.
+                    if (!currentFieldOmitTermFreqAndPositions)
+                    {
+                        // omitTermFreqAndPositions == false so we do write positions &
+                        // payload          
+                        int position = 0;
+                        for (int j = 0; j < termDocFreq; j++)
+                        {
+                            int code = prox.ReadVInt();
+                            position += (code >> 1); // upper bits: delta added to the running position
+                            
+                            int payloadLength;
+                            if ((code & 1) != 0) // low bit flags a payload
+                            {
+                                // This position has a payload
+                                payloadLength = prox.ReadVInt();
+                                
+                                if (payloadBuffer == null || payloadBuffer.Length < payloadLength)
+                                    payloadBuffer = new byte[payloadLength]; // buffer is an instance field, replaced only when too small
+                                
+                                prox.ReadBytes(payloadBuffer, 0, payloadLength);
+                            }
+                            else
+                                payloadLength = 0;
+                            
+                            posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); // payloadBuffer may be null or stale when payloadLength is 0
+                        } //End for
+                        
+                        posConsumer.Finish();
+                    }
+                    
+                    if (!minState.NextDoc())
+                    {
+                        
+                        // Remove from termStates
+                        int upto = 0;
+                        for (int i = 0; i < numToMerge; i++)
+                            if (termStates[i] != minState)
+                                termStates[upto++] = termStates[i]; // compact in place, preserving order
+                        numToMerge--;
+                        System.Diagnostics.Debug.Assert(upto == numToMerge);
+                        
+                        // Advance this state to the next term
+                        
+                        if (!minState.NextTerm())
+                        {
+                            // OK, no more terms, so remove from mergeStates
+                            // as well
+                            upto = 0;
+                            for (int i = 0; i < numFields; i++)
+                                if (mergeStates[i] != minState)
+                                    mergeStates[upto++] = mergeStates[i];
+                            numFields--;
+                            System.Diagnostics.Debug.Assert(upto == numFields);
+                        }
+                    }
+                }
+                
+                docConsumer.Finish();
+            }
+            
+            termsConsumer.Finish();
+        }
- internal UnicodeUtil.UTF8Result termsUTF8 = new UnicodeUtil.UTF8Result();
-
- internal sealed class PostingList:RawPostingList
- {
- internal int docFreq; // # times this term occurs in the current doc
- internal int lastDocID; // Last docID where this term occurred
- internal int lastDocCode; // Code for prior doc
- internal int lastPosition; // Last position where this term occurred
- }
-
- internal override int BytesPerPosting()
- {
- return RawPostingList.BYTES_SIZE + 4 * DocumentsWriter.INT_NUM_BYTE;
- }
- }
+ internal UnicodeUtil.UTF8Result termsUTF8 = new UnicodeUtil.UTF8Result();
+
+        internal sealed class PostingList:RawPostingList
+        { // Per-term bookkeeping read and updated by FreqProxTermsWriterPerField (NewTerm / AddTerm / WriteProx).
+            internal int docFreq; // # times this term occurs in the current doc
+            internal int lastDocID; // Last docID where this term occurred
+            internal int lastDocCode; // Encoded doc code for lastDocID; written out once a later doc arrives
+            internal int lastPosition; // Last position where this term occurred
+        }
+
+        internal override int BytesPerPosting()
+        { // Base RawPostingList size plus four ints; PostingList declares four int fields.
+            return RawPostingList.BYTES_SIZE + 4 * DocumentsWriter.INT_NUM_BYTE;
+        }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FreqProxTermsWriterPerField.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FreqProxTermsWriterPerField.cs b/src/core/Index/FreqProxTermsWriterPerField.cs
index c654b48..8facae6 100644
--- a/src/core/Index/FreqProxTermsWriterPerField.cs
+++ b/src/core/Index/FreqProxTermsWriterPerField.cs
@@ -21,176 +21,176 @@ using Lucene.Net.Documents;
namespace Lucene.Net.Index
{
-
- // TODO: break into separate freq and prox writers as
- // codecs; make separate container (tii/tis/skip/*) that can
- // be configured as any number of files 1..N
- sealed class FreqProxTermsWriterPerField:TermsHashConsumerPerField, System.IComparable<FreqProxTermsWriterPerField>
- {
-
- internal FreqProxTermsWriterPerThread perThread;
- internal TermsHashPerField termsHashPerField;
- internal FieldInfo fieldInfo;
- internal DocumentsWriter.DocState docState;
- internal FieldInvertState fieldState;
- internal bool omitTermFreqAndPositions;
- internal IPayloadAttribute payloadAttribute;
-
- public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
- {
- this.termsHashPerField = termsHashPerField;
- this.perThread = perThread;
- this.fieldInfo = fieldInfo;
- docState = termsHashPerField.docState;
- fieldState = termsHashPerField.fieldState;
- omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
- }
-
- internal override int GetStreamCount()
- {
- if (fieldInfo.omitTermFreqAndPositions)
- return 1;
- else
- return 2;
- }
-
- internal override void Finish()
- {
- }
-
- internal bool hasPayloads;
-
- internal override void SkippingLongTerm()
- {
- }
-
- public int CompareTo(FreqProxTermsWriterPerField other)
- {
- return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
- }
-
- internal void Reset()
- {
- // Record, up front, whether our in-RAM format will be
- // with or without term freqs:
- omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
- payloadAttribute = null;
- }
-
- internal override bool Start(IFieldable[] fields, int count)
- {
- for (int i = 0; i < count; i++)
- if (fields[i].IsIndexed)
- return true;
- return false;
- }
-
- internal override void Start(IFieldable f)
- {
+
+ // TODO: break into separate freq and prox writers as
+ // codecs; make separate container (tii/tis/skip/*) that can
+ // be configured as any number of files 1..N
+ sealed class FreqProxTermsWriterPerField:TermsHashConsumerPerField, System.IComparable<FreqProxTermsWriterPerField>
+ {
+
+ internal FreqProxTermsWriterPerThread perThread;
+ internal TermsHashPerField termsHashPerField;
+ internal FieldInfo fieldInfo;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+ internal bool omitTermFreqAndPositions;
+ internal IPayloadAttribute payloadAttribute;
+
+        public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo)
+        { // Stores the three collaborators and caches values read from termsHashPerField and fieldInfo.
+            this.termsHashPerField = termsHashPerField;
+            this.perThread = perThread;
+            this.fieldInfo = fieldInfo;
+            docState = termsHashPerField.docState;
+            fieldState = termsHashPerField.fieldState;
+            omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; // local copy; Reset() re-reads it from fieldInfo
+        }
+
+        internal override int GetStreamCount()
+        { // One stream when freqs/positions are omitted, otherwise two (this class writes with stream arguments 0 and 1).
+            if (fieldInfo.omitTermFreqAndPositions)
+                return 1;
+            else
+                return 2;
+        }
+
+        internal override void Finish()
+        { // No-op: this override has an empty body.
+        }
+
+ internal bool hasPayloads;
+
+        internal override void SkippingLongTerm()
+        { // No-op: this override has an empty body.
+        }
+
+        public int CompareTo(FreqProxTermsWriterPerField other)
+        { // Orders instances by fieldInfo.name using ordinal string comparison; other is dereferenced, so it must be non-null.
+            return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name);
+        }
+
+        internal void Reset()
+        {
+            // Record, up front, whether our in-RAM format will be
+            // with or without term freqs:
+            omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+            payloadAttribute = null; // assigned again by Start(IFieldable)
+        }
+
+        internal override bool Start(IFieldable[] fields, int count)
+        { // True when at least one of the first count entries of fields has IsIndexed set.
+            for (int i = 0; i < count; i++)
+                if (fields[i].IsIndexed)
+                    return true;
+            return false;
+        }
+
+ internal override void Start(IFieldable f)
+ {
if (fieldState.attributeSource.HasAttribute<IPayloadAttribute>())
- {
+ {
payloadAttribute = fieldState.attributeSource.GetAttribute<IPayloadAttribute>();
- }
- else
- {
- payloadAttribute = null;
- }
- }
-
- internal void WriteProx(FreqProxTermsWriter.PostingList p, int proxCode)
- {
- Payload payload;
- if (payloadAttribute == null)
- {
- payload = null;
- }
- else
- {
- payload = payloadAttribute.Payload;
- }
-
- if (payload != null && payload.internalLength > 0)
- {
- termsHashPerField.WriteVInt(1, (proxCode << 1) | 1);
- termsHashPerField.WriteVInt(1, payload.internalLength);
- termsHashPerField.WriteBytes(1, payload.data, payload.internalOffset, payload.internalLength);
- hasPayloads = true;
- }
- else
- termsHashPerField.WriteVInt(1, proxCode << 1);
- p.lastPosition = fieldState.position;
- }
-
- internal override void NewTerm(RawPostingList p0)
- {
- // First time we're seeing this term since the last
- // flush
- System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));
- FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
- p.lastDocID = docState.docID;
- if (omitTermFreqAndPositions)
- {
- p.lastDocCode = docState.docID;
- }
- else
- {
- p.lastDocCode = docState.docID << 1;
- p.docFreq = 1;
- WriteProx(p, fieldState.position);
- }
- }
-
- internal override void AddTerm(RawPostingList p0)
- {
-
- System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));
-
- FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
-
- System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0);
-
- if (omitTermFreqAndPositions)
- {
- if (docState.docID != p.lastDocID)
- {
- System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
- termsHashPerField.WriteVInt(0, p.lastDocCode);
- p.lastDocCode = docState.docID - p.lastDocID;
- p.lastDocID = docState.docID;
- }
- }
- else
- {
- if (docState.docID != p.lastDocID)
- {
- System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
- // Term not yet seen in the current doc but previously
- // seen in other doc(s) since the last flush
-
- // Now that we know doc freq for previous doc,
- // write it & lastDocCode
- if (1 == p.docFreq)
- termsHashPerField.WriteVInt(0, p.lastDocCode | 1);
- else
- {
- termsHashPerField.WriteVInt(0, p.lastDocCode);
- termsHashPerField.WriteVInt(0, p.docFreq);
- }
- p.docFreq = 1;
- p.lastDocCode = (docState.docID - p.lastDocID) << 1;
- p.lastDocID = docState.docID;
- WriteProx(p, fieldState.position);
- }
- else
- {
- p.docFreq++;
- WriteProx(p, fieldState.position - p.lastPosition);
- }
- }
- }
-
- public void Abort()
- {
- }
- }
+ }
+ else
+ {
+ payloadAttribute = null;
+ }
+ }
+
+        internal void WriteProx(FreqProxTermsWriter.PostingList p, int proxCode)
+        { // Writes proxCode (plus any non-empty payload) with stream argument 1, then records the current position in p.
+            Payload payload;
+            if (payloadAttribute == null)
+            {
+                payload = null;
+            }
+            else
+            {
+                payload = payloadAttribute.Payload;
+            }
+            
+            if (payload != null && payload.internalLength > 0)
+            {
+                termsHashPerField.WriteVInt(1, (proxCode << 1) | 1); // low bit set: length and payload bytes follow
+                termsHashPerField.WriteVInt(1, payload.internalLength);
+                termsHashPerField.WriteBytes(1, payload.data, payload.internalOffset, payload.internalLength);
+                hasPayloads = true;
+            }
+            else
+                termsHashPerField.WriteVInt(1, proxCode << 1); // low bit clear: no payload for this position
+            p.lastPosition = fieldState.position;
+        }
+
+        internal override void NewTerm(RawPostingList p0)
+        {
+            // First time we're seeing this term since the last
+            // flush
+            System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start"));
+            FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
+            p.lastDocID = docState.docID;
+            if (omitTermFreqAndPositions)
+            {
+                p.lastDocCode = docState.docID; // doc-only format: the pending code is the docID itself
+            }
+            else
+            {
+                p.lastDocCode = docState.docID << 1; // low bit left clear; AddTerm sets it when docFreq is 1
+                p.docFreq = 1;
+                WriteProx(p, fieldState.position); // first occurrence: position passed as-is, not as a delta
+            }
+        }
+
+        internal override void AddTerm(RawPostingList p0)
+        { // Updates the posting state p0 for another occurrence of its term, in the same or a later doc.
+            
+            System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start"));
+            
+            FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0;
+            
+            System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0);
+            
+            if (omitTermFreqAndPositions)
+            {
+                if (docState.docID != p.lastDocID)
+                {
+                    System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
+                    termsHashPerField.WriteVInt(0, p.lastDocCode); // previous doc is complete: emit its pending code
+                    p.lastDocCode = docState.docID - p.lastDocID; // pending code for the new doc: delta from the previous docID
+                    p.lastDocID = docState.docID;
+                }
+            }
+            else
+            {
+                if (docState.docID != p.lastDocID)
+                {
+                    System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID);
+                    // Term not yet seen in the current doc but previously
+                    // seen in other doc(s) since the last flush
+                    
+                    // Now that we know doc freq for previous doc,
+                    // write it & lastDocCode
+                    if (1 == p.docFreq)
+                        termsHashPerField.WriteVInt(0, p.lastDocCode | 1); // low bit set stands for docFreq == 1, so no freq is written
+                    else
+                    {
+                        termsHashPerField.WriteVInt(0, p.lastDocCode); // low bit clear: the explicit docFreq follows
+                        termsHashPerField.WriteVInt(0, p.docFreq);
+                    }
+                    p.docFreq = 1;
+                    p.lastDocCode = (docState.docID - p.lastDocID) << 1; // docID delta, shifted to free the low bit
+                    p.lastDocID = docState.docID;
+                    WriteProx(p, fieldState.position); // first position in this doc: passed as-is
+                }
+                else
+                {
+                    p.docFreq++;
+                    WriteProx(p, fieldState.position - p.lastPosition); // same doc: delta from the previous position
+                }
+            }
+        }
+
+        public void Abort()
+        { // No-op: the body is empty.
+        }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/FreqProxTermsWriterPerThread.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/FreqProxTermsWriterPerThread.cs b/src/core/Index/FreqProxTermsWriterPerThread.cs
index 01f1ae9..1e4ac41 100644
--- a/src/core/Index/FreqProxTermsWriterPerThread.cs
+++ b/src/core/Index/FreqProxTermsWriterPerThread.cs
@@ -19,34 +19,34 @@ using System;
namespace Lucene.Net.Index
{
-
- sealed class FreqProxTermsWriterPerThread:TermsHashConsumerPerThread
- {
- internal TermsHashPerThread termsHashPerThread;
- internal DocumentsWriter.DocState docState;
-
- public FreqProxTermsWriterPerThread(TermsHashPerThread perThread)
- {
- docState = perThread.docState;
- termsHashPerThread = perThread;
- }
-
- public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
- {
- return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo);
- }
-
- public override void StartDocument()
- {
- }
-
- public override DocumentsWriter.DocWriter FinishDocument()
- {
- return null;
- }
-
- public override void Abort()
- {
- }
- }
+
+    sealed class FreqProxTermsWriterPerThread:TermsHashConsumerPerThread
+    { // Per-thread consumer: creates per-field writers; StartDocument/Abort are empty and FinishDocument returns null.
+        internal TermsHashPerThread termsHashPerThread;
+        internal DocumentsWriter.DocState docState;
+        
+        public FreqProxTermsWriterPerThread(TermsHashPerThread perThread)
+        {
+            docState = perThread.docState; // same DocState instance as the given TermsHashPerThread
+            termsHashPerThread = perThread;
+        }
+        
+        public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
+        {
+            return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); // the new writer keeps a reference to this thread
+        }
+        
+        public override void StartDocument()
+        { // No-op: empty body.
+        }
+        
+        public override DocumentsWriter.DocWriter FinishDocument()
+        {
+            return null; // this consumer never produces a DocWriter
+        }
+        
+        public override void Abort()
+        { // No-op: empty body.
+        }
+    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/IndexCommit.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/IndexCommit.cs b/src/core/Index/IndexCommit.cs
index 306d7f1..7bfd351 100644
--- a/src/core/Index/IndexCommit.cs
+++ b/src/core/Index/IndexCommit.cs
@@ -22,98 +22,98 @@ using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Index
{
-
- /// <summary> <p/>Expert: represents a single commit into an index as seen by the
- /// <see cref="IndexDeletionPolicy" /> or <see cref="IndexReader" />.<p/>
- ///
- /// <p/> Changes to the content of an index are made visible
- /// only after the writer who made that change commits by
- /// writing a new segments file
- /// (<c>segments_N</c>). This point in time, when the
- /// action of writing of a new segments file to the directory
- /// is completed, is an index commit.<p/>
- ///
- /// <p/>Each index commit point has a unique segments file
- /// associated with it. The segments file associated with a
- /// later index commit point would have a larger N.<p/>
- ///
- /// <p/><b>WARNING</b>: This API is a new and experimental and
- /// may suddenly change. <p/>
- /// </summary>
-
- public abstract class IndexCommit
- {
- /// <summary> Get the segments file (<c>segments_N</c>) associated
- /// with this commit point.
- /// </summary>
- public abstract string SegmentsFileName { get; }
+
+ /// <summary> <p/>Expert: represents a single commit into an index as seen by the
+ /// <see cref="IndexDeletionPolicy" /> or <see cref="IndexReader" />.<p/>
+ ///
+ /// <p/> Changes to the content of an index are made visible
+ /// only after the writer who made that change commits by
+ /// writing a new segments file
+ /// (<c>segments_N</c>). This point in time, when the
+ /// action of writing of a new segments file to the directory
+ /// is completed, is an index commit.<p/>
+ ///
+ /// <p/>Each index commit point has a unique segments file
+ /// associated with it. The segments file associated with a
+ /// later index commit point would have a larger N.<p/>
+ ///
+    /// <p/><b>WARNING</b>: This API is new and experimental and
+ /// may suddenly change. <p/>
+ /// </summary>
+
+ public abstract class IndexCommit
+ {
+ /// <summary> Get the segments file (<c>segments_N</c>) associated
+ /// with this commit point.
+ /// </summary>
+ public abstract string SegmentsFileName { get; }
- /// <summary> Returns all index files referenced by this commit point.</summary>
- public abstract ICollection<string> FileNames { get; }
+ /// <summary> Returns all index files referenced by this commit point.</summary>
+ public abstract ICollection<string> FileNames { get; }
- /// <summary> Returns the <see cref="Store.Directory" /> for the index.</summary>
- public abstract Directory Directory { get; }
+ /// <summary> Returns the <see cref="Store.Directory" /> for the index.</summary>
+ public abstract Directory Directory { get; }
- /// <summary> Delete this commit point. This only applies when using
- /// the commit point in the context of IndexWriter's
- /// IndexDeletionPolicy.
- /// <p/>
- /// Upon calling this, the writer is notified that this commit
- /// point should be deleted.
- /// <p/>
- /// Decision that a commit-point should be deleted is taken by the <see cref="IndexDeletionPolicy" /> in effect
+ /// <summary> Delete this commit point. This only applies when using
+ /// the commit point in the context of IndexWriter's
+ /// IndexDeletionPolicy.
+ /// <p/>
+ /// Upon calling this, the writer is notified that this commit
+ /// point should be deleted.
+ /// <p/>
+ /// Decision that a commit-point should be deleted is taken by the <see cref="IndexDeletionPolicy" /> in effect
/// and therefore this should only be called by its <see cref="IndexDeletionPolicy.OnInit{T}(IList{T})" /> or
/// <see cref="IndexDeletionPolicy.OnCommit{T}(IList{T})" /> methods.
- /// </summary>
+ /// </summary>
public abstract void Delete();
- public abstract bool IsDeleted { get; }
+ public abstract bool IsDeleted { get; }
- /// <summary> Returns true if this commit is an optimized index.</summary>
- public abstract bool IsOptimized { get; }
+ /// <summary> Returns true if this commit is an optimized index.</summary>
+ public abstract bool IsOptimized { get; }
- /// <summary> Two IndexCommits are equal if both their Directory and versions are equal.</summary>
- public override bool Equals(System.Object other)
- {
- if (other is IndexCommit)
- {
- IndexCommit otherCommit = (IndexCommit) other;
- return otherCommit.Directory.Equals(Directory) && otherCommit.Version == Version;
- }
- else
- return false;
- }
-
- public override int GetHashCode()
- {
- return (int)(Directory.GetHashCode() + Version);
- }
+ /// <summary> Two IndexCommits are equal if both their Directory and versions are equal.</summary>
+        public override bool Equals(System.Object other)
+        {
+            if (other is IndexCommit) // 'is' yields false for null, so null takes the else branch
+            {
+                IndexCommit otherCommit = (IndexCommit) other;
+                return otherCommit.Directory.Equals(Directory) && otherCommit.Version == Version;
+            }
+            else
+                return false;
+        }
+
+        public override int GetHashCode()
+        {
+            return (int)(Directory.GetHashCode() + Version); // Version is long, so the sum is long and is then cast down to int
+        }
- /// <summary>Returns the version for this IndexCommit. This is the
- /// same value that <see cref="IndexReader.Version" /> would
- /// return if it were opened on this commit.
- /// </summary>
- public abstract long Version { get; }
+ /// <summary>Returns the version for this IndexCommit. This is the
+ /// same value that <see cref="IndexReader.Version" /> would
+ /// return if it were opened on this commit.
+ /// </summary>
+ public abstract long Version { get; }
- /// <summary>Returns the generation (the _N in segments_N) for this
- /// IndexCommit
- /// </summary>
- public abstract long Generation { get; }
+ /// <summary>Returns the generation (the _N in segments_N) for this
+ /// IndexCommit
+ /// </summary>
+ public abstract long Generation { get; }
- /// <summary>Convenience method that returns the last modified time
- /// of the segments_N file corresponding to this index
- /// commit, equivalent to
- /// getDirectory().fileModified(getSegmentsFileName()).
- /// </summary>
- public virtual long Timestamp
- {
- get { return Directory.FileModified(SegmentsFileName); }
- }
+ /// <summary>Convenience method that returns the last modified time
+ /// of the segments_N file corresponding to this index
+ /// commit, equivalent to
+        /// <c>Directory.FileModified(SegmentsFileName)</c>.
+ /// </summary>
+ public virtual long Timestamp
+ {
+ get { return Directory.FileModified(SegmentsFileName); }
+ }
- /// <summary>Returns userData, previously passed to
- /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
- /// for this commit. IDictionary is String -> String.
- /// </summary>
- public abstract IDictionary<string, string> UserData { get; }
- }
+ /// <summary>Returns userData, previously passed to
+ /// <see cref="IndexWriter.Commit(System.Collections.Generic.IDictionary{string, string})" />
+ /// for this commit. IDictionary is String -> String.
+ /// </summary>
+ public abstract IDictionary<string, string> UserData { get; }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/IndexDeletionPolicy.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/IndexDeletionPolicy.cs b/src/core/Index/IndexDeletionPolicy.cs
index bef9924..fb27ec0 100644
--- a/src/core/Index/IndexDeletionPolicy.cs
+++ b/src/core/Index/IndexDeletionPolicy.cs
@@ -20,58 +20,58 @@ using System.Collections.Generic;
namespace Lucene.Net.Index
{
-
- /// <summary> <p/>Expert: policy for deletion of stale <see cref="IndexCommit">index commits</see>.
- ///
- /// <p/>Implement this interface, and pass it to one
- /// of the <see cref="IndexWriter" /> or <see cref="IndexReader" />
- /// constructors, to customize when older
- /// <see cref="IndexCommit">point-in-time commits</see>
- /// are deleted from the index directory. The default deletion policy
- /// is <see cref="KeepOnlyLastCommitDeletionPolicy" />, which always
- /// removes old commits as soon as a new commit is done (this
- /// matches the behavior before 2.2).<p/>
- ///
- /// <p/>One expected use case for this (and the reason why it
- /// was first created) is to work around problems with an
- /// index directory accessed via filesystems like NFS because
- /// NFS does not provide the "delete on last close" semantics
- /// that Lucene's "point in time" search normally relies on.
- /// By implementing a custom deletion policy, such as "a
- /// commit is only removed once it has been stale for more
- /// than X minutes", you can give your readers time to
- /// refresh to the new commit before <see cref="IndexWriter" />
- /// removes the old commits. Note that doing so will
- /// increase the storage requirements of the index. See <a
- /// target="top"
- /// href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
- /// for details.<p/>
- /// </summary>
-
- public interface IndexDeletionPolicy
- {
-
- /// <summary> <p/>This is called once when a writer is first
- /// instantiated to give the policy a chance to remove old
- /// commit points.<p/>
- ///
- /// <p/>The writer locates all index commits present in the
- /// index directory and calls this method. The policy may
- /// choose to delete some of the commit points, doing so by
- /// calling method <see cref="IndexCommit.Delete()" />
- /// of <see cref="IndexCommit" />.<p/>
- ///
- /// <p/><u>Note:</u> the last CommitPoint is the most recent one,
- /// i.e. the "front index state". Be careful not to delete it,
- /// unless you know for sure what you are doing, and unless
- /// you can afford to lose the index content while doing that.
- ///
- /// </summary>
- /// <param name="commits">List of current
- /// <see cref="IndexCommit">point-in-time commits</see>,
- /// sorted by age (the 0th one is the oldest commit).
- /// </param>
- void OnInit<T>(IList<T> commits) where T : IndexCommit;
+
+ /// <summary> <p/>Expert: policy for deletion of stale <see cref="IndexCommit">index commits</see>.
+ ///
+ /// <p/>Implement this interface, and pass it to one
+ /// of the <see cref="IndexWriter" /> or <see cref="IndexReader" />
+ /// constructors, to customize when older
+ /// <see cref="IndexCommit">point-in-time commits</see>
+ /// are deleted from the index directory. The default deletion policy
+ /// is <see cref="KeepOnlyLastCommitDeletionPolicy" />, which always
+ /// removes old commits as soon as a new commit is done (this
+ /// matches the behavior before 2.2).<p/>
+ ///
+ /// <p/>One expected use case for this (and the reason why it
+ /// was first created) is to work around problems with an
+ /// index directory accessed via filesystems like NFS because
+ /// NFS does not provide the "delete on last close" semantics
+ /// that Lucene's "point in time" search normally relies on.
+ /// By implementing a custom deletion policy, such as "a
+ /// commit is only removed once it has been stale for more
+ /// than X minutes", you can give your readers time to
+ /// refresh to the new commit before <see cref="IndexWriter" />
+ /// removes the old commits. Note that doing so will
+ /// increase the storage requirements of the index. See <a
+ /// target="top"
+ /// href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
+ /// for details.<p/>
+ /// </summary>
+
+ public interface IndexDeletionPolicy
+ {
+
+ /// <summary> <p/>This is called once when a writer is first
+ /// instantiated to give the policy a chance to remove old
+ /// commit points.<p/>
+ ///
+ /// <p/>The writer locates all index commits present in the
+ /// index directory and calls this method. The policy may
+ /// choose to delete some of the commit points, doing so by
+ /// calling method <see cref="IndexCommit.Delete()" />
+ /// of <see cref="IndexCommit" />.<p/>
+ ///
+ /// <p/><u>Note:</u> the last CommitPoint is the most recent one,
+ /// i.e. the "front index state". Be careful not to delete it,
+ /// unless you know for sure what you are doing, and unless
+ /// you can afford to lose the index content while doing that.
+ ///
+ /// </summary>
+ /// <param name="commits">List of current
+ /// <see cref="IndexCommit">point-in-time commits</see>,
+ /// sorted by age (the 0th one is the oldest commit).
+ /// </param>
+ void OnInit<T>(IList<T> commits) where T : IndexCommit;
/// <summary>
/// <p>This is called each time the writer completed a commit.
@@ -94,6 +94,6 @@ namespace Lucene.Net.Index
/// <param name="commits">
/// List of <see cref="IndexCommit" />, sorted by age (the 0th one is the oldest commit).
/// </param>
- void OnCommit<T>(IList<T> commits) where T : IndexCommit;
- }
+ void OnCommit<T>(IList<T> commits) where T : IndexCommit;
+ }
}
\ No newline at end of file